mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge branch 'master' of github.com:ClickHouse/ClickHouse into ADQM-1892
This commit is contained in:
commit
b98eca1e60
20
.github/ISSUE_TEMPLATE/10_question.md
vendored
20
.github/ISSUE_TEMPLATE/10_question.md
vendored
@ -1,20 +0,0 @@
|
||||
---
|
||||
name: Question
|
||||
about: Ask a question about ClickHouse
|
||||
title: ''
|
||||
labels: question
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
|
||||
|
||||
> If you still prefer GitHub issues, remove all this text and ask your question here.
|
||||
|
||||
**Company or project name**
|
||||
|
||||
Put your company name or project description here
|
||||
|
||||
**Question**
|
||||
|
||||
Your question
|
20
.github/ISSUE_TEMPLATE/10_question.yaml
vendored
Normal file
20
.github/ISSUE_TEMPLATE/10_question.yaml
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
name: Question
|
||||
description: Ask a question about ClickHouse
|
||||
labels: ["question"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Company or project name
|
||||
description: Put your company name or project description here.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Question
|
||||
description: Please put your question here.
|
||||
validations:
|
||||
required: true
|
2
contrib/orc
vendored
2
contrib/orc
vendored
@ -1 +1 @@
|
||||
Subproject commit e24f2c2a3ca0769c96704ab20ad6f512a83ea2ad
|
||||
Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f
|
@ -285,7 +285,7 @@ stop_logs_replication
|
||||
|
||||
# Try to get logs while server is running
|
||||
failed_to_save_logs=0
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log
|
||||
do
|
||||
err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes")
|
||||
echo "$err"
|
||||
@ -339,7 +339,7 @@ if [ $failed_to_save_logs -ne 0 ]; then
|
||||
# directly
|
||||
# - even though ci auto-compress some files (but not *.tsv) it does this only
|
||||
# for files >64MB, we want these files to be compressed explicitly
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log
|
||||
do
|
||||
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
|
@ -30,6 +30,7 @@ RUN pip3 install \
|
||||
mypy==1.8.0 \
|
||||
pylint==3.1.0 \
|
||||
python-magic==0.4.24 \
|
||||
flake8==4.0.1 \
|
||||
requests \
|
||||
thefuzz \
|
||||
types-requests \
|
||||
|
@ -9,6 +9,8 @@ echo "Check style" | ts
|
||||
./check-style -n |& tee /test_output/style_output.txt
|
||||
echo "Check python formatting with black" | ts
|
||||
./check-black -n |& tee /test_output/black_output.txt
|
||||
echo "Check python with flake8" | ts
|
||||
./check-flake8 |& tee /test_output/flake8_output.txt
|
||||
echo "Check python type hinting with mypy" | ts
|
||||
./check-mypy -n |& tee /test_output/mypy_output.txt
|
||||
echo "Check typos" | ts
|
||||
|
@ -25,7 +25,8 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
|
||||
./setup_minio.sh stateless # to have a proper environment
|
||||
|
||||
echo "Get previous release tag"
|
||||
previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag)
|
||||
# shellcheck disable=SC2016
|
||||
previous_release_tag=$(dpkg-deb --showformat='${Version}' --show package_folder/clickhouse-client*.deb | get_previous_release_tag)
|
||||
echo $previous_release_tag
|
||||
|
||||
echo "Clone previous release repository"
|
||||
|
@ -91,6 +91,9 @@ cd ./utils/check-style
|
||||
# Check python type hinting with mypy
|
||||
./check-mypy
|
||||
|
||||
# Check python with flake8
|
||||
./check-flake8
|
||||
|
||||
# Check code with codespell
|
||||
./check-typos
|
||||
|
||||
|
@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTICS(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTICS(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
...
|
||||
INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
|
||||
INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
|
||||
@ -1043,12 +1043,12 @@ ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [
|
||||
|
||||
## Column Statistics (Experimental) {#column-statistics}
|
||||
|
||||
The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`.
|
||||
The statistics declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistics = 1`.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tab
|
||||
(
|
||||
a Int64 STATISTIC(tdigest),
|
||||
a Int64 STATISTICS(TDigest, Uniq),
|
||||
b Float64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
@ -1058,19 +1058,23 @@ ORDER BY a
|
||||
We can also manipulate statistics with `ALTER` statements.
|
||||
|
||||
```sql
|
||||
ALTER TABLE tab ADD STATISTIC b TYPE tdigest;
|
||||
ALTER TABLE tab DROP STATISTIC a TYPE tdigest;
|
||||
ALTER TABLE tab ADD STATISTICS b TYPE TDigest, Uniq;
|
||||
ALTER TABLE tab DROP STATISTICS a;
|
||||
```
|
||||
|
||||
These lightweight statistics aggregate information about distribution of values in columns.
|
||||
They can be used for query optimization when we enable `set allow_statistic_optimize = 1`.
|
||||
These lightweight statistics aggregate information about the distribution of values in columns. Statistics are stored in every part and updated on every insert.
|
||||
They can be used for prewhere optimization only if we enable `set allow_statistics_optimize = 1`.
|
||||
|
||||
#### Available Types of Column Statistics {#available-types-of-column-statistics}
|
||||
|
||||
- `tdigest`
|
||||
- `TDigest`
|
||||
|
||||
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
|
||||
|
||||
- `Uniq`
|
||||
|
||||
Estimate the number of distinct values of a column by HyperLogLog.
|
||||
|
||||
## Column-level Settings {#column-level-settings}
|
||||
|
||||
Certain MergeTree settings can be overridden at column level:
|
||||
|
@ -1206,6 +1206,16 @@ Expired time for HSTS in seconds. The default value is 0 means clickhouse disabl
|
||||
<hsts_max_age>600000</hsts_max_age>
|
||||
```
|
||||
|
||||
## mlock_executable {#mlock_executable}
|
||||
|
||||
Perform mlockall after startup to lower first queries latency and to prevent clickhouse executable from being paged out under high IO load. Enabling this option is recommended but will lead to increased startup time for up to a few seconds.
|
||||
Keep in mind that this parameter would not work without "CAP_IPC_LOCK" capability.
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<mlock_executable>false</mlock_executable>
|
||||
```
|
||||
|
||||
## include_from {#include_from}
|
||||
|
||||
The path to the file with substitutions. Both XML and YAML formats are supported.
|
||||
@ -1353,6 +1363,26 @@ Examples:
|
||||
<listen_host>127.0.0.1</listen_host>
|
||||
```
|
||||
|
||||
## listen_try {#listen_try}
|
||||
|
||||
The server will not exit if IPv6 or IPv4 networks are unavailable while trying to listen.
|
||||
|
||||
Examples:
|
||||
|
||||
``` xml
|
||||
<listen_try>0</listen_try>
|
||||
```
|
||||
|
||||
## listen_reuse_port {#listen_reuse_port}
|
||||
|
||||
Allow multiple servers to listen on the same address:port. Requests will be routed to a random server by the operating system. Enabling this setting is not recommended.
|
||||
|
||||
Examples:
|
||||
|
||||
``` xml
|
||||
<listen_reuse_port>0</listen_reuse_port>
|
||||
```
|
||||
|
||||
## listen_backlog {#listen_backlog}
|
||||
|
||||
Backlog (queue size of pending connections) of the listen socket.
|
||||
|
@ -3170,6 +3170,18 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## lightweight_deletes_sync {#lightweight_deletes_sync}
|
||||
|
||||
The same as 'mutation_sync', but controls only execution of lightweight deletes.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Mutations execute asynchronously.
|
||||
- 1 - The query waits for the lightweight deletes to complete on the current server.
|
||||
- 2 - The query waits for the lightweight deletes to complete on all replicas (if they exist).
|
||||
|
||||
Default value: `2`.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
|
||||
@ -4604,6 +4616,16 @@ Read more about [memory overcommit](memory-overcommit.md).
|
||||
|
||||
Default value: `1GiB`.
|
||||
|
||||
## max_untracked_memory {#max_untracked_memory}
|
||||
Small allocations and deallocations are grouped in a thread-local variable and tracked or profiled only when the amount (in absolute value) becomes larger than the specified value. If the value is higher than 'memory_profiler_step', it will be effectively lowered to 'memory_profiler_step'.
|
||||
|
||||
Default value: `4MiB`.
|
||||
|
||||
## min_untracked_memory {#min_untracked_memory}
|
||||
Lower bound for untracked memory limit which is applied to threads with low memory consumption. Untracked memory limit equals thread memory usage divided by 16 and clamped between `min_untracked_memory` and `max_untracked_memory` for every thread. It guarantees that total untracked memory does not exceed 10% of current memory footprint even with a lot of small threads. To disable the dynamic limit for untracked memory, set the value to `4MiB`.
|
||||
|
||||
Default value: `4KiB`.
|
||||
|
||||
## Schema Inference settings
|
||||
|
||||
See [schema inference](../../interfaces/schema-inference.md#schema-inference-modes) documentation for more details.
|
||||
@ -5108,7 +5130,7 @@ a Tuple(
|
||||
)
|
||||
```
|
||||
|
||||
## allow_experimental_statistic {#allow_experimental_statistic}
|
||||
## allow_experimental_statistics {#allow_experimental_statistics}
|
||||
|
||||
Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulate statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics).
|
||||
|
||||
|
@ -24,6 +24,8 @@ Alias: `lttb`.
|
||||
- `x` — x coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
|
||||
- `y` — y coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
|
||||
|
||||
NaNs are ignored in the provided series, meaning that any NaN values will be excluded from the analysis. This ensures that the function operates only on valid numerical data.
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `n` — number of points in the resulting series. [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
@ -61,7 +63,7 @@ Result:
|
||||
|
||||
``` text
|
||||
┌────────largestTriangleThreeBuckets(4)(x, y)───────────┐
|
||||
│ [(1,10),(3,15),(5,40),(10,70)] │
|
||||
│ [(1,10),(3,15),(9,55),(10,70)] │
|
||||
└───────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -2423,11 +2423,7 @@ Result:
|
||||
|
||||
## toUnixTimestamp64Milli
|
||||
|
||||
## toUnixTimestamp64Micro
|
||||
|
||||
## toUnixTimestamp64Nano
|
||||
|
||||
Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision.
|
||||
Converts a `DateTime64` to a `Int64` value with fixed millisecond precision. The input value is scaled up or down appropriately depending on its precision.
|
||||
|
||||
:::note
|
||||
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
|
||||
@ -2437,24 +2433,22 @@ The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
|
||||
|
||||
```sql
|
||||
toUnixTimestamp64Milli(value)
|
||||
toUnixTimestamp64Micro(value)
|
||||
toUnixTimestamp64Nano(value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — DateTime64 value with any precision.
|
||||
- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `value` converted to the `Int64` data type.
|
||||
- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md).
|
||||
|
||||
**Examples**
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
|
||||
WITH toDateTime64('2009-02-13 23:31:31.011', 3, 'UTC') AS dt64
|
||||
SELECT toUnixTimestamp64Milli(dt64);
|
||||
```
|
||||
|
||||
@ -2462,14 +2456,77 @@ Result:
|
||||
|
||||
```response
|
||||
┌─toUnixTimestamp64Milli(dt64)─┐
|
||||
│ 1568650812345 │
|
||||
│ 1234567891011 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
## toUnixTimestamp64Micro
|
||||
|
||||
Converts a `DateTime64` to a `Int64` value with fixed microsecond precision. The input value is scaled up or down appropriately depending on its precision.
|
||||
|
||||
:::note
|
||||
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
toUnixTimestamp64Micro(value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
|
||||
```sql
|
||||
WITH toDateTime64('1970-01-15 06:56:07.891011', 6, 'UTC') AS dt64
|
||||
SELECT toUnixTimestamp64Micro(dt64);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─toUnixTimestamp64Micro(dt64)─┐
|
||||
│ 1234567891011 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
## toUnixTimestamp64Nano
|
||||
|
||||
Converts a `DateTime64` to a `Int64` value with fixed nanosecond precision. The input value is scaled up or down appropriately depending on its precision.
|
||||
|
||||
:::note
|
||||
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
toUnixTimestamp64Nano(value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
WITH toDateTime64('1970-01-01 00:20:34.567891011', 9, 'UTC') AS dt64
|
||||
SELECT toUnixTimestamp64Nano(dt64);
|
||||
```
|
||||
|
||||
@ -2477,34 +2534,32 @@ Result:
|
||||
|
||||
```response
|
||||
┌─toUnixTimestamp64Nano(dt64)─┐
|
||||
│ 1568650812345678000 │
|
||||
│ 1234567891011 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
## fromUnixTimestamp64Milli
|
||||
|
||||
## fromUnixTimestamp64Micro
|
||||
Converts an `Int64` to a `DateTime64` value with fixed millisecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.
|
||||
|
||||
## fromUnixTimestamp64Nano
|
||||
|
||||
Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and optional timezone. Input value is scaled up or down appropriately depending on it’s precision. Please note that input value is treated as UTC timestamp, not timestamp at given (or implicit) timezone.
|
||||
:::note
|
||||
Please note that the input value is treated as a UTC timestamp, not a timestamp in the given (or implicit) timezone.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
fromUnixTimestamp64Milli(value[, timezone])
|
||||
fromUnixTimestamp64Micro(value[, timezone])
|
||||
fromUnixTimestamp64Nano(value[, timezone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — `Int64` value with any precision.
|
||||
- `timezone` — `String` (optional) timezone name of the result.
|
||||
- `value` — value with any precision. [Int64](../data-types/int-uint.md).
|
||||
- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `value` converted to the `DateTime64` data type.
|
||||
- `value` converted to DateTime64 with precision `3`. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -2512,15 +2567,101 @@ Query:
|
||||
|
||||
``` sql
|
||||
WITH CAST(1234567891011, 'Int64') AS i64
|
||||
SELECT fromUnixTimestamp64Milli(i64, 'UTC');
|
||||
SELECT
|
||||
fromUnixTimestamp64Milli(i64, 'UTC') AS x,
|
||||
toTypeName(x);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
|
||||
│ 2009-02-13 23:31:31.011 │
|
||||
└──────────────────────────────────────┘
|
||||
┌───────────────────────x─┬─toTypeName(x)────────┐
|
||||
│ 2009-02-13 23:31:31.011 │ DateTime64(3, 'UTC') │
|
||||
└─────────────────────────┴──────────────────────┘
|
||||
```
|
||||
|
||||
## fromUnixTimestamp64Micro
|
||||
|
||||
Converts an `Int64` to a `DateTime64` value with fixed microsecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.
|
||||
|
||||
:::note
|
||||
Please note that the input value is treated as a UTC timestamp, not a timestamp in the given (or implicit) timezone.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
fromUnixTimestamp64Micro(value[, timezone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — value with any precision. [Int64](../data-types/int-uint.md).
|
||||
- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `value` converted to DateTime64 with precision `6`. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH CAST(1234567891011, 'Int64') AS i64
|
||||
SELECT
|
||||
fromUnixTimestamp64Micro(i64, 'UTC') AS x,
|
||||
toTypeName(x);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌──────────────────────────x─┬─toTypeName(x)────────┐
|
||||
│ 1970-01-15 06:56:07.891011 │ DateTime64(6, 'UTC') │
|
||||
└────────────────────────────┴──────────────────────┘
|
||||
```
|
||||
|
||||
## fromUnixTimestamp64Nano
|
||||
|
||||
Converts an `Int64` to a `DateTime64` value with fixed nanosecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.
|
||||
|
||||
:::note
|
||||
Please note that the input value is treated as a UTC timestamp, not a timestamp in the given (or implicit) timezone.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
fromUnixTimestamp64Nano(value[, timezone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — value with any precision. [Int64](../data-types/int-uint.md).
|
||||
- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `value` converted to DateTime64 with precision `9`. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH CAST(1234567891011, 'Int64') AS i64
|
||||
SELECT
|
||||
fromUnixTimestamp64Nano(i64, 'UTC') AS x,
|
||||
toTypeName(x);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─────────────────────────────x─┬─toTypeName(x)────────┐
|
||||
│ 1970-01-01 00:20:34.567891011 │ DateTime64(9, 'UTC') │
|
||||
└───────────────────────────────┴──────────────────────┘
|
||||
```
|
||||
|
||||
## formatRow
|
||||
|
@ -16,7 +16,7 @@ Most `ALTER TABLE` queries modify table settings or data:
|
||||
- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md)
|
||||
- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md)
|
||||
- [TTL](/docs/en/sql-reference/statements/alter/ttl.md)
|
||||
- [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md)
|
||||
- [STATISTICS](/docs/en/sql-reference/statements/alter/statistics.md)
|
||||
- [APPLY DELETED MASK](/docs/en/sql-reference/statements/alter/apply-deleted-mask.md)
|
||||
|
||||
:::note
|
||||
|
@ -139,7 +139,7 @@ For the query to run successfully, the following conditions must be met:
|
||||
ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1
|
||||
```
|
||||
|
||||
This query copies the data partition from the `table1` to `table2` and replaces existing partition in the `table2`.
|
||||
This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`. The operation is atomic.
|
||||
|
||||
Note that:
|
||||
|
||||
|
@ -1,25 +0,0 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/alter/statistic
|
||||
sidebar_position: 45
|
||||
sidebar_label: STATISTIC
|
||||
---
|
||||
|
||||
# Manipulating Column Statistics
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].table ADD STATISTIC (columns list) TYPE type` - Adds statistic description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].table DROP STATISTIC (columns list) TYPE type` - Removes statistic description from tables metadata and deletes statistic files from disk.
|
||||
|
||||
- `ALTER TABLE [db].table CLEAR STATISTIC (columns list) TYPE type` - Deletes statistic files from disk.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
The first two commands are lightweight in a sense that they only change metadata or remove files.
|
||||
|
||||
Also, they are replicated, syncing statistics metadata via ZooKeeper.
|
||||
|
||||
:::note
|
||||
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
||||
:::
|
33
docs/en/sql-reference/statements/alter/statistics.md
Normal file
33
docs/en/sql-reference/statements/alter/statistics.md
Normal file
@ -0,0 +1,33 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/alter/statistics
|
||||
sidebar_position: 45
|
||||
sidebar_label: STATISTICS
|
||||
---
|
||||
|
||||
# Manipulating Column Statistics
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies the statistics description in the table's metadata.
|
||||
|
||||
- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
|
||||
|
||||
- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuilt using `ALTER TABLE MATERIALIZE STATISTICS`.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
The first two commands are lightweight in a sense that they only change metadata or remove files.
|
||||
|
||||
Also, they are replicated, syncing statistics metadata via ZooKeeper.
|
||||
|
||||
Here is an example of adding two statistics types to two columns:
|
||||
|
||||
```
|
||||
ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
|
||||
```
|
||||
|
||||
:::note
|
||||
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
||||
:::
|
@ -51,10 +51,11 @@ enum class AccessType : uint8_t
|
||||
M(ALTER_CLEAR_INDEX, "CLEAR INDEX", TABLE, ALTER_INDEX) \
|
||||
M(ALTER_INDEX, "INDEX", GROUP, ALTER_TABLE) /* allows to execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\
|
||||
\
|
||||
M(ALTER_ADD_STATISTIC, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTIC) \
|
||||
M(ALTER_DROP_STATISTIC, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTIC) \
|
||||
M(ALTER_MATERIALIZE_STATISTIC, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTIC) \
|
||||
M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\
|
||||
M(ALTER_ADD_STATISTICS, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTICS) \
|
||||
M(ALTER_DROP_STATISTICS, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTICS) \
|
||||
M(ALTER_MODIFY_STATISTICS, "ALTER MODIFY STATISTIC", TABLE, ALTER_STATISTICS) \
|
||||
M(ALTER_MATERIALIZE_STATISTICS, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTICS) \
|
||||
M(ALTER_STATISTICS, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\
|
||||
\
|
||||
M(ALTER_ADD_PROJECTION, "ADD PROJECTION", TABLE, ALTER_PROJECTION) \
|
||||
M(ALTER_DROP_PROJECTION, "DROP PROJECTION", TABLE, ALTER_PROJECTION) \
|
||||
|
@ -334,6 +334,18 @@ public:
|
||||
compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params
|
||||
}
|
||||
|
||||
Float64 getCountEqual(Float64 value) const
|
||||
{
|
||||
Float64 result = 0;
|
||||
for (const auto & c : centroids)
|
||||
{
|
||||
/// std::cerr << "c "<< c.mean << " "<< c.count << std::endl;
|
||||
if (value == c.mean)
|
||||
result += c.count;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
Float64 getCountLessThan(Float64 value) const
|
||||
{
|
||||
bool first = true;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Analyzer/ArrayJoinNode.h>
|
||||
#include <Analyzer/ColumnNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
@ -64,7 +65,12 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
|
||||
auto * column_node = array_join_expression->as<ColumnNode>();
|
||||
if (column_node && column_node->getExpression())
|
||||
array_join_expression_ast = column_node->getExpression()->toAST(options);
|
||||
{
|
||||
if (const auto * function_node = column_node->getExpression()->as<FunctionNode>(); function_node && function_node->getFunctionName() == "nested")
|
||||
array_join_expression_ast = array_join_expression->toAST(options);
|
||||
else
|
||||
array_join_expression_ast = column_node->getExpression()->toAST(options);
|
||||
}
|
||||
else
|
||||
array_join_expression_ast = array_join_expression->toAST(options);
|
||||
|
||||
|
@ -22,6 +22,7 @@ public:
|
||||
|
||||
if (query_node->hasOrderBy())
|
||||
{
|
||||
QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set;
|
||||
QueryTreeNodes result_nodes;
|
||||
|
||||
auto & query_order_by_nodes = query_node->getOrderBy().getNodes();
|
||||
@ -45,10 +46,9 @@ public:
|
||||
query_order_by_nodes = std::move(result_nodes);
|
||||
}
|
||||
|
||||
unique_expressions_nodes_set.clear();
|
||||
|
||||
if (query_node->hasLimitBy())
|
||||
{
|
||||
QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set;
|
||||
QueryTreeNodes result_nodes;
|
||||
|
||||
auto & query_limit_by_nodes = query_node->getLimitBy().getNodes();
|
||||
@ -63,9 +63,6 @@ public:
|
||||
query_limit_by_nodes = std::move(result_nodes);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -57,6 +57,7 @@ AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory
|
||||
{
|
||||
auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded);
|
||||
current_thread->untracked_memory = 0;
|
||||
current_thread->updateUntrackedMemoryLimit(memory_tracker->get());
|
||||
return res;
|
||||
}
|
||||
else
|
||||
@ -84,6 +85,13 @@ void CurrentMemoryTracker::check()
|
||||
std::ignore = memory_tracker->allocImpl(0, true);
|
||||
}
|
||||
|
||||
Int64 CurrentMemoryTracker::get()
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
return memory_tracker->get();
|
||||
return 0;
|
||||
}
|
||||
|
||||
AllocationTrace CurrentMemoryTracker::alloc(Int64 size)
|
||||
{
|
||||
bool throw_if_memory_exceeded = true;
|
||||
@ -103,10 +111,12 @@ AllocationTrace CurrentMemoryTracker::free(Int64 size)
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory -= size;
|
||||
if (current_thread->untracked_memory < -current_thread->untracked_memory_limit)
|
||||
// Note that we use `max_untracked_memory` and not `untracked_memory_limit` to create hysteresis to avoid track/untrack cycles
|
||||
if (current_thread->untracked_memory < -current_thread->max_untracked_memory)
|
||||
{
|
||||
Int64 untracked_memory = current_thread->untracked_memory;
|
||||
current_thread->untracked_memory = 0;
|
||||
current_thread->updateUntrackedMemoryLimit(memory_tracker->get() + untracked_memory);
|
||||
return memory_tracker->free(-untracked_memory);
|
||||
}
|
||||
}
|
||||
|
@ -12,7 +12,9 @@ struct CurrentMemoryTracker
|
||||
|
||||
/// This function should be called after memory deallocation.
|
||||
[[nodiscard]] static AllocationTrace free(Int64 size);
|
||||
|
||||
static void check();
|
||||
[[nodiscard]] static Int64 get();
|
||||
|
||||
/// Throws MEMORY_LIMIT_EXCEEDED (if it's allowed to throw exceptions)
|
||||
static void injectFault();
|
||||
|
@ -586,7 +586,7 @@
|
||||
M(705, TABLE_NOT_EMPTY) \
|
||||
M(706, LIBSSH_ERROR) \
|
||||
M(707, GCP_ERROR) \
|
||||
M(708, ILLEGAL_STATISTIC) \
|
||||
M(708, ILLEGAL_STATISTICS) \
|
||||
M(709, CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT) \
|
||||
M(710, FAULT_INJECTED) \
|
||||
M(711, FILECACHE_ACCESS_DENIED) \
|
||||
|
@ -15,6 +15,7 @@ struct MemoryTrackerSwitcher
|
||||
return;
|
||||
|
||||
auto * thread_tracker = CurrentThread::getMemoryTracker();
|
||||
|
||||
prev_untracked_memory = current_thread->untracked_memory;
|
||||
prev_memory_tracker_parent = thread_tracker->getParent();
|
||||
|
||||
@ -31,8 +32,10 @@ struct MemoryTrackerSwitcher
|
||||
CurrentThread::flushUntrackedMemory();
|
||||
auto * thread_tracker = CurrentThread::getMemoryTracker();
|
||||
|
||||
current_thread->untracked_memory = prev_untracked_memory;
|
||||
/// It is important to set untracked memory after the call of
|
||||
/// 'setParent' because it may flush untracked memory to the wrong parent.
|
||||
thread_tracker->setParent(prev_memory_tracker_parent);
|
||||
current_thread->untracked_memory = prev_untracked_memory;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -140,6 +140,18 @@ inline bool isPrintableASCII(char c)
|
||||
return uc >= 32 && uc <= 126; /// 127 is ASCII DEL.
|
||||
}
|
||||
|
||||
/// True when `c` lies in the byte range 0x30..0x3F, i.e. it is one of the ASCII characters 0-9:;<=>?
inline bool isCSIParameterByte(char c)
{
    const auto code = static_cast<uint8_t>(c);
    return 0x30 <= code && code <= 0x3F;
}
|
||||
|
||||
/// True when `c` lies in the byte range 0x20..0x2F, i.e. it is the space character
/// or one of the ASCII characters !"#$%&'()*+,-./
inline bool isCSIIntermediateByte(char c)
{
    const auto code = static_cast<uint8_t>(c);
    return 0x20 <= code && code <= 0x2F;
}
|
||||
|
||||
inline bool isCSIFinalByte(char c)
|
||||
{
|
||||
uint8_t uc = c;
|
||||
|
@ -183,6 +183,12 @@ public:
|
||||
Int64 untracked_memory = 0;
|
||||
/// Each thread could new/delete memory in range of (-untracked_memory_limit, untracked_memory_limit) without access to common counters.
|
||||
Int64 untracked_memory_limit = 4 * 1024 * 1024;
|
||||
/// To keep total untracked memory limited to `untracked_memory_ratio * RSS` we have to account threads with small and large memory footprint differently.
|
||||
/// For this purpose we dynamically change `untracked_memory_limit` after every tracking event using a simple formula:
|
||||
/// untracked_memory_limit = clamp(untracked_memory_ratio * cur_memory_bytes, min_untracked_memory, max_untracked_memory)
|
||||
/// Note that these values are updated when the thread is attached to a group
|
||||
Int64 min_untracked_memory = 4 * 1024 * 1024; // Default value is kept 4MB mostly for tests and client (should be changed to 4KB as default value a setting)
|
||||
Int64 max_untracked_memory = 4 * 1024 * 1024;
|
||||
|
||||
/// Statistics of read and write rows/bytes
|
||||
Progress progress_in;
|
||||
@ -309,6 +315,12 @@ public:
|
||||
|
||||
void initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period);
|
||||
|
||||
/// Recomputes `untracked_memory_limit` from `current` (the memory amount passed in by the caller).
/// The new limit is `current` shifted right by `untracked_memory_ratio_bits` (1/16 of it),
/// bounded from below by `min_untracked_memory` and from above by `max_untracked_memory`.
void updateUntrackedMemoryLimit(Int64 current)
{
    constexpr Int64 untracked_memory_ratio_bits = 4; // untracked_memory_ratio = 1.0 / (1 << untracked_memory_ratio_bits) = 1.0 / 16 = 6.25%
    const Int64 scaled = current >> untracked_memory_ratio_bits;

    /// std::clamp has undefined behavior when the lower bound is greater than the upper bound
    /// (e.g. `max_untracked_memory` set to 0 while `min_untracked_memory` stays positive).
    /// Applying the bounds one after another is always well-defined, gives the same result
    /// whenever min <= max, and lets the upper bound win otherwise.
    untracked_memory_limit = std::min<Int64>(std::max<Int64>(scaled, min_untracked_memory), max_untracked_memory);
}
|
||||
|
||||
private:
|
||||
void applyGlobalSettings();
|
||||
void applyQuerySettings();
|
||||
|
@ -103,7 +103,7 @@ template <ComputeWidthMode mode>
|
||||
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
|
||||
{
|
||||
UTF8Decoder decoder;
|
||||
int isEscapeSequence = false;
|
||||
bool is_escape_sequence = false;
|
||||
size_t width = 0;
|
||||
size_t rollback = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -116,6 +116,9 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
|
||||
while (i + 15 < size)
|
||||
{
|
||||
if (is_escape_sequence)
|
||||
break;
|
||||
|
||||
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
|
||||
|
||||
const uint16_t non_regular_width_mask = _mm_movemask_epi8(
|
||||
@ -132,25 +135,28 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isEscapeSequence)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
i += 16;
|
||||
width += 16;
|
||||
}
|
||||
i += 16;
|
||||
width += 16;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
while (i < size && isPrintableASCII(data[i]))
|
||||
{
|
||||
if (!isEscapeSequence)
|
||||
bool ignore_width = is_escape_sequence && (isCSIParameterByte(data[i]) || isCSIIntermediateByte(data[i]));
|
||||
|
||||
if (ignore_width || (data[i] == '[' && is_escape_sequence))
|
||||
{
|
||||
/// don't count the width
|
||||
}
|
||||
else if (is_escape_sequence && isCSIFinalByte(data[i]))
|
||||
{
|
||||
is_escape_sequence = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
++width;
|
||||
else if (isCSIFinalByte(data[i]) && data[i - 1] != '\x1b')
|
||||
isEscapeSequence = false; /// end of CSI escape sequence reached
|
||||
}
|
||||
++i;
|
||||
}
|
||||
|
||||
@ -178,7 +184,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
// special treatment for '\t' and for ESC
|
||||
size_t next_width = width;
|
||||
if (decoder.codepoint == '\x1b')
|
||||
isEscapeSequence = true;
|
||||
is_escape_sequence = true;
|
||||
else if (decoder.codepoint == '\t')
|
||||
next_width += 8 - (prefix + width) % 8;
|
||||
else
|
||||
|
@ -160,8 +160,8 @@ class IColumn;
|
||||
M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
|
||||
M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \
|
||||
\
|
||||
M(Bool, allow_statistic_optimize, false, "Allows using statistic to optimize queries", 0) \
|
||||
M(Bool, allow_experimental_statistic, false, "Allows using statistic", 0) \
|
||||
M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \
|
||||
M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \
|
||||
\
|
||||
M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
|
||||
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
|
||||
@ -491,6 +491,7 @@ class IColumn;
|
||||
M(UInt64, max_memory_usage_for_user, 0, "Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.", 0) \
|
||||
M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \
|
||||
M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \
|
||||
M(UInt64, min_untracked_memory, (4 * 1024), "Lower bound for untracked memory limit which is applied to threads with low memory consumption. Untracked memory limit equals thread_memory_usage/16 and clamped between min_untracked_memory and max_untracked_memory for every thread.", 0) \
|
||||
M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \
|
||||
M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
|
||||
M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
|
||||
@ -891,6 +892,7 @@ class IColumn;
|
||||
M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \
|
||||
M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \
|
||||
M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \
|
||||
M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \
|
||||
\
|
||||
/** Experimental functions */ \
|
||||
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
|
||||
|
@ -83,7 +83,7 @@ namespace SettingsChangesHistory
|
||||
/// For newly added setting choose the most appropriate previous_value (for example, if new setting
|
||||
/// controls new feature and it's 'true' by default, use 'false' as previous_value).
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
|
||||
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},
|
||||
@ -96,6 +96,12 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
|
||||
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
|
||||
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
|
||||
{"min_untracked_memory", 4_MiB, 4_KiB, "A new setting to enable more accurate memory tracking."},
|
||||
{"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"},
|
||||
{"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."},
|
||||
{"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."},
|
||||
{"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."},
|
||||
{"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}
|
||||
}},
|
||||
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"},
|
||||
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},
|
||||
|
@ -543,6 +543,7 @@ template <typename DataType> constexpr bool IsDataTypeNumber = false;
|
||||
template <typename DataType> constexpr bool IsDataTypeDateOrDateTime = false;
|
||||
template <typename DataType> constexpr bool IsDataTypeDate = false;
|
||||
template <typename DataType> constexpr bool IsDataTypeEnum = false;
|
||||
template <typename DataType> constexpr bool IsDataTypeStringOrFixedString = false;
|
||||
|
||||
template <typename DataType> constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal<DataType> || IsDataTypeNumber<DataType>;
|
||||
|
||||
@ -556,6 +557,8 @@ class DataTypeDate;
|
||||
class DataTypeDate32;
|
||||
class DataTypeDateTime;
|
||||
class DataTypeDateTime64;
|
||||
class DataTypeString;
|
||||
class DataTypeFixedString;
|
||||
|
||||
template <is_decimal T> constexpr bool IsDataTypeDecimal<DataTypeDecimal<T>> = true;
|
||||
|
||||
@ -572,6 +575,9 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDate32> = tru
|
||||
template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime> = true;
|
||||
template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime64> = true;
|
||||
|
||||
template <> inline constexpr bool IsDataTypeStringOrFixedString<DataTypeString> = true;
|
||||
template <> inline constexpr bool IsDataTypeStringOrFixedString<DataTypeFixedString> = true;
|
||||
|
||||
template <typename T>
|
||||
class DataTypeEnum;
|
||||
|
||||
|
@ -1,20 +1,21 @@
|
||||
#include <filesystem>
|
||||
#include <Databases/DatabaseAtomic.h>
|
||||
#include <Databases/DatabaseFactory.h>
|
||||
#include <Databases/DatabaseOnDisk.h>
|
||||
#include <Databases/DatabaseReplicated.h>
|
||||
#include <Databases/DatabaseFactory.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/DDLTask.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/ExternalDictionariesLoader.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Storages/StorageMaterializedView.h>
|
||||
#include "Common/logger_useful.h"
|
||||
#include <Common/PoolId.h>
|
||||
#include <Common/atomicRename.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <Storages/StorageMaterializedView.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/ExternalDictionariesLoader.h>
|
||||
#include <filesystem>
|
||||
#include <Interpreters/DDLTask.h>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@ -393,6 +394,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables()
|
||||
{
|
||||
DetachedTables not_in_use;
|
||||
auto it = detached_tables.begin();
|
||||
LOG_DEBUG(log, "There are {} detached tables. Start searching non used tables.", detached_tables.size());
|
||||
while (it != detached_tables.end())
|
||||
{
|
||||
if (it->second.unique())
|
||||
@ -403,6 +405,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables()
|
||||
else
|
||||
++it;
|
||||
}
|
||||
LOG_DEBUG(log, "Found {} non used tables in detached tables.", not_in_use.size());
|
||||
/// It should be destroyed in caller with released database mutex
|
||||
return not_in_use;
|
||||
}
|
||||
|
@ -670,7 +670,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
|
||||
for (auto it = metadata_files.begin(); it < metadata_files.end(); std::advance(it, batch_size))
|
||||
{
|
||||
std::span batch{it, std::min(std::next(it, batch_size), metadata_files.end())};
|
||||
pool.scheduleOrThrowOnError(
|
||||
pool.scheduleOrThrow(
|
||||
[batch, &process_metadata_file, &process_tmp_drop_metadata_file]() mutable
|
||||
{
|
||||
setThreadName("DatabaseOnDisk");
|
||||
@ -679,7 +679,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
|
||||
process_metadata_file(file.first);
|
||||
else
|
||||
process_tmp_drop_metadata_file(file.first);
|
||||
});
|
||||
}, Priority{}, getContext()->getSettingsRef().lock_acquire_timeout.totalMicroseconds());
|
||||
}
|
||||
pool.wait();
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_DATABASE_ENGINE;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int UNEXPECTED_NODE_IN_ZOOKEEPER;
|
||||
}
|
||||
|
||||
static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;
|
||||
@ -76,6 +77,20 @@ static void setReplicatedEngine(ASTCreateQuery * create_query, ContextPtr contex
|
||||
String replica_path = server_settings.default_replica_path;
|
||||
String replica_name = server_settings.default_replica_name;
|
||||
|
||||
/// Check that replica path doesn't exist
|
||||
Macros::MacroExpansionInfo info;
|
||||
StorageID table_id = StorageID(create_query->getDatabase(), create_query->getTable(), create_query->uuid);
|
||||
info.table_id = table_id;
|
||||
info.expand_special_macros_only = false;
|
||||
|
||||
String zookeeper_path = context->getMacros()->expand(replica_path, info);
|
||||
if (context->getZooKeeper()->exists(zookeeper_path))
|
||||
throw Exception(
|
||||
ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER,
|
||||
"Found existing ZooKeeper path {} while trying to convert table {} to replicated. Table will not be converted.",
|
||||
zookeeper_path, backQuote(table_id.getFullTableName())
|
||||
);
|
||||
|
||||
auto args = std::make_shared<ASTExpressionList>();
|
||||
args->children.push_back(std::make_shared<ASTLiteral>(replica_path));
|
||||
args->children.push_back(std::make_shared<ASTLiteral>(replica_name));
|
||||
|
@ -36,30 +36,24 @@ void IObjectStorageIteratorAsync::deactivate()
|
||||
void IObjectStorageIteratorAsync::nextBatch()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (is_finished)
|
||||
{
|
||||
current_batch.clear();
|
||||
current_batch_iterator = current_batch.begin();
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!is_initialized)
|
||||
{
|
||||
outcome_future = scheduleBatch();
|
||||
is_initialized = true;
|
||||
}
|
||||
|
||||
if (!is_initialized)
|
||||
{
|
||||
outcome_future = scheduleBatch();
|
||||
is_initialized = true;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
chassert(outcome_future.valid());
|
||||
BatchAndHasNext result;
|
||||
try
|
||||
{
|
||||
result = outcome_future.get();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
is_finished = true;
|
||||
throw;
|
||||
}
|
||||
BatchAndHasNext result = outcome_future.get();
|
||||
|
||||
current_batch = std::move(result.batch);
|
||||
current_batch_iterator = current_batch.begin();
|
||||
@ -71,6 +65,11 @@ void IObjectStorageIteratorAsync::nextBatch()
|
||||
else
|
||||
is_finished = true;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
is_finished = true;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void IObjectStorageIteratorAsync::next()
|
||||
@ -95,35 +94,39 @@ std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIterator
|
||||
|
||||
bool IObjectStorageIteratorAsync::isValid()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!is_initialized)
|
||||
nextBatch();
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
return current_batch_iterator != current_batch.end();
|
||||
}
|
||||
|
||||
RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!isValid())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator");
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
return *current_batch_iterator;
|
||||
}
|
||||
|
||||
|
||||
RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!isValid())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator");
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
return current_batch;
|
||||
}
|
||||
|
||||
std::optional<RelativePathsWithMetadata> IObjectStorageIteratorAsync::getCurrentBatchAndScheduleNext()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!is_initialized)
|
||||
nextBatch();
|
||||
|
||||
|
@ -709,7 +709,7 @@ bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateL
|
||||
else
|
||||
return tryReadFloatTextFast(x, rb);
|
||||
}
|
||||
else /*if constexpr (is_integer_v<typename DataType::FieldType>)*/
|
||||
else /*if constexpr (is_integral_v<typename DataType::FieldType>)*/
|
||||
return tryReadIntText(x, rb);
|
||||
}
|
||||
|
||||
@ -814,6 +814,16 @@ enum class ConvertFromStringParsingMode : uint8_t
|
||||
BestEffortUS
|
||||
};
|
||||
|
||||
struct AccurateConvertStrategyAdditions
|
||||
{
|
||||
UInt32 scale { 0 };
|
||||
};
|
||||
|
||||
struct AccurateOrNullConvertStrategyAdditions
|
||||
{
|
||||
UInt32 scale { 0 };
|
||||
};
|
||||
|
||||
template <typename FromDataType, typename ToDataType, typename Name,
|
||||
ConvertFromStringExceptionMode exception_mode, ConvertFromStringParsingMode parsing_mode>
|
||||
struct ConvertThroughParsing
|
||||
@ -1020,7 +1030,13 @@ struct ConvertThroughParsing
|
||||
break;
|
||||
}
|
||||
}
|
||||
parseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing);
|
||||
if constexpr (std::is_same_v<Additions, AccurateConvertStrategyAdditions>)
|
||||
{
|
||||
if (!tryParseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing))
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse string to type {}", TypeName<typename ToDataType::FieldType>);
|
||||
}
|
||||
else
|
||||
parseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing);
|
||||
} while (false);
|
||||
}
|
||||
}
|
||||
@ -1120,16 +1136,6 @@ struct ConvertThroughParsing
|
||||
/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
|
||||
struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };
|
||||
|
||||
struct AccurateConvertStrategyAdditions
|
||||
{
|
||||
UInt32 scale { 0 };
|
||||
};
|
||||
|
||||
struct AccurateOrNullConvertStrategyAdditions
|
||||
{
|
||||
UInt32 scale { 0 };
|
||||
};
|
||||
|
||||
enum class BehaviourOnErrorFromString : uint8_t
|
||||
{
|
||||
ConvertDefaultBehaviorTag,
|
||||
@ -3174,8 +3180,11 @@ private:
|
||||
{
|
||||
TypeIndex from_type_index = from_type->getTypeId();
|
||||
WhichDataType which(from_type_index);
|
||||
TypeIndex to_type_index = to_type->getTypeId();
|
||||
WhichDataType to(to_type_index);
|
||||
bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull)
|
||||
&& (which.isInt() || which.isUInt() || which.isFloat());
|
||||
can_apply_accurate_cast |= cast_type == CastType::accurate && which.isStringOrFixedString() && to.isNativeInteger();
|
||||
|
||||
FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior;
|
||||
if (context)
|
||||
@ -3260,6 +3269,20 @@ private:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else if constexpr (IsDataTypeStringOrFixedString<LeftDataType>)
|
||||
{
|
||||
if constexpr (IsDataTypeNumber<RightDataType>)
|
||||
{
|
||||
chassert(wrapper_cast_type == CastType::accurate);
|
||||
result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName>::execute(
|
||||
arguments,
|
||||
result_type,
|
||||
input_rows_count,
|
||||
BehaviourOnErrorFromString::ConvertDefaultBehaviorTag,
|
||||
AccurateConvertStrategyAdditions());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
@ -61,7 +61,7 @@ public:
|
||||
return std::make_shared<DataTypeTuple>(tuple_arg_types);
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
const size_t num_arguments = arguments.size();
|
||||
Columns columns;
|
||||
@ -92,6 +92,9 @@ public:
|
||||
columns.push_back(inner_col);
|
||||
}
|
||||
|
||||
if (columns.empty())
|
||||
return ColumnTuple::create(input_rows_count);
|
||||
|
||||
return ColumnTuple::create(columns);
|
||||
}
|
||||
};
|
||||
|
@ -23,6 +23,9 @@ void BlobStorageLogWriter::addEvent(
|
||||
if (!log)
|
||||
return;
|
||||
|
||||
if (log->shouldIgnorePath(local_path_.empty() ? local_path : local_path_))
|
||||
return;
|
||||
|
||||
if (!time_now.time_since_epoch().count())
|
||||
time_now = std::chrono::system_clock::now();
|
||||
|
||||
|
@ -438,6 +438,12 @@ BlockIO InterpreterGrantQuery::execute()
|
||||
RolesOrUsersSet roles_to_revoke;
|
||||
collectRolesToGrantOrRevoke(access_control, query, roles_to_grant, roles_to_revoke);
|
||||
|
||||
/// Replacing empty database with the default. This step must be done before replication to avoid privilege escalation.
|
||||
String current_database = getContext()->getCurrentDatabase();
|
||||
elements_to_grant.replaceEmptyDatabase(current_database);
|
||||
elements_to_revoke.replaceEmptyDatabase(current_database);
|
||||
query.access_rights_elements.replaceEmptyDatabase(current_database);
|
||||
|
||||
/// Executing on cluster.
|
||||
if (!query.cluster.empty())
|
||||
{
|
||||
@ -453,9 +459,6 @@ BlockIO InterpreterGrantQuery::execute()
|
||||
}
|
||||
|
||||
/// Check if the current user has corresponding access rights granted with grant option.
|
||||
String current_database = getContext()->getCurrentDatabase();
|
||||
elements_to_grant.replaceEmptyDatabase(current_database);
|
||||
elements_to_revoke.replaceEmptyDatabase(current_database);
|
||||
bool need_check_grantees_are_allowed = true;
|
||||
if (!query.current_grants)
|
||||
checkGrantOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, elements_to_grant, elements_to_revoke);
|
||||
|
@ -9,6 +9,8 @@
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -69,4 +71,32 @@ void BlobStorageLogElement::appendToBlock(MutableColumns & columns) const
|
||||
columns[i++]->insert(error_message);
|
||||
}
|
||||
|
||||
/// Applies the base SystemLog settings and, for INSERT queries only,
/// additionally switches the `enable_blob_storage_log` setting off in the given context.
void BlobStorageLog::addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const
{
    using Base = SystemLog<BlobStorageLogElement>;
    Base::addSettingsForQuery(mutable_context, query_kind);

    if (query_kind != IAST::QueryKind::Insert)
        return;

    /// NOTE(review): presumably this keeps the log's own inserts from being logged again - confirm.
    mutable_context->setSetting("enable_blob_storage_log", false);
}
|
||||
|
||||
/// Strips one leading "./" and then one trailing "/" from `path`, if present.
/// The returned view refers to the same underlying characters as the argument.
static std::string_view normalizePath(std::string_view path)
{
    constexpr std::string_view current_dir_prefix = "./";

    const bool has_current_dir_prefix = path.substr(0, current_dir_prefix.size()) == current_dir_prefix;
    if (has_current_dir_prefix)
        path.remove_prefix(current_dir_prefix.size());

    const bool has_trailing_slash = !path.empty() && path.back() == '/';
    if (has_trailing_slash)
        path.remove_suffix(1);

    return path;
}
|
||||
|
||||
void BlobStorageLog::prepareTable()
|
||||
{
|
||||
SystemLog<BlobStorageLogElement>::prepareTable();
|
||||
if (auto merge_tree_table = std::dynamic_pointer_cast<MergeTreeData>(getStorage()))
|
||||
{
|
||||
std::unique_lock lock{prepare_mutex};
|
||||
const auto & relative_data_path = merge_tree_table->getRelativeDataPath();
|
||||
prefix_to_ignore = normalizePath(relative_data_path);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,11 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/SystemLog.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Core/NamesAndAliases.h>
|
||||
#include <Poco/Message.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <chrono>
|
||||
#include <shared_mutex>
|
||||
|
||||
#include <Poco/Message.h>
|
||||
|
||||
#include <Core/NamesAndAliases.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Interpreters/SystemLog.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -51,7 +54,23 @@ struct BlobStorageLogElement
|
||||
|
||||
/// System log of BlobStorageLogElement entries.
/// Keeps a path prefix (set in prepareTable) for which events must not be logged.
class BlobStorageLog : public SystemLog<BlobStorageLogElement>
{
public:
    using SystemLog<BlobStorageLogElement>::SystemLog;

    /// We should not log events for the table itself to avoid infinite recursion.
    /// Returns true when a non-empty `prefix_to_ignore` is a prefix of `path`.
    bool shouldIgnorePath(const String & path) const
    {
        std::shared_lock guard{prepare_mutex};
        if (prefix_to_ignore.empty())
            return false;
        return path.starts_with(prefix_to_ignore);
    }

protected:
    void prepareTable() override;
    void addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const override;

private:
    /// Guards `prefix_to_ignore`; mutable so that the const reader can lock it.
    mutable std::shared_mutex prepare_mutex;
    String prefix_to_ignore;
};
|
||||
|
||||
}
|
||||
|
@ -4103,6 +4103,13 @@ std::shared_ptr<BackupLog> Context::getBackupLog() const
|
||||
|
||||
std::shared_ptr<BlobStorageLog> Context::getBlobStorageLog() const
|
||||
{
|
||||
bool enable_blob_storage_log = settings.enable_blob_storage_log;
|
||||
if (hasQueryContext())
|
||||
enable_blob_storage_log = getQueryContext()->getSettingsRef().enable_blob_storage_log;
|
||||
|
||||
if (!enable_blob_storage_log)
|
||||
return {};
|
||||
|
||||
SharedLockGuard lock(shared->mutex);
|
||||
|
||||
if (!shared->system_logs)
|
||||
|
@ -240,4 +240,34 @@ bool SplitTokenExtractor::nextInStringLike(const char * data, size_t length, siz
|
||||
return !bad_token && !token.empty();
|
||||
}
|
||||
|
||||
void SplitTokenExtractor::substringToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter, bool is_prefix, bool is_suffix) const
|
||||
{
|
||||
size_t cur = 0;
|
||||
size_t token_start = 0;
|
||||
size_t token_len = 0;
|
||||
|
||||
while (cur < length && nextInString(data, length, &cur, &token_start, &token_len))
|
||||
// In order to avoid filter updates with incomplete tokens,
|
||||
// first token is ignored, unless substring is prefix and
|
||||
// last token is ignored, unless substring is suffix
|
||||
if ((token_start > 0 || is_prefix) && (token_start + token_len < length || is_suffix))
|
||||
bloom_filter.add(data + token_start, token_len);
|
||||
}
|
||||
|
||||
void SplitTokenExtractor::substringToGinFilter(const char * data, size_t length, GinFilter & gin_filter, bool is_prefix, bool is_suffix) const
|
||||
{
|
||||
gin_filter.setQueryString(data, length);
|
||||
|
||||
size_t cur = 0;
|
||||
size_t token_start = 0;
|
||||
size_t token_len = 0;
|
||||
|
||||
while (cur < length && nextInString(data, length, &cur, &token_start, &token_len))
|
||||
// In order to avoid filter updates with incomplete tokens,
|
||||
// first token is ignored, unless substring is prefix and
|
||||
// last token is ignored, unless substring is suffix
|
||||
if ((token_start > 0 || is_prefix) && (token_start + token_len < length || is_suffix))
|
||||
gin_filter.addTerm(data + token_start, token_len);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -28,8 +28,22 @@ struct ITokenExtractor
|
||||
/// It skips unescaped `%` and `_` and supports escaping symbols, but it is less lightweight.
|
||||
virtual bool nextInStringLike(const char * data, size_t length, size_t * pos, String & out) const = 0;
|
||||
|
||||
/// Updates Bloom filter from exact-match string filter value
|
||||
virtual void stringToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter) const = 0;
|
||||
|
||||
/// Updates Bloom filter from substring-match string filter value.
|
||||
/// An `ITokenExtractor` implementation may decide to skip certain
|
||||
/// tokens depending on whether the substring is a prefix or a suffix.
|
||||
virtual void substringToBloomFilter(
|
||||
const char * data,
|
||||
size_t length,
|
||||
BloomFilter & bloom_filter,
|
||||
bool is_prefix [[maybe_unused]],
|
||||
bool is_suffix [[maybe_unused]]) const
|
||||
{
|
||||
stringToBloomFilter(data, length, bloom_filter);
|
||||
}
|
||||
|
||||
virtual void stringPaddedToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter) const
|
||||
{
|
||||
stringToBloomFilter(data, length, bloom_filter);
|
||||
@ -37,8 +51,22 @@ struct ITokenExtractor
|
||||
|
||||
virtual void stringLikeToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter) const = 0;
|
||||
|
||||
/// Updates GIN filter from exact-match string filter value
|
||||
virtual void stringToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const = 0;
|
||||
|
||||
/// Updates GIN filter from substring-match string filter value.
|
||||
/// An `ITokenExtractor` implementation may decide to skip certain
|
||||
/// tokens depending on whether the substring is a prefix or a suffix.
|
||||
virtual void substringToGinFilter(
|
||||
const char * data,
|
||||
size_t length,
|
||||
GinFilter & gin_filter,
|
||||
bool is_prefix [[maybe_unused]],
|
||||
bool is_suffix [[maybe_unused]]) const
|
||||
{
|
||||
stringToGinFilter(data, length, gin_filter);
|
||||
}
|
||||
|
||||
virtual void stringPaddedToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const
|
||||
{
|
||||
stringToGinFilter(data, length, gin_filter);
|
||||
@ -148,6 +176,11 @@ struct SplitTokenExtractor final : public ITokenExtractorHelper<SplitTokenExtrac
|
||||
|
||||
bool nextInStringLike(const char * data, size_t length, size_t * __restrict pos, String & token) const override;
|
||||
|
||||
void substringToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter, bool is_prefix, bool is_suffix) const override;
|
||||
|
||||
void substringToGinFilter(const char * data, size_t length, GinFilter & gin_filter, bool is_prefix, bool is_suffix) const override;
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -175,11 +175,11 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query");
|
||||
|
||||
if (!getContext()->getSettings().allow_experimental_statistic && (
|
||||
command_ast->type == ASTAlterCommand::ADD_STATISTIC ||
|
||||
command_ast->type == ASTAlterCommand::DROP_STATISTIC ||
|
||||
command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTIC))
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistic is now disabled. Turn on allow_experimental_statistic");
|
||||
if (!getContext()->getSettings().allow_experimental_statistics && (
|
||||
command_ast->type == ASTAlterCommand::ADD_STATISTICS ||
|
||||
command_ast->type == ASTAlterCommand::DROP_STATISTICS ||
|
||||
command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTICS))
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistics is now disabled. Turn on allow_experimental_statistics");
|
||||
}
|
||||
|
||||
if (typeid_cast<DatabaseReplicated *>(database.get()))
|
||||
@ -343,19 +343,24 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS
|
||||
required_access.emplace_back(AccessType::ALTER_SAMPLE_BY, database, table);
|
||||
break;
|
||||
}
|
||||
case ASTAlterCommand::ADD_STATISTIC:
|
||||
case ASTAlterCommand::ADD_STATISTICS:
|
||||
{
|
||||
required_access.emplace_back(AccessType::ALTER_ADD_STATISTIC, database, table);
|
||||
required_access.emplace_back(AccessType::ALTER_ADD_STATISTICS, database, table);
|
||||
break;
|
||||
}
|
||||
case ASTAlterCommand::DROP_STATISTIC:
|
||||
case ASTAlterCommand::MODIFY_STATISTICS:
|
||||
{
|
||||
required_access.emplace_back(AccessType::ALTER_DROP_STATISTIC, database, table);
|
||||
required_access.emplace_back(AccessType::ALTER_MODIFY_STATISTICS, database, table);
|
||||
break;
|
||||
}
|
||||
case ASTAlterCommand::MATERIALIZE_STATISTIC:
|
||||
case ASTAlterCommand::DROP_STATISTICS:
|
||||
{
|
||||
required_access.emplace_back(AccessType::ALTER_MATERIALIZE_STATISTIC, database, table);
|
||||
required_access.emplace_back(AccessType::ALTER_DROP_STATISTICS, database, table);
|
||||
break;
|
||||
}
|
||||
case ASTAlterCommand::MATERIALIZE_STATISTICS:
|
||||
{
|
||||
required_access.emplace_back(AccessType::ALTER_MATERIALIZE_STATISTICS, database, table);
|
||||
break;
|
||||
}
|
||||
case ASTAlterCommand::ADD_INDEX:
|
||||
|
@ -448,9 +448,9 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
|
||||
column_declaration->children.push_back(column_declaration->codec);
|
||||
}
|
||||
|
||||
if (column.stat)
|
||||
if (!column.statistics.empty())
|
||||
{
|
||||
column_declaration->stat_type = column.stat->ast;
|
||||
column_declaration->stat_type = column.statistics.getAST();
|
||||
column_declaration->children.push_back(column_declaration->stat_type);
|
||||
}
|
||||
|
||||
@ -675,11 +675,13 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
|
||||
col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec);
|
||||
}
|
||||
|
||||
column.statistics.column_name = column.name; /// We assign column name here for better exception error message.
|
||||
if (col_decl.stat_type)
|
||||
{
|
||||
if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistic)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistic is now disabled. Turn on allow_experimental_statistic");
|
||||
column.stat = StatisticDescription::getStatisticFromColumnDeclaration(col_decl);
|
||||
if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistics is now disabled. Turn on allow_experimental_statistics");
|
||||
column.statistics = ColumnStatisticsDescription::fromColumnDeclaration(col_decl);
|
||||
column.statistics.data_type = column.type;
|
||||
}
|
||||
|
||||
if (col_decl.ttl)
|
||||
@ -754,7 +756,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental full-text index feature is not enabled (the setting 'allow_experimental_full_text_index')");
|
||||
/// ----
|
||||
/// Temporary check during a transition period. Please remove at the end of 2024.
|
||||
if (index_desc.type == INVERTED_INDEX_NAME && settings.allow_experimental_inverted_index) /// The funny condition is not a mistake, see 02346_fulltext_index_old_name.sql
|
||||
if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index)
|
||||
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Please use index type 'full_text' instead of 'inverted'");
|
||||
/// ----
|
||||
if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index)
|
||||
|
@ -67,8 +67,8 @@ namespace
|
||||
|
||||
static void visit(ASTSelectQuery & select, ASTPtr & node, Data & data)
|
||||
{
|
||||
/// we need to read statistic when `allow_statistic_optimize` is enabled.
|
||||
bool only_analyze = !data.getContext()->getSettings().allow_statistic_optimize;
|
||||
/// we need to read statistic when `allow_statistics_optimize` is enabled.
|
||||
bool only_analyze = !data.getContext()->getSettings().allow_statistics_optimize;
|
||||
InterpreterSelectQuery interpreter(
|
||||
node, data.getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze(only_analyze).modify());
|
||||
|
||||
|
@ -657,7 +657,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
MergeTreeWhereOptimizer where_optimizer{
|
||||
std::move(column_compressed_sizes),
|
||||
metadata_snapshot,
|
||||
storage->getConditionEstimatorByPredicate(storage_snapshot, nullptr, context),
|
||||
storage->getConditionSelectivityEstimatorByPredicate(storage_snapshot, nullptr, context),
|
||||
queried_columns,
|
||||
supported_prewhere_columns,
|
||||
log};
|
||||
|
@ -55,7 +55,7 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_UPDATE_COLUMN;
|
||||
extern const int UNEXPECTED_EXPRESSION;
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
extern const int ILLEGAL_STATISTIC;
|
||||
extern const int ILLEGAL_STATISTICS;
|
||||
}
|
||||
|
||||
|
||||
@ -781,7 +781,7 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
}
|
||||
else if (command.type == MutationCommand::MATERIALIZE_INDEX)
|
||||
{
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION);
|
||||
auto it = std::find_if(
|
||||
std::cbegin(indices_desc), std::end(indices_desc),
|
||||
[&](const IndexDescription & index)
|
||||
@ -801,20 +801,20 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
materialized_indices.emplace(command.index_name);
|
||||
}
|
||||
}
|
||||
else if (command.type == MutationCommand::MATERIALIZE_STATISTIC)
|
||||
else if (command.type == MutationCommand::MATERIALIZE_STATISTICS)
|
||||
{
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
|
||||
for (const auto & stat_column_name: command.statistic_columns)
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION);
|
||||
for (const auto & stat_column_name: command.statistics_columns)
|
||||
{
|
||||
if (!columns_desc.has(stat_column_name) || !columns_desc.get(stat_column_name).stat)
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Unknown statistic column: {}", stat_column_name);
|
||||
dependencies.emplace(stat_column_name, ColumnDependency::STATISTIC);
|
||||
if (!columns_desc.has(stat_column_name) || columns_desc.get(stat_column_name).statistics.empty())
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Unknown statistics column: {}", stat_column_name);
|
||||
dependencies.emplace(stat_column_name, ColumnDependency::STATISTICS);
|
||||
materialized_statistics.emplace(stat_column_name);
|
||||
}
|
||||
}
|
||||
else if (command.type == MutationCommand::MATERIALIZE_PROJECTION)
|
||||
{
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION);
|
||||
const auto & projection = projections_desc.get(command.projection_name);
|
||||
if (!source.hasProjection(projection.name) || source.hasBrokenProjection(projection.name))
|
||||
{
|
||||
@ -825,18 +825,18 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
}
|
||||
else if (command.type == MutationCommand::DROP_INDEX)
|
||||
{
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION);
|
||||
materialized_indices.erase(command.index_name);
|
||||
}
|
||||
else if (command.type == MutationCommand::DROP_STATISTIC)
|
||||
else if (command.type == MutationCommand::DROP_STATISTICS)
|
||||
{
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
|
||||
for (const auto & stat_column_name: command.statistic_columns)
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION);
|
||||
for (const auto & stat_column_name: command.statistics_columns)
|
||||
materialized_statistics.erase(stat_column_name);
|
||||
}
|
||||
else if (command.type == MutationCommand::DROP_PROJECTION)
|
||||
{
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION);
|
||||
materialized_projections.erase(command.projection_name);
|
||||
}
|
||||
else if (command.type == MutationCommand::MATERIALIZE_TTL)
|
||||
@ -888,7 +888,7 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
{
|
||||
if (dependency.kind == ColumnDependency::SKIP_INDEX
|
||||
|| dependency.kind == ColumnDependency::PROJECTION
|
||||
|| dependency.kind == ColumnDependency::STATISTIC)
|
||||
|| dependency.kind == ColumnDependency::STATISTICS)
|
||||
dependencies.insert(dependency);
|
||||
}
|
||||
}
|
||||
@ -1360,7 +1360,7 @@ QueryPipelineBuilder MutationsInterpreter::execute()
|
||||
Block MutationsInterpreter::getUpdatedHeader() const
|
||||
{
|
||||
// If it's an index/projection materialization, we don't write any data columns, thus empty header is used
|
||||
return mutation_kind.mutation_kind == MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION ? Block{} : *updated_header;
|
||||
return mutation_kind.mutation_kind == MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION ? Block{} : *updated_header;
|
||||
}
|
||||
|
||||
const ColumnDependencies & MutationsInterpreter::getColumnDependencies() const
|
||||
|
@ -102,7 +102,7 @@ public:
|
||||
enum MutationKindEnum
|
||||
{
|
||||
MUTATE_UNKNOWN,
|
||||
MUTATE_INDEX_STATISTIC_PROJECTION,
|
||||
MUTATE_INDEX_STATISTICS_PROJECTION,
|
||||
MUTATE_OTHER,
|
||||
} mutation_kind = MUTATE_UNKNOWN;
|
||||
|
||||
|
@ -519,8 +519,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
|
||||
// we need query context to do inserts to target table with MV containing subqueries or joins
|
||||
auto insert_context = Context::createCopy(context);
|
||||
insert_context->makeQueryContext();
|
||||
/// We always want to deliver the data to the original table regardless of the MVs
|
||||
insert_context->setSetting("materialized_views_ignore_errors", true);
|
||||
addSettingsForQuery(insert_context, IAST::QueryKind::Insert);
|
||||
|
||||
InterpreterInsertQuery interpreter(query_ptr, insert_context);
|
||||
BlockIO io = interpreter.execute();
|
||||
@ -541,13 +540,18 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
|
||||
LOG_TRACE(log, "Flushed system log up to offset {}", to_flush_end);
|
||||
}
|
||||
|
||||
template <typename LogElement>
|
||||
StoragePtr SystemLog<LogElement>::getStorage() const
|
||||
{
|
||||
return DatabaseCatalog::instance().tryGetTable(table_id, getContext());
|
||||
}
|
||||
|
||||
template <typename LogElement>
|
||||
void SystemLog<LogElement>::prepareTable()
|
||||
{
|
||||
String description = table_id.getNameForLogs();
|
||||
|
||||
auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext());
|
||||
auto table = getStorage();
|
||||
if (table)
|
||||
{
|
||||
if (old_create_query.empty())
|
||||
@ -596,10 +600,9 @@ void SystemLog<LogElement>::prepareTable()
|
||||
merges_lock = table->getActionLock(ActionLocks::PartsMerge);
|
||||
|
||||
auto query_context = Context::createCopy(context);
|
||||
/// As this operation is performed automatically we don't want it to fail because of user dependencies on log tables
|
||||
query_context->setSetting("check_table_dependencies", Field{false});
|
||||
query_context->setSetting("check_referential_table_dependencies", Field{false});
|
||||
query_context->makeQueryContext();
|
||||
addSettingsForQuery(query_context, IAST::QueryKind::Rename);
|
||||
|
||||
InterpreterRenameQuery(rename, query_context).execute();
|
||||
|
||||
/// The required table will be created.
|
||||
@ -616,6 +619,7 @@ void SystemLog<LogElement>::prepareTable()
|
||||
|
||||
auto query_context = Context::createCopy(context);
|
||||
query_context->makeQueryContext();
|
||||
addSettingsForQuery(query_context, IAST::QueryKind::Create);
|
||||
|
||||
auto create_query_ast = getCreateTableQuery();
|
||||
InterpreterCreateQuery interpreter(create_query_ast, query_context);
|
||||
@ -630,6 +634,22 @@ void SystemLog<LogElement>::prepareTable()
|
||||
is_prepared = true;
|
||||
}
|
||||
|
||||
template <typename LogElement>
|
||||
void SystemLog<LogElement>::addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const
|
||||
{
|
||||
if (query_kind == IAST::QueryKind::Insert)
|
||||
{
|
||||
/// We always want to deliver the data to the original table regardless of the MVs
|
||||
mutable_context->setSetting("materialized_views_ignore_errors", true);
|
||||
}
|
||||
else if (query_kind == IAST::QueryKind::Rename)
|
||||
{
|
||||
/// As this operation is performed automatically we don't want it to fail because of user dependencies on log tables
|
||||
mutable_context->setSetting("check_table_dependencies", Field{false});
|
||||
mutable_context->setSetting("check_referential_table_dependencies", Field{false});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LogElement>
|
||||
ASTPtr SystemLog<LogElement>::getCreateTableQuery()
|
||||
{
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Interpreters/StorageID.h>
|
||||
#include <Common/SystemLogBase.h>
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
@ -139,6 +140,17 @@ protected:
|
||||
using ISystemLog::thread_mutex;
|
||||
using Base::queue;
|
||||
|
||||
StoragePtr getStorage() const;
|
||||
|
||||
/** Creates new table if it does not exist.
|
||||
* Renames old table if its structure is not suitable.
|
||||
* This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created.
|
||||
*/
|
||||
void prepareTable() override;
|
||||
|
||||
/// Some tables can override settings for internal queries
|
||||
virtual void addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const;
|
||||
|
||||
private:
|
||||
/* Saving thread data */
|
||||
const StorageID table_id;
|
||||
@ -147,12 +159,6 @@ private:
|
||||
String old_create_query;
|
||||
bool is_prepared = false;
|
||||
|
||||
/** Creates new table if it does not exist.
|
||||
* Renames old table if its structure is not suitable.
|
||||
* This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created.
|
||||
*/
|
||||
void prepareTable() override;
|
||||
|
||||
void savingThreadFunction() override;
|
||||
|
||||
/// flushImpl can be executed only in saving_thread.
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/queryNormalization.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/QueryProfiler.h>
|
||||
@ -210,9 +211,12 @@ void ThreadStatus::applyQuerySettings()
|
||||
query_id_from_query_context = query_context_ptr->getCurrentQueryId();
|
||||
initQueryProfiler();
|
||||
|
||||
untracked_memory_limit = settings.max_untracked_memory;
|
||||
if (settings.memory_profiler_step && settings.memory_profiler_step < static_cast<UInt64>(untracked_memory_limit))
|
||||
untracked_memory_limit = settings.memory_profiler_step;
|
||||
max_untracked_memory = settings.max_untracked_memory;
|
||||
if (settings.memory_profiler_step && settings.memory_profiler_step < static_cast<UInt64>(max_untracked_memory))
|
||||
max_untracked_memory = settings.memory_profiler_step;
|
||||
min_untracked_memory = std::min<Int64>(settings.min_untracked_memory, max_untracked_memory);
|
||||
|
||||
updateUntrackedMemoryLimit(CurrentMemoryTracker::get());
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// Set "nice" value if required.
|
||||
|
@ -42,8 +42,8 @@ ASTPtr ASTAlterCommand::clone() const
|
||||
res->projection_decl = res->children.emplace_back(projection_decl->clone()).get();
|
||||
if (projection)
|
||||
res->projection = res->children.emplace_back(projection->clone()).get();
|
||||
if (statistic_decl)
|
||||
res->statistic_decl = res->children.emplace_back(statistic_decl->clone()).get();
|
||||
if (statistics_decl)
|
||||
res->statistics_decl = res->children.emplace_back(statistics_decl->clone()).get();
|
||||
if (partition)
|
||||
res->partition = res->children.emplace_back(partition->clone()).get();
|
||||
if (predicate)
|
||||
@ -200,27 +200,33 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState &
|
||||
partition->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
else if (type == ASTAlterCommand::ADD_STATISTIC)
|
||||
else if (type == ASTAlterCommand::ADD_STATISTICS)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD STATISTIC " << (if_not_exists ? "IF NOT EXISTS " : "")
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD STATISTICS " << (if_not_exists ? "IF NOT EXISTS " : "")
|
||||
<< (settings.hilite ? hilite_none : "");
|
||||
statistic_decl->formatImpl(settings, state, frame);
|
||||
statistics_decl->formatImpl(settings, state, frame);
|
||||
}
|
||||
else if (type == ASTAlterCommand::DROP_STATISTIC)
|
||||
else if (type == ASTAlterCommand::MODIFY_STATISTICS)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistic ? "CLEAR " : "DROP ") << "STATISTIC "
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY STATISTICS "
|
||||
<< (settings.hilite ? hilite_none : "");
|
||||
statistics_decl->formatImpl(settings, state, frame);
|
||||
}
|
||||
else if (type == ASTAlterCommand::DROP_STATISTICS)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistics ? "CLEAR " : "DROP ") << "STATISTICS "
|
||||
<< (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
|
||||
statistic_decl->formatImpl(settings, state, frame);
|
||||
statistics_decl->formatImpl(settings, state, frame);
|
||||
if (partition)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
|
||||
partition->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
else if (type == ASTAlterCommand::MATERIALIZE_STATISTIC)
|
||||
else if (type == ASTAlterCommand::MATERIALIZE_STATISTICS)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "MATERIALIZE STATISTIC " << (settings.hilite ? hilite_none : "");
|
||||
statistic_decl->formatImpl(settings, state, frame);
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "MATERIALIZE STATISTICS " << (settings.hilite ? hilite_none : "");
|
||||
statistics_decl->formatImpl(settings, state, frame);
|
||||
if (partition)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
|
||||
@ -507,7 +513,7 @@ void ASTAlterCommand::forEachPointerToChild(std::function<void(void**)> f)
|
||||
f(reinterpret_cast<void **>(&constraint));
|
||||
f(reinterpret_cast<void **>(&projection_decl));
|
||||
f(reinterpret_cast<void **>(&projection));
|
||||
f(reinterpret_cast<void **>(&statistic_decl));
|
||||
f(reinterpret_cast<void **>(&statistics_decl));
|
||||
f(reinterpret_cast<void **>(&partition));
|
||||
f(reinterpret_cast<void **>(&predicate));
|
||||
f(reinterpret_cast<void **>(&update_assignments));
|
||||
|
@ -55,9 +55,10 @@ public:
|
||||
DROP_PROJECTION,
|
||||
MATERIALIZE_PROJECTION,
|
||||
|
||||
ADD_STATISTIC,
|
||||
DROP_STATISTIC,
|
||||
MATERIALIZE_STATISTIC,
|
||||
ADD_STATISTICS,
|
||||
DROP_STATISTICS,
|
||||
MODIFY_STATISTICS,
|
||||
MATERIALIZE_STATISTICS,
|
||||
|
||||
DROP_PARTITION,
|
||||
DROP_DETACHED_PARTITION,
|
||||
@ -135,7 +136,7 @@ public:
|
||||
*/
|
||||
IAST * projection = nullptr;
|
||||
|
||||
IAST * statistic_decl = nullptr;
|
||||
IAST * statistics_decl = nullptr;
|
||||
|
||||
/** Used in DROP PARTITION, ATTACH PARTITION FROM, FORGET PARTITION, UPDATE, DELETE queries.
|
||||
* The value or ID of the partition is stored here.
|
||||
@ -180,7 +181,7 @@ public:
|
||||
|
||||
bool clear_index = false; /// for CLEAR INDEX (do not drop index from metadata)
|
||||
|
||||
bool clear_statistic = false; /// for CLEAR STATISTIC (do not drop statistic from metadata)
|
||||
bool clear_statistics = false; /// for CLEAR STATISTICS (do not drop statistics from metadata)
|
||||
|
||||
bool clear_projection = false; /// for CLEAR PROJECTION (do not drop projection from metadata)
|
||||
|
||||
|
@ -1,42 +0,0 @@
|
||||
#include <Parsers/ASTStatisticDeclaration.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Creates a copy of the declaration: the statistic type is copied and the
/// column list is cloned and attached to the new node.
ASTPtr ASTStatisticDeclaration::clone() const
{
    auto copy = std::make_shared<ASTStatisticDeclaration>();

    copy->type = type;
    copy->set(copy->columns, columns->clone());

    return copy;
}
|
||||
|
||||
std::vector<String> ASTStatisticDeclaration::getColumnNames() const
|
||||
{
|
||||
std::vector<String> result;
|
||||
result.reserve(columns->children.size());
|
||||
for (const ASTPtr & column_ast : columns->children)
|
||||
{
|
||||
result.push_back(column_ast->as<ASTIdentifier &>().name());
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
columns->formatImpl(s, state, frame);
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
|
||||
s.ostr << backQuoteIfNeed(type);
|
||||
}
|
||||
|
||||
}
|
||||
|
60
src/Parsers/ASTStatisticsDeclaration.cpp
Normal file
60
src/Parsers/ASTStatisticsDeclaration.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include <Parsers/ASTStatisticsDeclaration.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Creates a copy of the declaration. The column list is always cloned;
/// the type list is cloned only when it is present.
ASTPtr ASTStatisticsDeclaration::clone() const
{
    auto copy = std::make_shared<ASTStatisticsDeclaration>();

    copy->set(copy->columns, columns->clone());

    /// The type list is optional.
    if (types != nullptr)
        copy->set(copy->types, types->clone());

    return copy;
}
|
||||
|
||||
std::vector<String> ASTStatisticsDeclaration::getColumnNames() const
|
||||
{
|
||||
std::vector<String> result;
|
||||
result.reserve(columns->children.size());
|
||||
for (const ASTPtr & column_ast : columns->children)
|
||||
{
|
||||
result.push_back(column_ast->as<ASTIdentifier &>().name());
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
std::vector<String> ASTStatisticsDeclaration::getTypeNames() const
|
||||
{
|
||||
chassert(types != nullptr);
|
||||
std::vector<String> result;
|
||||
result.reserve(types->children.size());
|
||||
for (const ASTPtr & column_ast : types->children)
|
||||
{
|
||||
result.push_back(column_ast->as<ASTFunction &>().name);
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
void ASTStatisticsDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
columns->formatImpl(s, state, frame);
|
||||
s.ostr << (s.hilite ? hilite_keyword : "");
|
||||
if (types)
|
||||
{
|
||||
s.ostr << " TYPE " << (s.hilite ? hilite_none : "");
|
||||
types->formatImpl(s, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,17 +9,17 @@ class ASTFunction;
|
||||
|
||||
/** name BY columns TYPE typename(args) in create query
|
||||
*/
|
||||
class ASTStatisticDeclaration : public IAST
|
||||
class ASTStatisticsDeclaration : public IAST
|
||||
{
|
||||
public:
|
||||
IAST * columns;
|
||||
/// TODO type should be a list of ASTFunction, for example, 'tdigest(256), hyperloglog(128)', etc.
|
||||
String type;
|
||||
IAST * types;
|
||||
|
||||
/** Get the text that identifies this element. */
|
||||
String getID(char) const override { return "Stat"; }
|
||||
|
||||
std::vector<String> getColumnNames() const;
|
||||
std::vector<String> getTypeNames() const;
|
||||
|
||||
ASTPtr clone() const override;
|
||||
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
|
@ -13,7 +13,7 @@ namespace DB
|
||||
MR_MACROS(ADD_CONSTRAINT, "ADD CONSTRAINT") \
|
||||
MR_MACROS(ADD_INDEX, "ADD INDEX") \
|
||||
MR_MACROS(ADD_PROJECTION, "ADD PROJECTION") \
|
||||
MR_MACROS(ADD_STATISTIC, "ADD STATISTIC") \
|
||||
MR_MACROS(ADD_STATISTICS, "ADD STATISTICS") \
|
||||
MR_MACROS(ADD, "ADD") \
|
||||
MR_MACROS(ADMIN_OPTION_FOR, "ADMIN OPTION FOR") \
|
||||
MR_MACROS(AFTER, "AFTER") \
|
||||
@ -83,7 +83,7 @@ namespace DB
|
||||
MR_MACROS(CLEAR_COLUMN, "CLEAR COLUMN") \
|
||||
MR_MACROS(CLEAR_INDEX, "CLEAR INDEX") \
|
||||
MR_MACROS(CLEAR_PROJECTION, "CLEAR PROJECTION") \
|
||||
MR_MACROS(CLEAR_STATISTIC, "CLEAR STATISTIC") \
|
||||
MR_MACROS(CLEAR_STATISTICS, "CLEAR STATISTICS") \
|
||||
MR_MACROS(CLUSTER, "CLUSTER") \
|
||||
MR_MACROS(CLUSTERS, "CLUSTERS") \
|
||||
MR_MACROS(CN, "CN") \
|
||||
@ -150,7 +150,7 @@ namespace DB
|
||||
MR_MACROS(DROP_PART, "DROP PART") \
|
||||
MR_MACROS(DROP_PARTITION, "DROP PARTITION") \
|
||||
MR_MACROS(DROP_PROJECTION, "DROP PROJECTION") \
|
||||
MR_MACROS(DROP_STATISTIC, "DROP STATISTIC") \
|
||||
MR_MACROS(DROP_STATISTICS, "DROP STATISTICS") \
|
||||
MR_MACROS(DROP_TABLE, "DROP TABLE") \
|
||||
MR_MACROS(DROP_TEMPORARY_TABLE, "DROP TEMPORARY TABLE") \
|
||||
MR_MACROS(DROP, "DROP") \
|
||||
@ -279,7 +279,7 @@ namespace DB
|
||||
MR_MACROS(MATERIALIZE_COLUMN, "MATERIALIZE COLUMN") \
|
||||
MR_MACROS(MATERIALIZE_INDEX, "MATERIALIZE INDEX") \
|
||||
MR_MACROS(MATERIALIZE_PROJECTION, "MATERIALIZE PROJECTION") \
|
||||
MR_MACROS(MATERIALIZE_STATISTIC, "MATERIALIZE STATISTIC") \
|
||||
MR_MACROS(MATERIALIZE_STATISTICS, "MATERIALIZE STATISTICS") \
|
||||
MR_MACROS(MATERIALIZE_TTL, "MATERIALIZE TTL") \
|
||||
MR_MACROS(MATERIALIZE, "MATERIALIZE") \
|
||||
MR_MACROS(MATERIALIZED, "MATERIALIZED") \
|
||||
@ -304,6 +304,7 @@ namespace DB
|
||||
MR_MACROS(MODIFY_QUERY, "MODIFY QUERY") \
|
||||
MR_MACROS(MODIFY_REFRESH, "MODIFY REFRESH") \
|
||||
MR_MACROS(MODIFY_SAMPLE_BY, "MODIFY SAMPLE BY") \
|
||||
MR_MACROS(MODIFY_STATISTICS, "MODIFY STATISTICS") \
|
||||
MR_MACROS(MODIFY_SETTING, "MODIFY SETTING") \
|
||||
MR_MACROS(MODIFY_SQL_SECURITY, "MODIFY SQL SECURITY") \
|
||||
MR_MACROS(MODIFY_TTL, "MODIFY TTL") \
|
||||
@ -447,7 +448,7 @@ namespace DB
|
||||
MR_MACROS(SQL_SECURITY, "SQL SECURITY") \
|
||||
MR_MACROS(SS, "SS") \
|
||||
MR_MACROS(START_TRANSACTION, "START TRANSACTION") \
|
||||
MR_MACROS(STATISTIC, "STATISTIC") \
|
||||
MR_MACROS(STATISTICS, "STATISTICS") \
|
||||
MR_MACROS(STEP, "STEP") \
|
||||
MR_MACROS(STORAGE, "STORAGE") \
|
||||
MR_MACROS(STRICT, "STRICT") \
|
||||
|
@ -703,7 +703,7 @@ bool ParserCodec::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParserStatisticType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
bool ParserStatisticsType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
ParserList stat_type_parser(std::make_unique<ParserIdentifierWithOptionalParameters>(),
|
||||
std::make_unique<ParserToken>(TokenType::Comma), false);
|
||||
@ -722,7 +722,7 @@ bool ParserStatisticType::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
|
||||
++pos;
|
||||
|
||||
auto function_node = std::make_shared<ASTFunction>();
|
||||
function_node->name = "STATISTIC";
|
||||
function_node->name = "STATISTICS";
|
||||
function_node->arguments = stat_type;
|
||||
function_node->children.push_back(function_node->arguments);
|
||||
|
||||
|
@ -202,11 +202,11 @@ protected:
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
/// STATISTIC(tdigest(200))
|
||||
class ParserStatisticType : public IParserBase
|
||||
/// STATISTICS(tdigest(200))
|
||||
class ParserStatisticsType : public IParserBase
|
||||
{
|
||||
protected:
|
||||
const char * getName() const override { return "statistic"; }
|
||||
const char * getName() const override { return "statistics"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
|
@ -59,9 +59,6 @@ Token quotedStringWithUnicodeQuotes(const char *& pos, const char * const token_
|
||||
pos = find_first_symbols<'\xE2'>(pos, end);
|
||||
if (pos + 2 >= end)
|
||||
return Token(error_token, token_begin, end);
|
||||
/// Empty identifiers are not allowed, while empty strings are.
|
||||
if (success_token == TokenType::QuotedIdentifier && pos + 3 >= end)
|
||||
return Token(error_token, token_begin, end);
|
||||
|
||||
if (pos[0] == '\xE2' && pos[1] == '\x80' && pos[2] == expected_end_byte)
|
||||
{
|
||||
|
@ -49,10 +49,11 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
ParserKeyword s_clear_index(Keyword::CLEAR_INDEX);
|
||||
ParserKeyword s_materialize_index(Keyword::MATERIALIZE_INDEX);
|
||||
|
||||
ParserKeyword s_add_statistic(Keyword::ADD_STATISTIC);
|
||||
ParserKeyword s_drop_statistic(Keyword::DROP_STATISTIC);
|
||||
ParserKeyword s_clear_statistic(Keyword::CLEAR_STATISTIC);
|
||||
ParserKeyword s_materialize_statistic(Keyword::MATERIALIZE_STATISTIC);
|
||||
ParserKeyword s_add_statistics(Keyword::ADD_STATISTICS);
|
||||
ParserKeyword s_drop_statistics(Keyword::DROP_STATISTICS);
|
||||
ParserKeyword s_modify_statistics(Keyword::MODIFY_STATISTICS);
|
||||
ParserKeyword s_clear_statistics(Keyword::CLEAR_STATISTICS);
|
||||
ParserKeyword s_materialize_statistics(Keyword::MATERIALIZE_STATISTICS);
|
||||
|
||||
ParserKeyword s_add_constraint(Keyword::ADD_CONSTRAINT);
|
||||
ParserKeyword s_drop_constraint(Keyword::DROP_CONSTRAINT);
|
||||
@ -126,7 +127,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
ParserIdentifier parser_remove_property;
|
||||
ParserCompoundColumnDeclaration parser_col_decl;
|
||||
ParserIndexDeclaration parser_idx_decl;
|
||||
ParserStatisticDeclaration parser_stat_decl;
|
||||
ParserStatisticsDeclaration parser_stat_decl;
|
||||
ParserStatisticsDeclarationWithoutTypes parser_stat_decl_without_types;
|
||||
ParserConstraintDeclaration parser_constraint_decl;
|
||||
ParserProjectionDeclaration parser_projection_decl;
|
||||
ParserCompoundColumnDeclaration parser_modify_col_decl(false, false, true);
|
||||
@ -154,7 +156,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
ASTPtr command_constraint;
|
||||
ASTPtr command_projection_decl;
|
||||
ASTPtr command_projection;
|
||||
ASTPtr command_statistic_decl;
|
||||
ASTPtr command_statistics_decl;
|
||||
ASTPtr command_partition;
|
||||
ASTPtr command_predicate;
|
||||
ASTPtr command_update_assignments;
|
||||
@ -368,36 +370,43 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (s_add_statistic.ignore(pos, expected))
|
||||
else if (s_add_statistics.ignore(pos, expected))
|
||||
{
|
||||
if (s_if_not_exists.ignore(pos, expected))
|
||||
command->if_not_exists = true;
|
||||
|
||||
if (!parser_stat_decl.parse(pos, command_statistic_decl, expected))
|
||||
if (!parser_stat_decl.parse(pos, command_statistics_decl, expected))
|
||||
return false;
|
||||
|
||||
command->type = ASTAlterCommand::ADD_STATISTIC;
|
||||
command->type = ASTAlterCommand::ADD_STATISTICS;
|
||||
}
|
||||
else if (s_drop_statistic.ignore(pos, expected))
|
||||
else if (s_modify_statistics.ignore(pos, expected))
|
||||
{
|
||||
if (!parser_stat_decl.parse(pos, command_statistics_decl, expected))
|
||||
return false;
|
||||
|
||||
command->type = ASTAlterCommand::MODIFY_STATISTICS;
|
||||
}
|
||||
else if (s_drop_statistics.ignore(pos, expected))
|
||||
{
|
||||
if (s_if_exists.ignore(pos, expected))
|
||||
command->if_exists = true;
|
||||
|
||||
if (!parser_stat_decl.parse(pos, command_statistic_decl, expected))
|
||||
if (!parser_stat_decl_without_types.parse(pos, command_statistics_decl, expected))
|
||||
return false;
|
||||
|
||||
command->type = ASTAlterCommand::DROP_STATISTIC;
|
||||
command->type = ASTAlterCommand::DROP_STATISTICS;
|
||||
}
|
||||
else if (s_clear_statistic.ignore(pos, expected))
|
||||
else if (s_clear_statistics.ignore(pos, expected))
|
||||
{
|
||||
if (s_if_exists.ignore(pos, expected))
|
||||
command->if_exists = true;
|
||||
|
||||
if (!parser_stat_decl.parse(pos, command_statistic_decl, expected))
|
||||
if (!parser_stat_decl_without_types.parse(pos, command_statistics_decl, expected))
|
||||
return false;
|
||||
|
||||
command->type = ASTAlterCommand::DROP_STATISTIC;
|
||||
command->clear_statistic = true;
|
||||
command->type = ASTAlterCommand::DROP_STATISTICS;
|
||||
command->clear_statistics = true;
|
||||
command->detach = false;
|
||||
|
||||
if (s_in_partition.ignore(pos, expected))
|
||||
@ -406,15 +415,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (s_materialize_statistic.ignore(pos, expected))
|
||||
else if (s_materialize_statistics.ignore(pos, expected))
|
||||
{
|
||||
if (s_if_exists.ignore(pos, expected))
|
||||
command->if_exists = true;
|
||||
|
||||
if (!parser_stat_decl.parse(pos, command_statistic_decl, expected))
|
||||
if (!parser_stat_decl_without_types.parse(pos, command_statistics_decl, expected))
|
||||
return false;
|
||||
|
||||
command->type = ASTAlterCommand::MATERIALIZE_STATISTIC;
|
||||
command->type = ASTAlterCommand::MATERIALIZE_STATISTICS;
|
||||
command->detach = false;
|
||||
|
||||
if (s_in_partition.ignore(pos, expected))
|
||||
@ -931,8 +940,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
command->projection_decl = command->children.emplace_back(std::move(command_projection_decl)).get();
|
||||
if (command_projection)
|
||||
command->projection = command->children.emplace_back(std::move(command_projection)).get();
|
||||
if (command_statistic_decl)
|
||||
command->statistic_decl = command->children.emplace_back(std::move(command_statistic_decl)).get();
|
||||
if (command_statistics_decl)
|
||||
command->statistics_decl = command->children.emplace_back(std::move(command_statistics_decl)).get();
|
||||
if (command_partition)
|
||||
command->partition = command->children.emplace_back(std::move(command_partition)).get();
|
||||
if (command_predicate)
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTIndexDeclaration.h>
|
||||
#include <Parsers/ASTStatisticDeclaration.h>
|
||||
#include <Parsers/ASTStatisticsDeclaration.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTProjectionDeclaration.h>
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
@ -225,15 +225,15 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
bool ParserStatisticsDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
ParserKeyword s_type(Keyword::TYPE);
|
||||
|
||||
ParserList columns_p(std::make_unique<ParserIdentifier>(), std::make_unique<ParserToken>(TokenType::Comma), false);
|
||||
ParserIdentifier type_p;
|
||||
ParserList types_p(std::make_unique<ParserDataType>(), std::make_unique<ParserToken>(TokenType::Comma), false);
|
||||
|
||||
ASTPtr columns;
|
||||
ASTPtr type;
|
||||
ASTPtr types;
|
||||
|
||||
if (!columns_p.parse(pos, columns, expected))
|
||||
return false;
|
||||
@ -241,12 +241,29 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected &
|
||||
if (!s_type.ignore(pos, expected))
|
||||
return false;
|
||||
|
||||
if (!type_p.parse(pos, type, expected))
|
||||
if (!types_p.parse(pos, types, expected))
|
||||
return false;
|
||||
|
||||
auto stat = std::make_shared<ASTStatisticDeclaration>();
|
||||
auto stat = std::make_shared<ASTStatisticsDeclaration>();
|
||||
stat->set(stat->columns, columns);
|
||||
stat->set(stat->types, types);
|
||||
node = stat;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParserStatisticsDeclarationWithoutTypes::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
|
||||
ParserList columns_p(std::make_unique<ParserIdentifier>(), std::make_unique<ParserToken>(TokenType::Comma), false);
|
||||
|
||||
ASTPtr columns;
|
||||
|
||||
if (!columns_p.parse(pos, columns, expected))
|
||||
return false;
|
||||
|
||||
auto stat = std::make_shared<ASTStatisticsDeclaration>();
|
||||
stat->set(stat->columns, columns);
|
||||
stat->type = type->as<ASTIdentifier &>().name();
|
||||
node = stat;
|
||||
|
||||
return true;
|
||||
|
@ -138,7 +138,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
|
||||
ParserKeyword s_auto_increment{Keyword::AUTO_INCREMENT};
|
||||
ParserKeyword s_comment{Keyword::COMMENT};
|
||||
ParserKeyword s_codec{Keyword::CODEC};
|
||||
ParserKeyword s_stat{Keyword::STATISTIC};
|
||||
ParserKeyword s_stat{Keyword::STATISTICS};
|
||||
ParserKeyword s_ttl{Keyword::TTL};
|
||||
ParserKeyword s_remove{Keyword::REMOVE};
|
||||
ParserKeyword s_modify_setting(Keyword::MODIFY_SETTING);
|
||||
@ -155,7 +155,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
|
||||
ParserLiteral literal_parser;
|
||||
ParserCodec codec_parser;
|
||||
ParserCollation collation_parser;
|
||||
ParserStatisticType stat_type_parser;
|
||||
ParserStatisticsType stat_type_parser;
|
||||
ParserExpression expression_parser;
|
||||
ParserSetQuery settings_parser(true);
|
||||
|
||||
@ -452,16 +452,27 @@ protected:
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
class ParserStatisticDeclaration : public IParserBase
|
||||
class ParserStatisticsDeclaration : public IParserBase
|
||||
{
|
||||
public:
|
||||
ParserStatisticDeclaration() = default;
|
||||
ParserStatisticsDeclaration() = default;
|
||||
|
||||
protected:
|
||||
const char * getName() const override { return "statistics declaration"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
/// Parser for a statistics declaration that names columns only, with no TYPE clause:
/// a comma-separated list of column identifiers. The result is an ASTStatisticsDeclaration
/// with only its columns set. ParserAlterCommand uses it for the DROP / CLEAR / MATERIALIZE
/// STATISTICS commands, where statistics types are not specified.
class ParserStatisticsDeclarationWithoutTypes : public IParserBase
|
||||
{
|
||||
public:
|
||||
ParserStatisticsDeclarationWithoutTypes() = default;
|
||||
|
||||
protected:
|
||||
/// Note: reports the same parser name as ParserStatisticsDeclaration.
const char * getName() const override { return "statistics declaration"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
|
||||
class ParserConstraintDeclaration : public IParserBase
|
||||
{
|
||||
protected:
|
||||
|
@ -1526,6 +1526,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
left_join_tree_query_plan.actions_dags.emplace_back(std::move(join_clauses_and_actions.left_join_expressions_actions));
|
||||
if (join_clauses_and_actions.right_join_expressions_actions)
|
||||
left_join_tree_query_plan.actions_dags.emplace_back(std::move(join_clauses_and_actions.right_join_expressions_actions));
|
||||
if (join_clauses_and_actions.mixed_join_expressions_actions)
|
||||
left_join_tree_query_plan.actions_dags.push_back(join_clauses_and_actions.mixed_join_expressions_actions);
|
||||
|
||||
auto mapping = std::move(left_join_tree_query_plan.query_node_to_plan_step_mapping);
|
||||
auto & r_mapping = right_join_tree_query_plan.query_node_to_plan_step_mapping;
|
||||
|
@ -269,7 +269,12 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy
|
||||
case orc::SHORT:
|
||||
case orc::INT:
|
||||
case orc::LONG: {
|
||||
/// May throw exception
|
||||
/// May throw exception.
|
||||
///
|
||||
/// In particular, it'll throw if we request the column as unsigned, like this:
|
||||
/// SELECT * FROM file('t.orc', ORC, 'x UInt8') WHERE x > 10
|
||||
/// We have to reject this, otherwise it would miss values > 127 (because
|
||||
/// they're treated as negative by ORC).
|
||||
auto val = field.get<Int64>();
|
||||
return orc::Literal(val);
|
||||
}
|
||||
|
@ -315,18 +315,20 @@ void ORCBlockOutputFormat::writeColumn(
|
||||
if (null_bytemap)
|
||||
orc_column.hasNulls = true;
|
||||
|
||||
/// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to
|
||||
/// make the ORC library calculate min and max correctly.
|
||||
switch (type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Enum8: [[fallthrough]];
|
||||
case TypeIndex::Int8:
|
||||
{
|
||||
/// Note: Explicit cast to avoid clang-tidy error: 'signed char' to 'long' conversion; consider casting to 'unsigned char' first.
|
||||
writeNumbers<Int8, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const Int8 & value){ return static_cast<int64_t>(value); });
|
||||
writeNumbers<Int8, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const Int8 & value){ return Int64(Int8(value)); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::UInt8:
|
||||
{
|
||||
writeNumbers<UInt8, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt8 & value){ return value; });
|
||||
writeNumbers<UInt8, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt8 & value){ return Int64(Int8(value)); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Enum16: [[fallthrough]];
|
||||
@ -338,7 +340,7 @@ void ORCBlockOutputFormat::writeColumn(
|
||||
case TypeIndex::Date: [[fallthrough]];
|
||||
case TypeIndex::UInt16:
|
||||
{
|
||||
writeNumbers<UInt16, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt16 & value){ return value; });
|
||||
writeNumbers<UInt16, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt16 & value){ return Int64(Int16(value)); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Date32: [[fallthrough]];
|
||||
@ -349,12 +351,12 @@ void ORCBlockOutputFormat::writeColumn(
|
||||
}
|
||||
case TypeIndex::UInt32:
|
||||
{
|
||||
writeNumbers<UInt32, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt32 & value){ return value; });
|
||||
writeNumbers<UInt32, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const UInt32 & value){ return Int64(Int32(value)); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::IPv4:
|
||||
{
|
||||
writeNumbers<IPv4, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const IPv4 & value){ return value.toUnderType(); });
|
||||
writeNumbers<IPv4, orc::LongVectorBatch>(orc_column, column, null_bytemap, [](const IPv4 & value){ return Int64(Int32(value.toUnderType())); });
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Int64:
|
||||
|
@ -83,7 +83,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
|
||||
MergeTreeWhereOptimizer where_optimizer{
|
||||
std::move(column_compressed_sizes),
|
||||
storage_metadata,
|
||||
storage.getConditionEstimatorByPredicate(storage_snapshot, source_step_with_filter->getFilterActionsDAG(), context),
|
||||
storage.getConditionSelectivityEstimatorByPredicate(storage_snapshot, source_step_with_filter->getFilterActionsDAG(), context),
|
||||
queried_columns,
|
||||
storage.supportedPrewhereColumns(),
|
||||
getLogger("QueryPlanOptimizePrewhere")};
|
||||
|
@ -118,7 +118,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
|
||||
optimizePrimaryKeyCondition(stack);
|
||||
|
||||
/// NOTE: optimizePrewhere can modify the stack.
|
||||
/// Prewhere optimization relies on PK optimization (getConditionEstimatorByPredicate)
|
||||
/// Prewhere optimization relies on PK optimization (getConditionSelectivityEstimatorByPredicate)
|
||||
if (optimization_settings.optimize_prewhere)
|
||||
optimizePrewhere(stack, nodes);
|
||||
|
||||
|
@ -67,6 +67,8 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int CANNOT_COMPILE_REGEXP;
|
||||
extern const int CANNOT_OPEN_FILE;
|
||||
extern const int CANNOT_PARSE_TEXT;
|
||||
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
|
||||
extern const int CANNOT_PARSE_QUOTED_STRING;
|
||||
@ -78,8 +80,7 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_PARSE_IPV6;
|
||||
extern const int CANNOT_PARSE_UUID;
|
||||
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
|
||||
extern const int CANNOT_OPEN_FILE;
|
||||
extern const int CANNOT_COMPILE_REGEXP;
|
||||
extern const int CANNOT_SCHEDULE_TASK;
|
||||
extern const int DUPLICATE_COLUMN;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
@ -267,6 +268,10 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti
|
||||
{
|
||||
return HTTPResponse::HTTP_REQUEST_TIMEOUT;
|
||||
}
|
||||
else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK)
|
||||
{
|
||||
return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;
|
||||
}
|
||||
|
||||
return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
|
@ -25,7 +25,7 @@
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTIndexDeclaration.h>
|
||||
#include <Parsers/ASTProjectionDeclaration.h>
|
||||
#include <Parsers/ASTStatisticDeclaration.h>
|
||||
#include <Parsers/ASTStatisticsDeclaration.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
@ -44,7 +44,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_STATISTIC;
|
||||
extern const int ILLEGAL_STATISTICS;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
|
||||
extern const int LOGICAL_ERROR;
|
||||
@ -263,17 +263,32 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
|
||||
|
||||
return command;
|
||||
}
|
||||
else if (command_ast->type == ASTAlterCommand::ADD_STATISTIC)
|
||||
else if (command_ast->type == ASTAlterCommand::ADD_STATISTICS)
|
||||
{
|
||||
AlterCommand command;
|
||||
command.ast = command_ast->clone();
|
||||
command.statistic_decl = command_ast->statistic_decl->clone();
|
||||
command.type = AlterCommand::ADD_STATISTIC;
|
||||
command.statistics_decl = command_ast->statistics_decl->clone();
|
||||
command.type = AlterCommand::ADD_STATISTICS;
|
||||
|
||||
const auto & ast_stat_decl = command_ast->statistic_decl->as<ASTStatisticDeclaration &>();
|
||||
const auto & ast_stat_decl = command_ast->statistics_decl->as<ASTStatisticsDeclaration &>();
|
||||
|
||||
command.statistic_columns = ast_stat_decl.getColumnNames();
|
||||
command.statistic_type = ast_stat_decl.type;
|
||||
command.statistics_columns = ast_stat_decl.getColumnNames();
|
||||
command.statistics_types = ast_stat_decl.getTypeNames();
|
||||
command.if_not_exists = command_ast->if_not_exists;
|
||||
|
||||
return command;
|
||||
}
|
||||
else if (command_ast->type == ASTAlterCommand::MODIFY_STATISTICS)
|
||||
{
|
||||
AlterCommand command;
|
||||
command.ast = command_ast->clone();
|
||||
command.statistics_decl = command_ast->statistics_decl->clone();
|
||||
command.type = AlterCommand::MODIFY_STATISTICS;
|
||||
|
||||
const auto & ast_stat_decl = command_ast->statistics_decl->as<ASTStatisticsDeclaration &>();
|
||||
|
||||
command.statistics_columns = ast_stat_decl.getColumnNames();
|
||||
command.statistics_types = ast_stat_decl.getTypeNames();
|
||||
command.if_not_exists = command_ast->if_not_exists;
|
||||
|
||||
return command;
|
||||
@ -337,17 +352,17 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
|
||||
|
||||
return command;
|
||||
}
|
||||
else if (command_ast->type == ASTAlterCommand::DROP_STATISTIC)
|
||||
else if (command_ast->type == ASTAlterCommand::DROP_STATISTICS)
|
||||
{
|
||||
AlterCommand command;
|
||||
command.ast = command_ast->clone();
|
||||
command.type = AlterCommand::DROP_STATISTIC;
|
||||
const auto & ast_stat_decl = command_ast->statistic_decl->as<ASTStatisticDeclaration &>();
|
||||
command.statistics_decl = command_ast->statistics_decl->clone();
|
||||
command.type = AlterCommand::DROP_STATISTICS;
|
||||
const auto & ast_stat_decl = command_ast->statistics_decl->as<ASTStatisticsDeclaration &>();
|
||||
|
||||
command.statistic_columns = ast_stat_decl.getColumnNames();
|
||||
command.statistic_type = ast_stat_decl.type;
|
||||
command.statistics_columns = ast_stat_decl.getColumnNames();
|
||||
command.if_exists = command_ast->if_exists;
|
||||
command.clear = command_ast->clear_statistic;
|
||||
command.clear = command_ast->clear_statistics;
|
||||
|
||||
if (command_ast->partition)
|
||||
command.partition = command_ast->partition->clone();
|
||||
@ -676,41 +691,56 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
|
||||
metadata.secondary_indices.erase(erase_it);
|
||||
}
|
||||
}
|
||||
else if (type == ADD_STATISTIC)
|
||||
else if (type == ADD_STATISTICS)
|
||||
{
|
||||
for (const auto & statistic_column_name : statistic_columns)
|
||||
for (const auto & statistics_column_name : statistics_columns)
|
||||
{
|
||||
if (!metadata.columns.has(statistic_column_name))
|
||||
if (!metadata.columns.has(statistics_column_name))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Cannot add statistic {} with type {}: this column is not found", statistic_column_name, statistic_type);
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name);
|
||||
}
|
||||
if (!if_exists && metadata.columns.get(statistic_column_name).stat)
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type);
|
||||
}
|
||||
|
||||
auto stats = StatisticDescription::getStatisticsFromAST(statistic_decl, metadata.columns);
|
||||
for (auto && stat : stats)
|
||||
auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
|
||||
for (const auto & stats : stats_vec)
|
||||
{
|
||||
metadata.columns.modify(stat.column_name,
|
||||
[&](ColumnDescription & column) { column.stat = std::move(stat); });
|
||||
metadata.columns.modify(stats.column_name,
|
||||
[&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); });
|
||||
}
|
||||
}
|
||||
else if (type == DROP_STATISTIC)
|
||||
else if (type == DROP_STATISTICS)
|
||||
{
|
||||
for (const auto & stat_column_name : statistic_columns)
|
||||
for (const auto & statistics_column_name : statistics_columns)
|
||||
{
|
||||
if (!metadata.columns.has(stat_column_name) || !metadata.columns.get(stat_column_name).stat)
|
||||
if (!metadata.columns.has(statistics_column_name)
|
||||
|| metadata.columns.get(statistics_column_name).statistics.empty())
|
||||
{
|
||||
if (if_exists)
|
||||
return;
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Wrong statistic name. Cannot find statistic {} with type {} to drop", backQuote(stat_column_name), statistic_type);
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Wrong statistics name. Cannot find statistics {} to drop", backQuote(statistics_column_name));
|
||||
}
|
||||
if (!partition && !clear)
|
||||
|
||||
if (!clear && !partition)
|
||||
metadata.columns.modify(statistics_column_name,
|
||||
[&](ColumnDescription & column) { column.statistics.clear(); });
|
||||
}
|
||||
}
|
||||
else if (type == MODIFY_STATISTICS)
|
||||
{
|
||||
for (const auto & statistics_column_name : statistics_columns)
|
||||
{
|
||||
if (!metadata.columns.has(statistics_column_name))
|
||||
{
|
||||
metadata.columns.modify(stat_column_name,
|
||||
[&](ColumnDescription & column) { column.stat = std::nullopt; });
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name);
|
||||
}
|
||||
}
|
||||
|
||||
auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
|
||||
for (const auto & stats : stats_vec)
|
||||
{
|
||||
metadata.columns.modify(stats.column_name,
|
||||
[&](ColumnDescription & column) { column.statistics.assign(stats); });
|
||||
}
|
||||
}
|
||||
else if (type == ADD_CONSTRAINT)
|
||||
{
|
||||
@ -833,8 +863,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
|
||||
rename_visitor.visit(column_to_modify.default_desc.expression);
|
||||
if (column_to_modify.ttl)
|
||||
rename_visitor.visit(column_to_modify.ttl);
|
||||
if (column_to_modify.name == column_name && column_to_modify.stat)
|
||||
column_to_modify.stat->column_name = rename_to;
|
||||
if (column_to_modify.name == column_name && !column_to_modify.statistics.empty())
|
||||
column_to_modify.statistics.column_name = rename_to;
|
||||
});
|
||||
}
|
||||
if (metadata.table_ttl.definition_ast)
|
||||
@ -958,7 +988,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada
|
||||
if (isRemovingProperty() || type == REMOVE_TTL || type == REMOVE_SAMPLE_BY)
|
||||
return false;
|
||||
|
||||
if (type == DROP_INDEX || type == DROP_PROJECTION || type == RENAME_COLUMN || type == DROP_STATISTIC)
|
||||
if (type == DROP_INDEX || type == DROP_PROJECTION || type == RENAME_COLUMN || type == DROP_STATISTICS)
|
||||
return true;
|
||||
|
||||
/// Drop alias is metadata alter, in other case mutation is required.
|
||||
@ -1065,10 +1095,10 @@ std::optional<MutationCommand> AlterCommand::tryConvertToMutationCommand(Storage
|
||||
|
||||
result.predicate = nullptr;
|
||||
}
|
||||
else if (type == DROP_STATISTIC)
|
||||
else if (type == DROP_STATISTICS)
|
||||
{
|
||||
result.type = MutationCommand::Type::DROP_STATISTIC;
|
||||
result.statistic_columns = statistic_columns;
|
||||
result.type = MutationCommand::Type::DROP_STATISTICS;
|
||||
result.statistics_columns = statistics_columns;
|
||||
|
||||
if (clear)
|
||||
result.clear = true;
|
||||
|
@ -38,8 +38,9 @@ struct AlterCommand
|
||||
DROP_CONSTRAINT,
|
||||
ADD_PROJECTION,
|
||||
DROP_PROJECTION,
|
||||
ADD_STATISTIC,
|
||||
DROP_STATISTIC,
|
||||
ADD_STATISTICS,
|
||||
DROP_STATISTICS,
|
||||
MODIFY_STATISTICS,
|
||||
MODIFY_TTL,
|
||||
MODIFY_SETTING,
|
||||
RESET_SETTING,
|
||||
@ -123,9 +124,9 @@ struct AlterCommand
|
||||
/// For ADD/DROP PROJECTION
|
||||
String projection_name;
|
||||
|
||||
ASTPtr statistic_decl = nullptr;
|
||||
std::vector<String> statistic_columns;
|
||||
String statistic_type;
|
||||
ASTPtr statistics_decl = nullptr;
|
||||
std::vector<String> statistics_columns;
|
||||
std::vector<String> statistics_types;
|
||||
|
||||
/// For MODIFY TTL
|
||||
ASTPtr ttl = nullptr;
|
||||
|
@ -26,8 +26,8 @@ struct ColumnDependency
|
||||
/// TTL is set for @column_name.
|
||||
TTL_TARGET,
|
||||
|
||||
/// Exists any statistic, that requires @column_name
|
||||
STATISTIC,
|
||||
/// There are statistics that require @column_name
|
||||
STATISTICS,
|
||||
};
|
||||
|
||||
ColumnDependency(const String & column_name_, Kind kind_)
|
||||
|
@ -72,7 +72,7 @@ ColumnDescription & ColumnDescription::operator=(const ColumnDescription & other
|
||||
codec = other.codec ? other.codec->clone() : nullptr;
|
||||
settings = other.settings;
|
||||
ttl = other.ttl ? other.ttl->clone() : nullptr;
|
||||
stat = other.stat;
|
||||
statistics = other.statistics;
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -95,7 +95,7 @@ ColumnDescription & ColumnDescription::operator=(ColumnDescription && other) noe
|
||||
ttl = other.ttl ? other.ttl->clone() : nullptr;
|
||||
other.ttl.reset();
|
||||
|
||||
stat = std::move(other.stat);
|
||||
statistics = std::move(other.statistics);
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -107,7 +107,7 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const
|
||||
return name == other.name
|
||||
&& type->equals(*other.type)
|
||||
&& default_desc == other.default_desc
|
||||
&& stat == other.stat
|
||||
&& statistics == other.statistics
|
||||
&& ast_to_str(codec) == ast_to_str(other.codec)
|
||||
&& settings == other.settings
|
||||
&& ast_to_str(ttl) == ast_to_str(other.ttl);
|
||||
@ -154,10 +154,10 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
|
||||
DB::writeText(")", buf);
|
||||
}
|
||||
|
||||
if (stat)
|
||||
if (!statistics.empty())
|
||||
{
|
||||
writeChar('\t', buf);
|
||||
writeEscapedString(queryToString(stat->ast), buf);
|
||||
writeEscapedString(queryToString(statistics.getAST()), buf);
|
||||
}
|
||||
|
||||
if (ttl)
|
||||
|
@ -89,7 +89,7 @@ struct ColumnDescription
|
||||
ASTPtr codec;
|
||||
SettingsChanges settings;
|
||||
ASTPtr ttl;
|
||||
std::optional<StatisticDescription> stat;
|
||||
ColumnStatisticsDescription statistics;
|
||||
|
||||
ColumnDescription() = default;
|
||||
ColumnDescription(const ColumnDescription & other) { *this = other; }
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <Storages/AlterCommands.h>
|
||||
#include <Storages/Statistics/Estimator.h>
|
||||
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
|
||||
#include <Backups/RestorerFromBackup.h>
|
||||
#include <Backups/IBackup.h>
|
||||
|
||||
@ -236,7 +236,7 @@ StorageID IStorage::getStorageID() const
|
||||
return storage_id;
|
||||
}
|
||||
|
||||
ConditionEstimator IStorage::getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const
|
||||
ConditionSelectivityEstimator IStorage::getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
@ -68,7 +68,7 @@ using DatabaseAndTableName = std::pair<String, String>;
|
||||
class BackupEntriesCollector;
|
||||
class RestorerFromBackup;
|
||||
|
||||
class ConditionEstimator;
|
||||
class ConditionSelectivityEstimator;
|
||||
|
||||
struct ColumnSize
|
||||
{
|
||||
@ -135,7 +135,7 @@ public:
|
||||
/// Returns true if the storage supports queries with the PREWHERE section.
|
||||
virtual bool supportsPrewhere() const { return false; }
|
||||
|
||||
virtual ConditionEstimator getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const;
|
||||
virtual ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const;
|
||||
|
||||
/// Returns which columns support PREWHERE, or std::nullopt if all columns are supported.
|
||||
/// This is needed for engines that aggregate data from multiple tables, like Merge.
|
||||
|
@ -673,16 +673,16 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(bool with_subc
|
||||
return *minimum_size_column;
|
||||
}
|
||||
|
||||
Statistics IMergeTreeDataPart::loadStatistics() const
|
||||
ColumnsStatistics IMergeTreeDataPart::loadStatistics() const
|
||||
{
|
||||
const auto & metadata_snaphost = storage.getInMemoryMetadata();
|
||||
|
||||
auto total_statistics = MergeTreeStatisticsFactory::instance().getMany(metadata_snaphost.getColumns());
|
||||
|
||||
Statistics result;
|
||||
ColumnsStatistics result;
|
||||
for (auto & stat : total_statistics)
|
||||
{
|
||||
String file_name = stat->getFileName() + STAT_FILE_SUFFIX;
|
||||
String file_name = stat->getFileName() + STATS_FILE_SUFFIX;
|
||||
String file_path = fs::path(getDataPartStorage().getRelativePath()) / file_name;
|
||||
|
||||
if (!metadata_manager->exists(file_name))
|
||||
|
@ -171,7 +171,7 @@ public:
|
||||
|
||||
void remove();
|
||||
|
||||
Statistics loadStatistics() const;
|
||||
ColumnsStatistics loadStatistics() const;
|
||||
|
||||
/// Initialize columns (from columns.txt if exists, or create from column files if not).
|
||||
/// Load various metadata into memory: checksums from checksums.txt, index if required, etc.
|
||||
|
@ -119,7 +119,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const VirtualsDescriptionPtr & virtual_columns,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension_,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & writer_settings,
|
||||
@ -136,7 +136,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const VirtualsDescriptionPtr & virtual_columns,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension_,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & writer_settings,
|
||||
@ -156,7 +156,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const VirtualsDescriptionPtr & virtual_columns,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension_,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & writer_settings,
|
||||
|
@ -84,7 +84,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const VirtualsDescriptionPtr & virtual_columns_,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & writer_settings,
|
||||
|
@ -658,7 +658,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
|
||||
/// because all of them were already recalculated and written
|
||||
/// as key part of vertical merge
|
||||
std::vector<MergeTreeIndexPtr>{},
|
||||
std::vector<StatisticPtr>{}, /// TODO: think about it
|
||||
ColumnsStatistics{}, /// TODO(hanfei)
|
||||
&global_ctx->written_offset_columns,
|
||||
global_ctx->to->getIndexGranularity());
|
||||
|
||||
|
@ -73,7 +73,7 @@
|
||||
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartCompact.h>
|
||||
#include <Storages/Statistics/Estimator.h>
|
||||
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
|
||||
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>
|
||||
#include <Storages/MergeTree/checkDataPart.h>
|
||||
#include <Storages/MutationCommands.h>
|
||||
@ -471,10 +471,10 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const
|
||||
return storage_policy;
|
||||
}
|
||||
|
||||
ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(
|
||||
ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByPredicate(
|
||||
const StorageSnapshotPtr & storage_snapshot, const ActionsDAGPtr & filter_dag, ContextPtr local_context) const
|
||||
{
|
||||
if (!local_context->getSettings().allow_statistic_optimize)
|
||||
if (!local_context->getSettings().allow_statistics_optimize)
|
||||
return {};
|
||||
|
||||
const auto & parts = assert_cast<const MergeTreeData::SnapshotData &>(*storage_snapshot->data).parts;
|
||||
@ -486,23 +486,29 @@ ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(
|
||||
|
||||
ASTPtr expression_ast;
|
||||
|
||||
ConditionEstimator result;
|
||||
ConditionSelectivityEstimator result;
|
||||
PartitionPruner partition_pruner(storage_snapshot->metadata, filter_dag, local_context);
|
||||
|
||||
if (partition_pruner.isUseless())
|
||||
{
|
||||
/// Read all partitions.
|
||||
for (const auto & part : parts)
|
||||
try
|
||||
{
|
||||
auto stats = part->loadStatistics();
|
||||
/// TODO: We only have one stats file for every part.
|
||||
for (const auto & stat : stats)
|
||||
result.merge(part->info.getPartNameV1(), part->rows_count, stat);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, fmt::format("while loading statistics on part {}", part->info.getPartNameV1()));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto & part : parts)
|
||||
try
|
||||
{
|
||||
if (!partition_pruner.canBePruned(*part))
|
||||
{
|
||||
@ -511,6 +517,10 @@ ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(
|
||||
result.merge(part->info.getPartNameV1(), part->rows_count, stat);
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, fmt::format("while loading statistics on part {}", part->info.getPartNameV1()));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -691,8 +701,8 @@ void MergeTreeData::checkProperties(
|
||||
|
||||
for (const auto & col : new_metadata.columns)
|
||||
{
|
||||
if (col.stat)
|
||||
MergeTreeStatisticsFactory::instance().validate(*col.stat, col.type);
|
||||
if (!col.statistics.empty())
|
||||
MergeTreeStatisticsFactory::instance().validate(col.statistics, col.type);
|
||||
}
|
||||
|
||||
checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key_);
|
||||
@ -3469,13 +3479,13 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
|
||||
new_metadata.getColumns().getPhysical(command.column_name));
|
||||
|
||||
const auto & old_column = old_metadata.getColumns().get(command.column_name);
|
||||
if (old_column.stat)
|
||||
if (!old_column.statistics.empty())
|
||||
{
|
||||
const auto & new_column = new_metadata.getColumns().get(command.column_name);
|
||||
if (!old_column.type->equals(*new_column.type))
|
||||
throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN,
|
||||
"ALTER types of column {} with statistic is not not safe "
|
||||
"because it can change the representation of statistic",
|
||||
"ALTER types of column {} with statistics is not not safe "
|
||||
"because it can change the representation of statistics",
|
||||
backQuoteIfNeed(command.column_name));
|
||||
}
|
||||
}
|
||||
@ -8510,7 +8520,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::createE
|
||||
const auto & index_factory = MergeTreeIndexFactory::instance();
|
||||
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns,
|
||||
index_factory.getMany(metadata_snapshot->getSecondaryIndices()),
|
||||
Statistics{},
|
||||
ColumnsStatistics{},
|
||||
compression_codec, txn ? txn->tid : Tx::PrehistoricTID);
|
||||
|
||||
bool sync_on_insert = settings->fsync_after_insert;
|
||||
|
@ -426,7 +426,7 @@ public:
|
||||
|
||||
bool supportsPrewhere() const override { return true; }
|
||||
|
||||
ConditionEstimator getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const override;
|
||||
ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const override;
|
||||
|
||||
bool supportsFinal() const override;
|
||||
|
||||
|
@ -59,7 +59,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const VirtualsDescriptionPtr & virtual_columns,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension_,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & writer_settings,
|
||||
|
@ -64,7 +64,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const VirtualsDescriptionPtr & virtual_columns,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension_,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & writer_settings,
|
||||
|
@ -20,7 +20,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
const VirtualsDescriptionPtr & virtual_columns_,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc_,
|
||||
const Statistics & stats_to_recalc,
|
||||
const ColumnsStatistics & stats_to_recalc,
|
||||
const String & marks_file_extension_,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & settings_,
|
||||
|
@ -21,7 +21,7 @@ public:
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
const VirtualsDescriptionPtr & virtual_columns_,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc,
|
||||
const ColumnsStatistics & stats_to_recalc,
|
||||
const String & marks_file_extension,
|
||||
const CompressionCodecPtr & default_codec,
|
||||
const MergeTreeWriterSettings & settings,
|
||||
|
@ -150,7 +150,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk(
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
const VirtualsDescriptionPtr & virtual_columns_,
|
||||
const MergeTreeIndices & indices_to_recalc_,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension_,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & settings_,
|
||||
@ -265,7 +265,7 @@ void MergeTreeDataPartWriterOnDisk::initStatistics()
|
||||
stats_streams.emplace_back(std::make_unique<MergeTreeDataPartWriterOnDisk::Stream<true>>(
|
||||
stats_name,
|
||||
data_part_storage,
|
||||
stats_name, STAT_FILE_SUFFIX,
|
||||
stats_name, STATS_FILE_SUFFIX,
|
||||
default_codec, settings.max_compress_block_size,
|
||||
settings.query_write_settings));
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ public:
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
const VirtualsDescriptionPtr & virtual_columns_,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension,
|
||||
const CompressionCodecPtr & default_codec,
|
||||
const MergeTreeWriterSettings & settings,
|
||||
@ -155,7 +155,7 @@ protected:
|
||||
|
||||
const MergeTreeIndices skip_indices;
|
||||
|
||||
const Statistics stats;
|
||||
const ColumnsStatistics stats;
|
||||
std::vector<StatisticStreamPtr> stats_streams;
|
||||
|
||||
const String marks_file_extension;
|
||||
|
@ -86,7 +86,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
const VirtualsDescriptionPtr & virtual_columns_,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc_,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension_,
|
||||
const CompressionCodecPtr & default_codec_,
|
||||
const MergeTreeWriterSettings & settings_,
|
||||
|
@ -31,7 +31,7 @@ public:
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const VirtualsDescriptionPtr & virtual_columns_,
|
||||
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
||||
const Statistics & stats_to_recalc_,
|
||||
const ColumnsStatistics & stats_to_recalc_,
|
||||
const String & marks_file_extension,
|
||||
const CompressionCodecPtr & default_codec,
|
||||
const MergeTreeWriterSettings & settings,
|
||||
|
@ -760,9 +760,16 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
|
||||
CurrentMetrics::MergeTreeDataSelectExecutorThreadsScheduled,
|
||||
num_threads);
|
||||
|
||||
|
||||
/// Instances of ThreadPool "borrow" threads from the global thread pool.
|
||||
/// We intentionally use scheduleOrThrow here to avoid a deadlock.
|
||||
/// For example, queries can already be running with threads from the
|
||||
/// global pool, and if we saturate max_thread_pool_size whilst requesting
|
||||
/// more in this loop, queries will block infinitely.
|
||||
/// So we wait until lock_acquire_timeout, and then raise an exception.
|
||||
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
|
||||
{
|
||||
pool.scheduleOrThrowOnError([&, part_index, thread_group = CurrentThread::getGroup()]
|
||||
pool.scheduleOrThrow([&, part_index, thread_group = CurrentThread::getGroup()]
|
||||
{
|
||||
setThreadName("MergeTreeIndex");
|
||||
|
||||
@ -774,7 +781,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
|
||||
CurrentThread::attachToGroupIfDetached(thread_group);
|
||||
|
||||
process_part(part_index);
|
||||
});
|
||||
}, Priority{}, context->getSettingsRef().lock_acquire_timeout.totalMicroseconds());
|
||||
}
|
||||
|
||||
pool.wait();
|
||||
|
@ -469,7 +469,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
|
||||
if (context->getSettingsRef().materialize_skip_indexes_on_insert)
|
||||
indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices());
|
||||
|
||||
Statistics statistics;
|
||||
ColumnsStatistics statistics;
|
||||
if (context->getSettingsRef().materialize_statistics_on_insert)
|
||||
statistics = MergeTreeStatisticsFactory::instance().getMany(metadata_snapshot->getColumns());
|
||||
|
||||
@ -754,7 +754,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
|
||||
metadata_snapshot,
|
||||
columns,
|
||||
MergeTreeIndices{},
|
||||
Statistics{}, /// TODO(hanfei): It should be helpful to write statistics for projection result.
|
||||
/// TODO(hanfei): It should be helpful to write statistics for projection result.
|
||||
ColumnsStatistics{},
|
||||
compression_codec,
|
||||
Tx::PrehistoricTID,
|
||||
false, false, data.getContext()->getWriteSettings());
|
||||
|
@ -566,7 +566,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals(
|
||||
out.function = RPNElement::FUNCTION_EQUALS;
|
||||
out.bloom_filter = std::make_unique<BloomFilter>(params);
|
||||
const auto & value = const_value.get<String>();
|
||||
token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
|
||||
token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, true, false);
|
||||
return true;
|
||||
}
|
||||
else if (function_name == "endsWith")
|
||||
@ -575,7 +575,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals(
|
||||
out.function = RPNElement::FUNCTION_EQUALS;
|
||||
out.bloom_filter = std::make_unique<BloomFilter>(params);
|
||||
const auto & value = const_value.get<String>();
|
||||
token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
|
||||
token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, false, true);
|
||||
return true;
|
||||
}
|
||||
else if (function_name == "multiSearchAny"
|
||||
@ -596,7 +596,15 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals(
|
||||
|
||||
bloom_filters.back().emplace_back(params);
|
||||
const auto & value = element.get<String>();
|
||||
token_extractor->stringToBloomFilter(value.data(), value.size(), bloom_filters.back().back());
|
||||
|
||||
if (function_name == "multiSearchAny")
|
||||
{
|
||||
token_extractor->substringToBloomFilter(value.data(), value.size(), bloom_filters.back().back(), false, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
token_extractor->stringToBloomFilter(value.data(), value.size(), bloom_filters.back().back());
|
||||
}
|
||||
}
|
||||
out.set_bloom_filters = std::move(bloom_filters);
|
||||
return true;
|
||||
@ -625,12 +633,12 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals(
|
||||
for (const auto & alternative : alternatives)
|
||||
{
|
||||
bloom_filters.back().emplace_back(params);
|
||||
token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
|
||||
token_extractor->substringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back(), false, false);
|
||||
}
|
||||
out.set_bloom_filters = std::move(bloom_filters);
|
||||
}
|
||||
else
|
||||
token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
|
||||
token_extractor->substringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter, false, false);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -595,7 +595,7 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
out.function = RPNElement::FUNCTION_EQUALS;
|
||||
out.gin_filter = std::make_unique<GinFilter>(params);
|
||||
const auto & value = const_value.get<String>();
|
||||
token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter);
|
||||
token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, true, false);
|
||||
return true;
|
||||
}
|
||||
else if (function_name == "endsWith")
|
||||
@ -604,7 +604,7 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
out.function = RPNElement::FUNCTION_EQUALS;
|
||||
out.gin_filter = std::make_unique<GinFilter>(params);
|
||||
const auto & value = const_value.get<String>();
|
||||
token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter);
|
||||
token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, false, true);
|
||||
return true;
|
||||
}
|
||||
else if (function_name == "multiSearchAny")
|
||||
@ -622,7 +622,7 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
|
||||
gin_filters.back().emplace_back(params);
|
||||
const auto & value = element.get<String>();
|
||||
token_extractor->stringToGinFilter(value.data(), value.size(), gin_filters.back().back());
|
||||
token_extractor->substringToGinFilter(value.data(), value.size(), gin_filters.back().back(), false, false);
|
||||
}
|
||||
out.set_gin_filters = std::move(gin_filters);
|
||||
return true;
|
||||
@ -650,14 +650,14 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
for (const auto & alternative : alternatives)
|
||||
{
|
||||
gin_filters.back().emplace_back(params);
|
||||
token_extractor->stringToGinFilter(alternative.data(), alternative.size(), gin_filters.back().back());
|
||||
token_extractor->substringToGinFilter(alternative.data(), alternative.size(), gin_filters.back().back(), false, false);
|
||||
}
|
||||
out.set_gin_filters = std::move(gin_filters);
|
||||
}
|
||||
else
|
||||
{
|
||||
out.gin_filter = std::make_unique<GinFilter>(params);
|
||||
token_extractor->stringToGinFilter(required_substring.data(), required_substring.size(), *out.gin_filter);
|
||||
token_extractor->substringToGinFilter(required_substring.data(), required_substring.size(), *out.gin_filter, false, false);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -742,6 +742,7 @@ bool MergeTreeConditionFullText::tryPrepareSetGinFilter(
|
||||
|
||||
MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const
|
||||
{
|
||||
/// ------
|
||||
/// Index type 'inverted' was renamed to 'full_text' in May 2024.
|
||||
/// Tables with old indexes can be loaded during a transition period. We still want let users know that they should drop existing
|
||||
/// indexes and re-create them. Function `createIndexGranule` is called whenever the index is used by queries. Reject the query if we
|
||||
@ -749,6 +750,7 @@ MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const
|
||||
/// TODO: remove this at the end of 2024.
|
||||
if (index.type == INVERTED_INDEX_NAME)
|
||||
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'inverted' are no longer supported. Please drop and recreate the index as type 'full-text'");
|
||||
/// ------
|
||||
|
||||
return std::make_shared<MergeTreeIndexGranuleFullText>(index.name, index.column_names.size(), params);
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ static Int64 findMinPosition(const NameSet & condition_table_columns, const Name
|
||||
MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
|
||||
std::unordered_map<std::string, UInt64> column_sizes_,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const ConditionEstimator & estimator_,
|
||||
const ConditionSelectivityEstimator & estimator_,
|
||||
const Names & queried_columns_,
|
||||
const std::optional<NameSet> & supported_columns_,
|
||||
LoggerPtr log_)
|
||||
@ -92,7 +92,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons
|
||||
where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere;
|
||||
where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere;
|
||||
where_optimizer_context.is_final = select.final();
|
||||
where_optimizer_context.use_statistic = context->getSettingsRef().allow_statistic_optimize;
|
||||
where_optimizer_context.use_statistics = context->getSettingsRef().allow_statistics_optimize;
|
||||
|
||||
RPNBuilderTreeContext tree_context(context, std::move(block_with_constants), {} /*prepared_sets*/);
|
||||
RPNBuilderTreeNode node(select.where().get(), tree_context);
|
||||
@ -123,7 +123,7 @@ MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::op
|
||||
where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere;
|
||||
where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere;
|
||||
where_optimizer_context.is_final = is_final;
|
||||
where_optimizer_context.use_statistic = context->getSettingsRef().allow_statistic_optimize;
|
||||
where_optimizer_context.use_statistics = context->getSettingsRef().allow_statistics_optimize;
|
||||
|
||||
RPNBuilderTreeContext tree_context(context);
|
||||
RPNBuilderTreeNode node(&filter_dag->findInOutputs(filter_column_name), tree_context);
|
||||
@ -273,15 +273,17 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree
|
||||
/// Do not move conditions involving all queried columns.
|
||||
&& cond.table_columns.size() < queried_columns.size();
|
||||
|
||||
if (where_optimizer_context.use_statistic)
|
||||
if (cond.viable)
|
||||
cond.good = isConditionGood(node, table_columns);
|
||||
|
||||
if (where_optimizer_context.use_statistics)
|
||||
{
|
||||
cond.good = cond.viable;
|
||||
cond.selectivity = estimator.estimateSelectivity(node);
|
||||
LOG_TEST(log, "Condition {} has selectivity {}", node.getColumnName(), cond.selectivity);
|
||||
}
|
||||
else if (cond.viable)
|
||||
{
|
||||
cond.good = isConditionGood(node, table_columns);
|
||||
|
||||
cond.estimated_row_count = estimator.estimateRowCount(node);
|
||||
|
||||
if (node.getASTNode() != nullptr)
|
||||
LOG_DEBUG(log, "Condition {} has estimated row count {}", node.getASTNode()->dumpTree(), cond.estimated_row_count);
|
||||
}
|
||||
|
||||
if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere)
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <Storages/MergeTree/RPNBuilder.h>
|
||||
#include <Storages/Statistics/Estimator.h>
|
||||
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
@ -38,7 +38,7 @@ public:
|
||||
MergeTreeWhereOptimizer(
|
||||
std::unordered_map<std::string, UInt64> column_sizes_,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const ConditionEstimator & estimator_,
|
||||
const ConditionSelectivityEstimator & estimator_,
|
||||
const Names & queried_columns_,
|
||||
const std::optional<NameSet> & supported_columns_,
|
||||
LoggerPtr log_);
|
||||
@ -76,7 +76,7 @@ private:
|
||||
bool good = false;
|
||||
|
||||
/// the lower the better
|
||||
Float64 selectivity = 1.0;
|
||||
Float64 estimated_row_count = 0;
|
||||
|
||||
/// Does the condition contain primary key column?
|
||||
/// If so, it is better to move it further to the end of PREWHERE chain depending on minimal position in PK of any
|
||||
@ -85,7 +85,7 @@ private:
|
||||
|
||||
auto tuple() const
|
||||
{
|
||||
return std::make_tuple(!viable, !good, -min_position_in_primary_key, selectivity, columns_size, table_columns.size());
|
||||
return std::make_tuple(!viable, !good, -min_position_in_primary_key, estimated_row_count, columns_size, table_columns.size());
|
||||
}
|
||||
|
||||
/// Is condition a better candidate for moving to PREWHERE?
|
||||
@ -104,7 +104,7 @@ private:
|
||||
bool move_all_conditions_to_prewhere = false;
|
||||
bool move_primary_key_columns_to_end_of_prewhere = false;
|
||||
bool is_final = false;
|
||||
bool use_statistic = false;
|
||||
bool use_statistics = false;
|
||||
};
|
||||
|
||||
struct OptimizeResult
|
||||
@ -147,7 +147,7 @@ private:
|
||||
|
||||
static NameSet determineArrayJoinedNames(const ASTSelectQuery & select);
|
||||
|
||||
const ConditionEstimator estimator;
|
||||
const ConditionSelectivityEstimator estimator;
|
||||
|
||||
const NameSet table_columns;
|
||||
const Names queried_columns;
|
||||
|
@ -20,7 +20,7 @@ MergedBlockOutputStream::MergedBlockOutputStream(
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
const NamesAndTypesList & columns_list_,
|
||||
const MergeTreeIndices & skip_indices,
|
||||
const Statistics & statistics,
|
||||
const ColumnsStatistics & statistics,
|
||||
CompressionCodecPtr default_codec_,
|
||||
TransactionID tid,
|
||||
bool reset_columns_,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user