Merge branch 'master' into alter-table-add-comment

Alexey Milovidov 2023-06-26 12:01:34 +03:00 committed by GitHub
commit 533a279f6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
228 changed files with 2783 additions and 1191 deletions

View File

@ -146,7 +146,7 @@ add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv
add_contrib (cassandra-cmake cassandra) # requires: libuv
if (NOT OS_DARWIN)
add_contrib (curl-cmake curl)
add_contrib (azure-cmake azure)
add_contrib (azure-cmake azure) # requires: curl
add_contrib (sentry-native-cmake sentry-native) # requires: curl
endif()
add_contrib (fmtlib-cmake fmtlib)
@ -157,7 +157,7 @@ add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl
add_contrib (nats-io-cmake nats-io)
add_contrib (isa-l-cmake isa-l)
add_contrib (libhdfs3-cmake libhdfs3) # requires: google-protobuf, krb5, isa-l
add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3
add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift, avro, arrow, libhdfs3
add_contrib (cppkafka-cmake cppkafka)
add_contrib (libpqxx-cmake libpqxx)
add_contrib (libpq-cmake libpq)

View File

@ -1,6 +1,6 @@
option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
if (NOT ENABLE_AZURE_BLOB_STORAGE OR BUILD_STANDALONE_KEEPER OR OS_FREEBSD OR (NOT ARCH_AMD64))
if (NOT ENABLE_AZURE_BLOB_STORAGE OR BUILD_STANDALONE_KEEPER OR OS_FREEBSD)
message(STATUS "Not using Azure blob storage")
return()
endif()

View File

@ -1,11 +1,11 @@
if(NOT ARCH_AARCH64 AND NOT OS_FREEBSD AND NOT APPLE AND NOT ARCH_PPC64LE AND NOT ARCH_S390X)
if(NOT OS_FREEBSD AND NOT APPLE AND NOT ARCH_PPC64LE AND NOT ARCH_S390X)
option(ENABLE_HDFS "Enable HDFS" ${ENABLE_LIBRARIES})
elseif(ENABLE_HDFS)
message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use HDFS3 with current configuration")
endif()
if(NOT ENABLE_HDFS)
message(STATUS "Not using hdfs")
message(STATUS "Not using HDFS")
return()
endif()

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/ExternalDistributed
sidebar_position: 12
sidebar_position: 55
sidebar_label: ExternalDistributed
title: ExternalDistributed
---

View File

@ -1,5 +1,6 @@
---
slug: /en/engines/table-engines/integrations/azureBlobStorage
sidebar_position: 10
sidebar_label: Azure Blob Storage
---
@ -29,8 +30,8 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32)
**Example**
``` sql
CREATE TABLE test_table (key UInt64, data String)
ENGINE = AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
CREATE TABLE test_table (key UInt64, data String)
ENGINE = AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
'test_container', 'test_table', 'CSV');
INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c');

View File

@ -1,5 +1,6 @@
---
slug: /en/engines/table-engines/integrations/deltalake
sidebar_position: 40
sidebar_label: DeltaLake
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/embedded-rocksdb
sidebar_position: 9
sidebar_position: 50
sidebar_label: EmbeddedRocksDB
---
@ -99,7 +99,7 @@ INSERT INTO test VALUES ('some key', 1, 'value', 3.2);
### Deletes
Rows can be deleted using `DELETE` query or `TRUNCATE`.
Rows can be deleted using `DELETE` query or `TRUNCATE`.
```sql
DELETE FROM test WHERE key LIKE 'some%' AND v1 > 1;

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/hdfs
sidebar_position: 6
sidebar_position: 80
sidebar_label: HDFS
---
@ -63,7 +63,7 @@ SELECT * FROM hdfs_engine_table LIMIT 2
- `ALTER` and `SELECT...SAMPLE` operations.
- Indexes.
- [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not recommended.
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
:::

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/hive
sidebar_position: 4
sidebar_position: 84
sidebar_label: Hive
---

View File

@ -1,5 +1,6 @@
---
slug: /en/engines/table-engines/integrations/hudi
sidebar_position: 86
sidebar_label: Hudi
---

View File

@ -1,5 +1,6 @@
---
slug: /en/engines/table-engines/integrations/iceberg
sidebar_position: 90
sidebar_label: Iceberg
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/jdbc
sidebar_position: 3
sidebar_position: 100
sidebar_label: JDBC
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/kafka
sidebar_position: 8
sidebar_position: 110
sidebar_label: Kafka
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/materialized-postgresql
sidebar_position: 12
sidebar_position: 130
sidebar_label: MaterializedPostgreSQL
title: MaterializedPostgreSQL
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/mongodb
sidebar_position: 5
sidebar_position: 135
sidebar_label: MongoDB
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/mysql
sidebar_position: 4
sidebar_position: 138
sidebar_label: MySQL
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/nats
sidebar_position: 14
sidebar_position: 140
sidebar_label: NATS
---
@ -83,12 +83,12 @@ You can select one of the subjects the table reads from and publish your data th
CREATE TABLE queue (
key UInt64,
value UInt64
) ENGINE = NATS
) ENGINE = NATS
SETTINGS nats_url = 'localhost:4444',
nats_subjects = 'subject1,subject2',
nats_format = 'JSONEachRow';
INSERT INTO queue
INSERT INTO queue
SETTINGS stream_like_engine_insert_queue = 'subject2'
VALUES (1, 1);
```
@ -102,7 +102,7 @@ Example:
key UInt64,
value UInt64,
date DateTime
) ENGINE = NATS
) ENGINE = NATS
SETTINGS nats_url = 'localhost:4444',
nats_subjects = 'subject1',
nats_format = 'JSONEachRow',
@ -137,7 +137,7 @@ Example:
CREATE TABLE queue (
key UInt64,
value UInt64
) ENGINE = NATS
) ENGINE = NATS
SETTINGS nats_url = 'localhost:4444',
nats_subjects = 'subject1',
nats_format = 'JSONEachRow',

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/odbc
sidebar_position: 2
sidebar_position: 150
sidebar_label: ODBC
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/postgresql
sidebar_position: 11
sidebar_position: 160
sidebar_label: PostgreSQL
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/rabbitmq
sidebar_position: 10
sidebar_position: 170
sidebar_label: RabbitMQ
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/redis
sidebar_position: 43
sidebar_position: 175
sidebar_label: Redis
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/s3
sidebar_position: 7
sidebar_position: 180
sidebar_label: S3
---
@ -8,30 +8,7 @@ sidebar_label: S3
This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar to the [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs) engine, but provides S3-specific features.
## Create Table {#creating-a-table}
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression])
[PARTITION BY expr]
[SETTINGS ...]
```
**Engine parameters**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
### PARTITION BY
`PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
**Example**
## Example
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
@ -49,6 +26,135 @@ SELECT * FROM s3_engine_table LIMIT 2;
│ two │ 2 │
└──────┴───────┘
```
## Create Table {#creating-a-table}
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression])
[PARTITION BY expr]
[SETTINGS ...]
```
### Engine parameters
- `path` — Bucket URL with a path to the file. Supports the following wildcards in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
- `NOSIGN` - If this keyword is provided in place of credentials, none of the requests will be signed.
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. The parameter is optional. If credentials are not specified, they are taken from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. The parameter is optional. By default, it will auto-detect the compression by file extension.
### PARTITION BY
`PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
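A minimal sketch of what a monthly partition key looks like for this engine (the bucket URL and column names are assumptions for illustration; as in the example further below, the path contains the `{_partition_id}` token so that each partition is written to its own object):
``` sql
CREATE TABLE s3_monthly (event_date Date, value UInt32)
    ENGINE = S3('https://my-bucket.s3.amazonaws.com/data/events_{_partition_id}.csv', 'CSV')
    PARTITION BY toYYYYMM(event_date)
```
With this key, rows for June 2023 would be written to `events_202306.csv`, rows for July 2023 to `events_202307.csv`, and so on.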
### Querying partitioned data
This example uses the [docker compose recipe](https://github.com/ClickHouse/examples/tree/5fdc6ff72f4e5137e23ea075c88d3f44b0202490/docker-compose-recipes/recipes/ch-and-minio-S3), which integrates ClickHouse and MinIO. You should be able to reproduce the same queries using S3 by replacing the endpoint and authentication values.
Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter token `{_partition_id}` as part of the S3 object (filename), and that the SELECT queries select against those resulting object names (e.g., `test_3.csv`).
:::note
As shown in the example, querying partitioned S3 tables is not directly supported at this time, but it can be accomplished by querying the bucket contents with a wildcard.
The primary use case for writing partitioned data to S3 is to enable transferring that data into another ClickHouse system (for example, moving from on-prem systems to ClickHouse Cloud). Because ClickHouse datasets are often very large, and network reliability is sometimes imperfect, it makes sense to transfer datasets in subsets, hence partitioned writes.
:::
#### Create the table
```sql
CREATE TABLE p
(
`column1` UInt32,
`column2` UInt32,
`column3` UInt32
)
ENGINE = S3(
# highlight-next-line
'http://minio:10000/clickhouse//test_{_partition_id}.csv',
'minioadmin',
'minioadminpassword',
'CSV')
PARTITION BY column3
```
#### Insert data
```sql
INSERT INTO p VALUES (1, 2, 3), (3, 2, 1), (78, 43, 45)
```
#### Select from partition 3
:::tip
This query uses the s3 table function
:::
```sql
SELECT *
FROM s3('http://minio:10000/clickhouse//test_3.csv', 'minioadmin', 'minioadminpassword', 'CSV')
```
```response
┌─c1─┬─c2─┬─c3─┐
│ 1 │ 2 │ 3 │
└────┴────┴────┘
```
#### Select from partition 1
```sql
SELECT *
FROM s3('http://minio:10000/clickhouse//test_1.csv', 'minioadmin', 'minioadminpassword', 'CSV')
```
```response
┌─c1─┬─c2─┬─c3─┐
│ 3 │ 2 │ 1 │
└────┴────┴────┘
```
#### Select from partition 45
```sql
SELECT *
FROM s3('http://minio:10000/clickhouse//test_45.csv', 'minioadmin', 'minioadminpassword', 'CSV')
```
```response
┌─c1─┬─c2─┬─c3─┐
│ 78 │ 43 │ 45 │
└────┴────┴────┘
```
#### Select from all partitions
```sql
SELECT *
FROM s3('http://minio:10000/clickhouse//**', 'minioadmin', 'minioadminpassword', 'CSV')
```
```response
┌─c1─┬─c2─┬─c3─┐
│ 3 │ 2 │ 1 │
└────┴────┴────┘
┌─c1─┬─c2─┬─c3─┐
│ 1 │ 2 │ 3 │
└────┴────┴────┘
┌─c1─┬─c2─┬─c3─┐
│ 78 │ 43 │ 45 │
└────┴────┴────┘
```
You may naturally try to `Select * from p`, but as noted above, this query will fail; use the preceding query.
```sql
SELECT * FROM p
```
```response
Received exception from server (version 23.4.1):
Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading from a partitioned S3 storage is not implemented yet. (NOT_IMPLEMENTED)
```
## Virtual columns {#virtual-columns}
- `_path` — Path to the file.

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-engines/integrations/sqlite
sidebar_position: 7
sidebar_position: 185
sidebar_label: SQLite
---

View File

@ -1975,6 +1975,10 @@ The time zone is necessary for conversions between String and DateTime formats w
<timezone>Asia/Istanbul</timezone>
```
**See also**
- [session_timezone](../settings/settings.md#session_timezone)
## tcp_port {#server_configuration_parameters-tcp_port}
Port for communicating with clients over the TCP protocol.

View File

@ -4251,6 +4251,69 @@ Default value: `0`.
Use this setting only for backward compatibility if your use cases depend on old syntax.
:::
## session_timezone {#session_timezone}
Sets the implicit time zone of the current session or query.
The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone.
The setting takes precedence over the globally configured (server-level) implicit time zone.
A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone).
You can use functions `timeZone()` and `serverTimeZone()` to get the session time zone and server time zone.
Possible values:
- Any time zone name from `system.time_zones`, e.g. `Europe/Berlin`, `UTC` or `Zulu`
Default value: `''`.
Examples:
```sql
SELECT timeZone(), serverTimeZone() FORMAT TSV
Europe/Berlin Europe/Berlin
```
```sql
SELECT timeZone(), serverTimeZone() SETTINGS session_timezone = 'Asia/Novosibirsk' FORMAT TSV
Asia/Novosibirsk Europe/Berlin
```
Assign the session time zone 'America/Denver' to an inner DateTime that has no explicitly specified time zone:
```sql
SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'America/Denver' FORMAT TSV
1999-12-13 07:23:23.123
```
:::warning
Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors.
See the following example and explanation.
:::
```sql
CREATE TABLE test_tz (`d` DateTime('UTC')) ENGINE = Memory AS SELECT toDateTime('2000-01-01 00:00:00', 'UTC');
SELECT *, timeZone() FROM test_tz WHERE d = toDateTime('2000-01-01 00:00:00') SETTINGS session_timezone = 'Asia/Novosibirsk'
0 rows in set.
SELECT *, timeZone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS session_timezone = 'Asia/Novosibirsk'
┌───────────────────d─┬─timeZone()───────┐
│ 2000-01-01 00:00:00 │ Asia/Novosibirsk │
└─────────────────────┴──────────────────┘
```
This happens due to different parsing pipelines:
- `toDateTime()` without an explicitly given time zone, as used in the first `SELECT` query, honors the `session_timezone` setting and the global time zone.
- In the second query, a DateTime is parsed from a String and inherits the type and time zone of the existing column `d`. Thus, the `session_timezone` setting and the global time zone are not honored.
**See also**
- [timezone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
## final {#final}
Automatically applies [FINAL](../../sql-reference/statements/select/from.md#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from.md#final-modifier) is applicable, including joined tables and tables in sub-queries, and

View File

@ -139,8 +139,8 @@ makeDateTime32(year, month, day, hour, minute, second[, fraction[, precision[, t
## timeZone
Returns the timezone of the server.
If the function is executed in the context of a distributed table, it generates a normal column with values relevant to each shard, otherwise it produces a constant value.
Returns the timezone of the current session, i.e. the value of setting [session_timezone](../../operations/settings/settings.md#session_timezone).
If the function is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard, otherwise it produces a constant value.
**Syntax**
@ -156,6 +156,33 @@ Alias: `timezone`.
Type: [String](../../sql-reference/data-types/string.md).
**See also**
- [serverTimeZone](#serverTimeZone)
## serverTimeZone
Returns the timezone of the server, i.e. the value of setting [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone).
If the function is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise, it produces a constant value.
**Syntax**
``` sql
serverTimeZone()
```
Alias: `serverTimezone`.
**Returned value**
- Timezone.
Type: [String](../../sql-reference/data-types/string.md).
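**Example**
A brief usage sketch; the result shown assumes the server's configured `timezone` is `Europe/Berlin`.
Query:
``` sql
SELECT serverTimeZone()
```
Result:
```response
┌─serverTimeZone()─┐
│ Europe/Berlin    │
└──────────────────┘
```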
**See also**
- [timeZone](#timeZone)
## toTimeZone
Converts a date or date with time to the specified time zone. Does not change the internal value (the number of Unix seconds) of the data; only the value's time zone attribute and its string representation change.
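A hedged illustration of that point (the timestamps are made up for the example): converting the time zone changes only how the value is rendered, not the underlying Unix timestamp.
``` sql
SELECT
    toDateTime('2023-06-26 09:00:00', 'UTC') AS utc_time,
    toTimeZone(utc_time, 'Europe/Moscow') AS moscow_time,
    toUnixTimestamp(utc_time) = toUnixTimestamp(moscow_time) AS same_instant
```
```response
┌────────────utc_time─┬─────────moscow_time─┬─same_instant─┐
│ 2023-06-26 09:00:00 │ 2023-06-26 12:00:00 │            1 │
└─────────────────────┴─────────────────────┴──────────────┘
```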

View File

@ -237,6 +237,43 @@ Result:
└────────────────────────────┘
```
## L2SquaredDistance
Calculates the sum of the squares of the differences between the corresponding elements of two vectors.
**Syntax**
```sql
L2SquaredDistance(vector1, vector2)
```
Alias: `distanceL2Squared`.
**Arguments**
- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
**Returned value**
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L2SquaredDistance([1, 2, 3], [0, 0, 0])
```
Result:
```response
┌─L2SquaredDistance([1, 2, 3], [0, 0, 0])─┐
│ 14 │
└─────────────────────────────────────────┘
```
## LinfDistance
Calculates the distance between two points (the values of the vectors are the coordinates) in `L_{inf}` space ([maximum norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#Maximum_norm_(special_case_of:_infinity_norm,_uniform_norm,_or_supremum_norm))).

View File

@ -4,6 +4,8 @@ sidebar_position: 130
sidebar_label: NLP (experimental)
---
# Natural Language Processing (NLP) Functions
:::note
This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it.
:::
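For instance, a minimal sketch of enabling the flag for a session and calling an NLP function (`stem` is used here as an example; the input word is arbitrary):
```sql
SET allow_experimental_nlp_functions = 1;
SELECT stem('en', 'running');  -- returns 'run'
```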

View File

@ -1,5 +1,6 @@
---
slug: /en/sql-reference/table-functions/azureBlobStorage
sidebar_position: 10
sidebar_label: azureBlobStorage
keywords: [azure blob storage]
---
@ -34,16 +35,16 @@ A table with the specified structure for reading or writing data in the specifie
Write data into azure blob storage using the following :
```sql
INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1',
INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1',
'test_container', 'test_{_partition_id}.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
'CSV', 'auto', 'column1 UInt32, column2 UInt32, column3 UInt32') PARTITION BY column3 VALUES (1, 2, 3), (3, 2, 1), (78, 43, 3);
```
And then it can be read using
And then it can be read using
```sql
SELECT * FROM azureBlobStorage('http://azurite1:10000/devstoreaccount1',
'test_container', 'test_1.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
SELECT * FROM azureBlobStorage('http://azurite1:10000/devstoreaccount1',
'test_container', 'test_1.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
'CSV', 'auto', 'column1 UInt32, column2 UInt32, column3 UInt32');
```

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/cluster
sidebar_position: 50
sidebar_position: 30
sidebar_label: cluster
title: "cluster, clusterAllReplicas"
---
@ -9,7 +9,7 @@ Allows to access all shards in an existing cluster which configured in `remote_s
`clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection.
:::note
:::note
All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table.
:::
@ -23,9 +23,9 @@ clusterAllReplicas('cluster_name', db, table[, sharding_key])
```
**Arguments**
- `cluster_name` Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `db.table` or `db`, `table` - Name of a database and a table.
- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard.
- `cluster_name` Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `db.table` or `db`, `table` - Name of a database and a table.
- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard.
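A minimal usage sketch (the cluster name and the `default.hits` table are assumptions for illustration):
``` sql
-- Ask every replica of every shard in the cluster for its local row count.
SELECT hostName() AS host, count() AS rows
FROM clusterAllReplicas('cluster_simple', default.hits)
GROUP BY host
```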
**Returned value**

View File

@ -1,6 +1,7 @@
---
slug: /en/sql-reference/table-functions/deltalake
sidebar_label: DeltaLake
sidebar_position: 45
sidebar_label: deltaLake
---
# deltaLake Table Function

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/dictionary
sidebar_position: 54
sidebar_position: 47
sidebar_label: dictionary
title: dictionary
---

View File

@ -1,6 +1,6 @@
---
slug: /en/engines/table-functions/executable
sidebar_position: 55
sidebar_position: 50
sidebar_label: executable
keywords: [udf, user defined function, clickhouse, executable, table, function]
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/file
sidebar_position: 37
sidebar_position: 60
sidebar_label: file
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/format
sidebar_position: 56
sidebar_position: 65
sidebar_label: format
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/gcs
sidebar_position: 45
sidebar_position: 70
sidebar_label: gcs
keywords: [gcs, bucket]
---
@ -16,7 +16,7 @@ gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression])
```
:::tip GCS
The GCS Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC.
The GCS Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC.
:::

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/generate
sidebar_position: 47
sidebar_position: 75
sidebar_label: generateRandom
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/hdfs
sidebar_position: 45
sidebar_position: 80
sidebar_label: hdfs
---
@ -79,7 +79,7 @@ SELECT count(*)
FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32')
```
:::note
:::note
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/hdfsCluster
sidebar_position: 55
sidebar_position: 81
sidebar_label: hdfsCluster
---
@ -50,7 +50,7 @@ SELECT count(*)
FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32')
```
:::note
:::note
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::

View File

@ -1,6 +1,7 @@
---
slug: /en/sql-reference/table-functions/hudi
sidebar_label: Hudi
sidebar_position: 85
sidebar_label: hudi
---
# hudi Table Function

View File

@ -1,6 +1,7 @@
---
slug: /en/sql-reference/table-functions/iceberg
sidebar_label: Iceberg
sidebar_position: 90
sidebar_label: iceberg
---
# iceberg Table Function

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/table-functions/
sidebar_label: Table Functions
sidebar_position: 34
sidebar_position: 1
---
# Table Functions
# Table Functions
Table functions are methods for constructing tables.

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/input
sidebar_position: 46
sidebar_position: 95
sidebar_label: input
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/jdbc
sidebar_position: 43
sidebar_position: 100
sidebar_label: jdbc
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/merge
sidebar_position: 38
sidebar_position: 130
sidebar_label: merge
---
@ -16,7 +16,7 @@ merge('db_name', 'tables_regexp')
**Arguments**
- `db_name` — Possible values:
- database name,
- database name,
- constant expression that returns a string with a database name, for example, `currentDatabase()`,
- `REGEXP(expression)`, where `expression` is a regular expression to match the DB names.

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/mongodb
sidebar_position: 42
sidebar_position: 135
sidebar_label: mongodb
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/mysql
sidebar_position: 42
sidebar_position: 137
sidebar_label: mysql
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/null
sidebar_position: 53
sidebar_position: 140
sidebar_label: null function
title: 'null'
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/numbers
sidebar_position: 39
sidebar_position: 145
sidebar_label: numbers
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/odbc
sidebar_position: 44
sidebar_position: 150
sidebar_label: odbc
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/postgresql
sidebar_position: 42
sidebar_position: 160
sidebar_label: postgresql
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/redis
sidebar_position: 43
sidebar_position: 170
sidebar_label: redis
---
@ -31,7 +31,7 @@ redis(host:port, key, structure[, db_index[, password[, pool_size]]])
- `primary` must be specified; it supports only one column in the primary key. The primary key will be serialized in binary as the Redis key.
- Columns other than the primary key will be serialized in binary as the Redis value, in the corresponding order.
- Queries with equality or `IN` filters on the key are optimized into a multi-key lookup in Redis. Queries without a filter on the key fall back to a full table scan, which is a heavy operation.
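A hedged sketch of the difference, assuming a local Redis on `localhost:6379` and an illustrative structure of `key String, value UInt32`:
``` sql
-- Equality / IN filters on the primary key column become a multi-key lookup in Redis (fast).
SELECT * FROM redis('localhost:6379', 'key', 'key String, value UInt32') WHERE key IN ('user:1', 'user:2');
-- No filter on the key column: every Redis key has to be scanned (heavy).
SELECT * FROM redis('localhost:6379', 'key', 'key String, value UInt32') WHERE value > 10;
```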

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/remote
sidebar_position: 40
sidebar_position: 175
sidebar_label: remote
---
@ -89,10 +89,10 @@ SELECT * FROM remote_table;
```
### Migration of tables from one system to another:
This example uses one table from a sample dataset. The database is `imdb`, and the table is `actors`.
This example uses one table from a sample dataset. The database is `imdb`, and the table is `actors`.
#### On the source ClickHouse system (the system that currently hosts the data)
- Verify the source database and table name (`imdb.actors`)
- Verify the source database and table name (`imdb.actors`)
```sql
show databases
```

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/s3
sidebar_position: 45
sidebar_position: 180
sidebar_label: s3
keywords: [s3, gcs, bucket]
---
@ -33,7 +33,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_
and not ~~https://storage.cloud.google.com~~.
:::
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
- `NOSIGN` - If this keyword is provided in place of credentials, none of the requests will be signed.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/s3Cluster
sidebar_position: 55
sidebar_position: 181
sidebar_label: s3Cluster
title: "s3Cluster Table Function"
---
@ -31,18 +31,18 @@ Select the data from all the files in the `/root/data/clickhouse` and `/root/dat
``` sql
SELECT * FROM s3Cluster(
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio',
'minio123',
'CSV',
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio',
'minio123',
'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'
) ORDER BY (name, value, polygon);
```
Count the total amount of rows in all files in the cluster `cluster_simple`:
:::tip
:::tip
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::

View File

@ -1,19 +1,19 @@
---
slug: /en/sql-reference/table-functions/sqlite
sidebar_position: 55
sidebar_position: 185
sidebar_label: sqlite
title: sqlite
---
Allows performing queries on data stored in an [SQLite](../../engines/database-engines/sqlite.md) database.
**Syntax**
**Syntax**
``` sql
sqlite('db_path', 'table_name')
```
**Arguments**
**Arguments**
- `db_path` — Path to a file with an SQLite database. [String](../../sql-reference/data-types/string.md).
- `table_name` — Name of a table in the SQLite database. [String](../../sql-reference/data-types/string.md).
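A minimal usage sketch (the database path and table name are placeholders):
``` sql
SELECT * FROM sqlite('/var/lib/sqlite/app.db', 'users') LIMIT 5;
```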
@ -40,6 +40,6 @@ Result:
└───────┴──────┘
```
**See Also**
**See Also**
- [SQLite](../../engines/table-engines/integrations/sqlite.md) table engine

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/url
sidebar_position: 41
sidebar_position: 200
sidebar_label: url
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/urlCluster
sidebar_position: 55
sidebar_position: 201
sidebar_label: urlCluster
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/table-functions/view
sidebar_position: 51
sidebar_position: 210
sidebar_label: view
title: view
---

View File

@ -1355,6 +1355,10 @@ Parameters:
<timezone>Europe/Moscow</timezone>
```
**See also**
- [session_timezone](../settings/settings.md#session_timezone)
## tcp_port {#server_configuration_parameters-tcp_port}
Port for communicating with clients over the TCP protocol.

View File

@ -4127,6 +4127,63 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca
Default value: `false`
## session_timezone {#session_timezone}
Sets the default time zone (session_timezone) of the current session instead of the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). That is, all DateTime/DateTime64 values with no explicitly specified time zone are interpreted as belonging to the specified zone.
With the setting value `''` (empty string), it matches the server time zone.
The functions `timeZone()` and `serverTimezone()` return the time zone of the current session and of the server, respectively.
Examples:
```sql
SELECT timeZone(), serverTimezone() FORMAT TSV
Europe/Berlin Europe/Berlin
```
```sql
SELECT timeZone(), serverTimezone() SETTINGS session_timezone = 'Asia/Novosibirsk' FORMAT TSV
Asia/Novosibirsk Europe/Berlin
```
```sql
SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'America/Denver' FORMAT TSV
1999-12-13 07:23:23.123
```
Possible values:
- Any time zone from `system.time_zones`, for example `Europe/Berlin`, `UTC` or `Zulu`
Default value: `''`.
:::warning
Sometimes, when `DateTime` and `DateTime64` values are formed, the `session_timezone` setting may be ignored.
This can lead to confusion. See the example and explanation below.
:::
```sql
CREATE TABLE test_tz (`d` DateTime('UTC')) ENGINE = Memory AS SELECT toDateTime('2000-01-01 00:00:00', 'UTC');
SELECT *, timezone() FROM test_tz WHERE d = toDateTime('2000-01-01 00:00:00') SETTINGS session_timezone = 'Asia/Novosibirsk'
0 rows in set.
SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS session_timezone = 'Asia/Novosibirsk'
┌───────────────────d─┬─timezone()───────┐
│ 2000-01-01 00:00:00 │ Asia/Novosibirsk │
└─────────────────────┴──────────────────┘
```
This happens because of the different origin of the value used for the comparison:
- In the first query, the `toDateTime()` function, when creating a `DateTime` value, takes into account the `session_timezone` setting from the query context;
- In the second query, the `DateTime` is formed implicitly from a string, inheriting the type of the column `d` (including its time zone), and the `session_timezone` setting is ignored.
**See also**
- [timezone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
## rename_files_after_processing
- **Type:** String

View File

@ -26,7 +26,8 @@ SELECT
## timeZone {#timezone}
Returns the time zone of the server.
Returns the server time zone that is used as the default for the current session: the value of the [session_timezone](../../operations/settings/settings.md#session_timezone) setting, if it is set.
If the function is executed in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise, it produces a constant value.
**Syntax**
@ -43,6 +44,33 @@ timeZone()
Type: [String](../../sql-reference/data-types/string.md).
**See also**
- [serverTimeZone](#servertimezone)
## serverTimeZone {#servertimezone}
Returns the default time zone of the server, including the one set via [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone).
If the function is executed in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise, it produces a constant value.
**Syntax**
``` sql
serverTimeZone()
```
Aliases: `serverTimezone`.
**Returned value**
- Timezone.
Type: [String](../../sql-reference/data-types/string.md).
**See also**
- [timeZone](#timezone)
## toTimeZone {#totimezone}
Converts a date or a date with time to the specified time zone. The time zone is an attribute of the `Date` and `DateTime` types. The internal value (the number of seconds) of the table field or of the result column does not change; only the field's type changes and, accordingly, its textual representation.

View File

@ -4,7 +4,9 @@
#include <map>
#include <iostream>
#include <iomanip>
#include <memory>
#include <optional>
#include <Common/ThreadStatus.h>
#include <Common/scope_guard_safe.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/replace.hpp>
@ -307,7 +309,7 @@ int Client::main(const std::vector<std::string> & /*args*/)
try
{
UseSSL use_ssl;
MainThreadStatus::getInstance();
auto & thread_status = MainThreadStatus::getInstance();
setupSignalHandler();
std::cout << std::fixed << std::setprecision(3);
@ -320,6 +322,14 @@ try
processConfig();
initTtyBuffer(toProgressOption(config().getString("progress", "default")));
{
// All that just to set DB::CurrentThread::get().getGlobalContext()
// which is required for client timezone (pushed from server) to work.
auto thread_group = std::make_shared<ThreadGroup>();
const_cast<ContextWeakPtr&>(thread_group->global_context) = global_context;
thread_status.attachToGroup(thread_group, false);
}
/// Includes delayed_interactive.
if (is_interactive)
{

View File

@ -44,7 +44,7 @@ void ClusterCopierApp::initialize(Poco::Util::Application & self)
time_t timestamp = Poco::Timestamp().epochTime();
auto curr_pid = Poco::Process::id();
process_id = std::to_string(DateLUT::instance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid);
process_id = std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid);
host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id;
process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id));
fs::create_directories(process_path);

View File

@ -306,8 +306,8 @@ try
/// Initialize DateLUT early, to not interfere with running time of first query.
LOG_DEBUG(log, "Initializing DateLUT.");
DateLUT::instance();
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone());
DateLUT::serverTimezoneInstance();
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::serverTimezoneInstance().getTimeZone());
/// Don't want to use DNS cache
DNSResolver::instance().setDisableCacheFlag();

View File

@ -491,7 +491,7 @@ private:
const DateLUTImpl & date_lut;
public:
explicit DateTimeModel(UInt64 seed_) : seed(seed_), date_lut(DateLUT::instance()) {}
explicit DateTimeModel(UInt64 seed_) : seed(seed_), date_lut(DateLUT::serverTimezoneInstance()) {}
void train(const IColumn &) override {}
void finalize() override {}

View File

@ -960,8 +960,8 @@ try
/// Initialize DateLUT early, to not interfere with running time of first query.
LOG_DEBUG(log, "Initializing DateLUT.");
DateLUT::instance();
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone());
DateLUT::serverTimezoneInstance();
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::serverTimezoneInstance().getTimeZone());
/// Storage with temporary data for processing of heavy queries.
if (!server_settings.tmp_policy.value.empty())

View File

@ -1543,12 +1543,12 @@
-->
<!-- Configuration for the query cache -->
<!-- <query_cache> -->
<!-- <max_size_in_bytes>1073741824</max_size_in_bytes> -->
<!-- <max_entries>1024</max_entries> -->
<!-- <max_entry_size_in_bytes>1048576</max_entry_size_in_bytes> -->
<!-- <max_entry_size_in_rows>30000000</max_entry_size_in_rows> -->
<!-- </query_cache> -->
<query_cache>
<max_size_in_bytes>1073741824</max_size_in_bytes>
<max_entries>1024</max_entries>
<max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
<max_entry_size_in_rows>30000000</max_entry_size_in_rows>
</query_cache>
<!-- Uncomment if enable merge tree metadata cache -->
<!--merge_tree_metadata_cache>

View File

@ -78,14 +78,14 @@ namespace detail
void serialize(WriteBuffer & buf) const
{
writeBinary(count, buf);
writeBinaryLittleEndian(count, buf);
buf.write(reinterpret_cast<const char *>(elems), count * sizeof(elems[0]));
}
void deserialize(ReadBuffer & buf)
{
UInt16 new_count = 0;
readBinary(new_count, buf);
readBinaryLittleEndian(new_count, buf);
if (new_count > TINY_MAX_ELEMS)
throw Exception(ErrorCodes::INCORRECT_DATA, "The number of elements {} for the 'tiny' kind of quantileTiming is exceeding the maximum of {}", new_count, TINY_MAX_ELEMS);
buf.readStrict(reinterpret_cast<char *>(elems), new_count * sizeof(elems[0]));
@ -164,14 +164,14 @@ namespace detail
void serialize(WriteBuffer & buf) const
{
writeBinary(elems.size(), buf);
writeBinaryLittleEndian(elems.size(), buf);
buf.write(reinterpret_cast<const char *>(elems.data()), elems.size() * sizeof(elems[0]));
}
void deserialize(ReadBuffer & buf)
{
size_t size = 0;
readBinary(size, buf);
readBinaryLittleEndian(size, buf);
if (size > 10'000)
throw Exception(ErrorCodes::INCORRECT_DATA, "The number of elements {} for the 'medium' kind of quantileTiming is too large", size);
@ -341,7 +341,7 @@ namespace detail
void serialize(WriteBuffer & buf) const
{
writeBinary(count, buf);
writeBinaryLittleEndian(count, buf);
if (count * 2 > SMALL_THRESHOLD + BIG_SIZE)
{
@ -356,8 +356,8 @@ namespace detail
{
if (count_small[i])
{
writeBinary(UInt16(i), buf);
writeBinary(count_small[i], buf);
writeBinaryLittleEndian(UInt16(i), buf);
writeBinaryLittleEndian(count_small[i], buf);
}
}
@ -365,19 +365,19 @@ namespace detail
{
if (count_big[i])
{
writeBinary(UInt16(i + SMALL_THRESHOLD), buf);
writeBinary(count_big[i], buf);
writeBinaryLittleEndian(UInt16(i + SMALL_THRESHOLD), buf);
writeBinaryLittleEndian(count_big[i], buf);
}
}
/// Symbolizes end of data.
writeBinary(UInt16(BIG_THRESHOLD), buf);
writeBinaryLittleEndian(UInt16(BIG_THRESHOLD), buf);
}
}
void deserialize(ReadBuffer & buf)
{
readBinary(count, buf);
readBinaryLittleEndian(count, buf);
if (count * 2 > SMALL_THRESHOLD + BIG_SIZE)
{
@ -388,12 +388,12 @@ namespace detail
while (true)
{
UInt16 index = 0;
readBinary(index, buf);
readBinaryLittleEndian(index, buf);
if (index == BIG_THRESHOLD)
break;
UInt64 elem_count = 0;
readBinary(elem_count, buf);
readBinaryLittleEndian(elem_count, buf);
if (index < SMALL_THRESHOLD)
count_small[index] = elem_count;
@ -692,7 +692,7 @@ public:
void serialize(WriteBuffer & buf) const
{
auto kind = which();
DB::writePODBinary(kind, buf);
writeBinaryLittleEndian(kind, buf);
if (kind == Kind::Tiny)
tiny.serialize(buf);
@ -706,7 +706,7 @@ public:
void deserialize(ReadBuffer & buf)
{
Kind kind;
DB::readPODBinary(kind, buf);
readBinaryLittleEndian(kind, buf);
if (kind == Kind::Tiny)
{

View File

@ -721,7 +721,15 @@ void BackupCoordinationRemote::prepareFileInfos() const
bool BackupCoordinationRemote::startWritingFile(size_t data_file_index)
{
bool acquired_writing = false;
{
/// Check if this host is already writing this file.
std::lock_guard lock{writing_files_mutex};
if (writing_files.contains(data_file_index))
return false;
}
/// Store in Zookeeper that this host is the only host which is allowed to write this file.
bool host_is_assigned = false;
String full_path = zookeeper_path + "/writing_files/" + std::to_string(data_file_index);
String host_index_str = std::to_string(current_host_index);
@ -733,14 +741,23 @@ bool BackupCoordinationRemote::startWritingFile(size_t data_file_index)
auto code = zk->tryCreate(full_path, host_index_str, zkutil::CreateMode::Persistent);
if (code == Coordination::Error::ZOK)
acquired_writing = true; /// If we've just created this ZooKeeper's node, the writing is acquired, i.e. we should write this data file.
host_is_assigned = true; /// If we've just created this ZooKeeper's node, this host is assigned.
else if (code == Coordination::Error::ZNODEEXISTS)
acquired_writing = (zk->get(full_path) == host_index_str); /// The previous retry could write this ZooKeeper's node and then fail.
host_is_assigned = (zk->get(full_path) == host_index_str); /// The previous retry could write this ZooKeeper's node and then fail.
else
throw zkutil::KeeperException(code, full_path);
});
return acquired_writing;
if (!host_is_assigned)
return false; /// Other host is writing this file.
{
/// Check if this host is already writing this file,
/// and if it's not, mark that this host is writing this file.
/// We have to check that again because we were accessing ZooKeeper with the mutex unlocked.
std::lock_guard lock{writing_files_mutex};
return writing_files.emplace(data_file_index).second; /// Return false if this host is already writing this file.
}
}
bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &) const

View File

@ -106,12 +106,14 @@ private:
mutable std::optional<BackupCoordinationReplicatedAccess> TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
mutable std::optional<BackupCoordinationReplicatedSQLObjects> TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
mutable std::optional<BackupCoordinationFileInfos> TSA_GUARDED_BY(file_infos_mutex) file_infos;
std::unordered_set<size_t> TSA_GUARDED_BY(writing_files_mutex) writing_files;
mutable std::mutex zookeeper_mutex;
mutable std::mutex replicated_tables_mutex;
mutable std::mutex replicated_access_mutex;
mutable std::mutex replicated_sql_objects_mutex;
mutable std::mutex file_infos_mutex;
mutable std::mutex writing_files_mutex;
};
}

View File

@ -77,7 +77,6 @@
#include "config_version.h"
#include "config.h"
namespace fs = std::filesystem;
using namespace std::literals;
@ -103,6 +102,7 @@ namespace ErrorCodes
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_OPEN_FILE;
extern const int FILE_ALREADY_EXISTS;
}
}
@ -568,30 +568,17 @@ try
CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string);
UInt64 compression_level = 3;
if (query_with_output->is_outfile_append && compression_method != CompressionMethod::None)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot append to compressed file. Please use uncompressed file or remove APPEND keyword.");
}
if (query_with_output->compression_level)
{
const auto & compression_level_node = query_with_output->compression_level->as<ASTLiteral &>();
bool res = compression_level_node.value.tryGet<UInt64>(compression_level);
auto range = getCompressionLevelRange(compression_method);
if (!res || compression_level < range.first || compression_level > range.second)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Invalid compression level, must be positive integer in range {}-{}",
range.first,
range.second);
compression_level_node.value.tryGet<UInt64>(compression_level);
}
auto flags = O_WRONLY | O_EXCL;
if (query_with_output->is_outfile_append)
flags |= O_APPEND;
else if (query_with_output->is_outfile_truncate)
flags |= O_TRUNC;
else
flags |= O_CREAT;
@ -872,6 +859,67 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
}
}
// Run some local checks to make sure queries into output file will work before sending to server.
if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get()))
{
String out_file;
if (query_with_output->out_file)
{
const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
out_file = out_file_node.value.safeGet<std::string>();
std::string compression_method_string;
if (query_with_output->compression)
{
const auto & compression_method_node = query_with_output->compression->as<ASTLiteral &>();
compression_method_string = compression_method_node.value.safeGet<std::string>();
}
CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string);
UInt64 compression_level = 3;
if (query_with_output->is_outfile_append && query_with_output->is_outfile_truncate)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot use INTO OUTFILE with APPEND and TRUNCATE simultaneously.");
}
if (query_with_output->is_outfile_append && compression_method != CompressionMethod::None)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot append to compressed file. Please use uncompressed file or remove APPEND keyword.");
}
if (query_with_output->compression_level)
{
const auto & compression_level_node = query_with_output->compression_level->as<ASTLiteral &>();
bool res = compression_level_node.value.tryGet<UInt64>(compression_level);
auto range = getCompressionLevelRange(compression_method);
if (!res || compression_level < range.first || compression_level > range.second)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Invalid compression level, must be positive integer in range {}-{}",
range.first,
range.second);
}
if (fs::exists(out_file))
{
if (!query_with_output->is_outfile_append && !query_with_output->is_outfile_truncate)
{
throw Exception(
ErrorCodes::FILE_ALREADY_EXISTS,
"File {} exists, consider using APPEND or TRUNCATE.",
out_file);
}
}
}
}
const auto & settings = global_context->getSettingsRef();
const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1;
@ -896,7 +944,6 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
if (send_external_tables)
sendExternalTables(parsed_query);
receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel);
break;
@ -1048,6 +1095,10 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_)
onProfileEvents(packet.block);
return true;
case Protocol::Server::TimezoneUpdate:
onTimezoneUpdate(packet.server_timezone);
return true;
default:
throw Exception(
ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription());
@ -1070,6 +1121,11 @@ void ClientBase::onProgress(const Progress & value)
progress_indication.writeProgress(*tty_buf);
}
void ClientBase::onTimezoneUpdate(const String & tz)
{
global_context->setSetting("session_timezone", tz);
}
void ClientBase::onEndOfStream()
{
@ -1221,9 +1277,13 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de
columns_description = ColumnsDescription::parse(packet.multistring_message[1]);
return receiveSampleBlock(out, columns_description, parsed_query);
case Protocol::Server::TimezoneUpdate:
onTimezoneUpdate(packet.server_timezone);
break;
default:
throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER,
"Unexpected packet from server (expected Data, Exception or Log, got {})",
"Unexpected packet from server (expected Data, Exception, Log or TimezoneUpdate, got {})",
String(Protocol::Server::toString(packet.type)));
}
}
@ -1538,7 +1598,9 @@ void ClientBase::receiveLogsAndProfileEvents(ASTPtr parsed_query)
{
auto packet_type = connection->checkPacket(0);
while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents))
while (packet_type && (*packet_type == Protocol::Server::Log
|| *packet_type == Protocol::Server::ProfileEvents
|| *packet_type == Protocol::Server::TimezoneUpdate))
{
receiveAndProcessPacket(parsed_query, false);
packet_type = connection->checkPacket(0);
@ -1575,6 +1637,10 @@ bool ClientBase::receiveEndOfQuery()
onProfileEvents(packet.block);
break;
case Protocol::Server::TimezoneUpdate:
onTimezoneUpdate(packet.server_timezone);
break;
default:
throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER,
"Unexpected packet from server (expected Exception, EndOfStream, Log, Progress or ProfileEvents. Got {})",

View File

@ -148,6 +148,7 @@ private:
void cancelQuery();
void onProgress(const Progress & value);
void onTimezoneUpdate(const String & tz);
void onData(Block & block, ASTPtr parsed_query);
void onLogData(Block & block);
void onTotals(Block & block, ASTPtr parsed_query);

View File

@ -1022,6 +1022,11 @@ Packet Connection::receivePacket()
res.block = receiveProfileEvents();
return res;
case Protocol::Server::TimezoneUpdate:
readStringBinary(server_timezone, *in);
res.server_timezone = server_timezone;
return res;
default:
/// In unknown state, disconnect - to not leave unsynchronised connection.
disconnect();

View File

@ -419,6 +419,7 @@ Packet HedgedConnections::receivePacketFromReplica(const ReplicaLocation & repli
}
replica_with_last_received_packet = replica_location;
break;
case Protocol::Server::TimezoneUpdate:
case Protocol::Server::PartUUIDs:
case Protocol::Server::ProfileInfo:
case Protocol::Server::Totals:

View File

@ -38,6 +38,8 @@ struct Packet
ParallelReadRequest request;
ParallelReadResponse response;
std::string server_timezone;
Packet() : type(Protocol::Server::Hello) {}
};

View File

@ -259,6 +259,7 @@ Packet MultiplexedConnections::drain()
switch (packet.type)
{
case Protocol::Server::TimezoneUpdate:
case Protocol::Server::MergeTreeAllRangesAnnounecement:
case Protocol::Server::MergeTreeReadTaskRequest:
case Protocol::Server::ReadTaskRequest:
@ -340,6 +341,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
switch (packet.type)
{
case Protocol::Server::TimezoneUpdate:
case Protocol::Server::MergeTreeAllRangesAnnounecement:
case Protocol::Server::MergeTreeReadTaskRequest:
case Protocol::Server::ReadTaskRequest:

View File

@ -43,7 +43,7 @@ Suggest::Suggest()
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP"
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND"
});
}
@ -160,6 +160,7 @@ void Suggest::fetch(IServerConnection & connection, const ConnectionTimeouts & t
fillWordsFromBlock(packet.block);
continue;
case Protocol::Server::TimezoneUpdate:
case Protocol::Server::Progress:
case Protocol::Server::ProfileInfo:
case Protocol::Server::Totals:

View File

@ -7,6 +7,7 @@
#include <filesystem>
#include <fstream>
#include <Interpreters/Context.h>
namespace
@ -163,3 +164,8 @@ DateLUT & DateLUT::getInstance()
static DateLUT ret;
return ret;
}
std::string DateLUT::extractTimezoneFromContext(DB::ContextPtr query_context)
{
return query_context->getSettingsRef().session_timezone.value;
}

View File

@ -5,6 +5,7 @@
#include <base/defines.h>
#include <boost/noncopyable.hpp>
#include "Common/CurrentThread.h"
#include <atomic>
#include <memory>
@ -16,22 +17,59 @@
class DateLUT : private boost::noncopyable
{
public:
/// Return singleton DateLUTImpl instance for the default time zone.
/// Return DateLUTImpl instance for session timezone.
/// session_timezone is a session-level setting.
/// If setting is not set, returns the server timezone.
static ALWAYS_INLINE const DateLUTImpl & instance()
{
const auto & date_lut = getInstance();
if (DB::CurrentThread::isInitialized())
{
std::string timezone_from_context;
const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext();
if (query_context)
{
timezone_from_context = extractTimezoneFromContext(query_context);
if (!timezone_from_context.empty())
return date_lut.getImplementation(timezone_from_context);
}
/// On the server side, timezone is passed in query_context,
/// but on CH-client side we have no query context,
/// and each time we modify client's global context
const DB::ContextPtr global_context = DB::CurrentThread::get().getGlobalContext();
if (global_context)
{
timezone_from_context = extractTimezoneFromContext(global_context);
if (!timezone_from_context.empty())
return date_lut.getImplementation(timezone_from_context);
}
}
return serverTimezoneInstance();
}
static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone)
{
if (time_zone.empty())
return instance();
const auto & date_lut = getInstance();
return date_lut.getImplementation(time_zone);
}
/// Return singleton DateLUTImpl for the server time zone.
/// It may be set using 'timezone' server setting.
static ALWAYS_INLINE const DateLUTImpl & serverTimezoneInstance()
{
const auto & date_lut = getInstance();
return *date_lut.default_impl.load(std::memory_order_acquire);
}
/// Return singleton DateLUTImpl instance for a given time zone.
static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone)
{
const auto & date_lut = getInstance();
if (time_zone.empty())
return *date_lut.default_impl.load(std::memory_order_acquire);
return date_lut.getImplementation(time_zone);
}
static void setDefaultTimezone(const std::string & time_zone)
{
auto & date_lut = getInstance();
@ -45,6 +83,8 @@ protected:
private:
static DateLUT & getInstance();
static std::string extractTimezoneFromContext(DB::ContextPtr query_context);
const DateLUTImpl & getImplementation(const std::string & time_zone) const;
using DateLUTImplPtr = std::unique_ptr<DateLUTImpl>;
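A minimal usage sketch of the new resolution order (not part of the diff; it assumes a query with `SETTINGS session_timezone = 'Europe/Berlin'` is running on the current thread):

```cpp
#include <Common/DateLUT.h>

/// Sketch only: with `session_timezone = 'Europe/Berlin'` in the current query context,
/// instance() resolves the Berlin LUT, while serverTimezoneInstance() still returns
/// the LUT for the server's own 'timezone' configuration value.
void timezoneResolutionExample()
{
    const DateLUTImpl & session_lut = DateLUT::instance();               // session-aware
    const DateLUTImpl & server_lut = DateLUT::serverTimezoneInstance();  // server default
    bool overridden = &session_lut != &server_lut;                       // true when the setting overrides the default
    (void)overridden;
}
```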

View File

@ -33,8 +33,24 @@ UInt8 getDayOfWeek(const cctz::civil_day & date)
UNREACHABLE();
}
inline cctz::time_point<cctz::seconds> lookupTz(const cctz::time_zone & cctz_time_zone, const cctz::civil_day & date)
{
cctz::time_zone::civil_lookup lookup = cctz_time_zone.lookup(date);
/// Ambiguity is possible if the time was changed backwards at midnight,
/// or if after midnight the time was changed back to midnight (for example, one hour backwards at 01:00),
/// or if after midnight the time was changed to the previous day (for example, two hours backwards at 01:00).
/// Then midnight appears twice. Usually the time change happens exactly at 00:00 or 01:00.
/// If the transition did not involve the previous day, we should use the first midnight as the start of the day,
/// otherwise it's better to use the second midnight.
return lookup.trans < lookup.post
? lookup.post /* Second midnight appears after transition, so there was a piece of previous day after transition */
: lookup.pre;
}
}
__attribute__((__weak__)) extern bool inside_main;
@ -63,27 +79,52 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
offset_is_whole_number_of_minutes_during_epoch = true;
cctz::civil_day date = lut_start;
cctz::time_point<cctz::seconds> start_of_day_time_point_if_no_transitions = lookupTz(cctz_time_zone, date);
auto next_transition_date = date;
/// Fill the lookup table:
/// Adjustments only occur at the dates of transitions. We save next_transition_date and add 24h to the
/// previous value until we reach it. Then we do the adjustment and get the new next_transition_date.
UInt32 i = 0;
do
{
cctz::time_zone::civil_lookup lookup = cctz_time_zone.lookup(date);
/// Ambiguity is possible if time was changed backwards at the midnight
/// or after midnight time has been changed back to midnight, for example one hour backwards at 01:00
/// or after midnight time has been changed to the previous day, for example two hours backwards at 01:00
/// Then midnight appears twice. Usually time change happens exactly at 00:00 or 01:00.
/// If transition did not involve previous day, we should use the first midnight as the start of the day,
/// otherwise it's better to use the second midnight.
std::chrono::time_point start_of_day_time_point = lookup.trans < lookup.post
? lookup.post /* Second midnight appears after transition, so there was a piece of previous day after transition */
: lookup.pre;
start_of_day = std::chrono::system_clock::to_time_t(start_of_day_time_point);
Values & values = lut[i];
values.time_at_offset_change_value = 0;
values.amount_of_offset_change_value = 0;
if (date >= next_transition_date)
{
start_of_day_time_point_if_no_transitions = lookupTz(cctz_time_zone, date);
/// If UTC offset was changed this day.
/// Change in time zone without transition is possible, e.g. Moscow 1991 Sun, 31 Mar, 02:00 MSK to EEST
cctz::time_zone::civil_transition transition{};
if (cctz_time_zone.next_transition(start_of_day_time_point_if_no_transitions - std::chrono::seconds(1), &transition)
&& (cctz::civil_day(transition.from) == date || cctz::civil_day(transition.to) == date)
&& transition.from != transition.to)
{
values.time_at_offset_change_value = (transition.from - cctz::civil_second(date)) / Values::OffsetChangeFactor;
values.amount_of_offset_change_value = (transition.to - transition.from) / Values::OffsetChangeFactor;
/// We don't support too large changes.
if (values.amount_of_offset_change_value > 24 * 4)
values.amount_of_offset_change_value = 24 * 4;
else if (values.amount_of_offset_change_value < -24 * 4)
values.amount_of_offset_change_value = -24 * 4;
/// We don't support cases when time change results in switching to previous day.
/// Shift the point of time change later.
if (values.time_at_offset_change_value + values.amount_of_offset_change_value < 0)
values.time_at_offset_change_value = -values.amount_of_offset_change_value;
}
next_transition_date = std::min(cctz::civil_day(transition.to), cctz::civil_day(transition.from));
}
start_of_day = std::chrono::system_clock::to_time_t(start_of_day_time_point_if_no_transitions);
values.year = date.year();
values.month = date.month();
values.day_of_month = date.day();
@ -103,38 +144,14 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
else
values.days_in_month = i != 0 ? lut[i - 1].days_in_month : 31;
values.time_at_offset_change_value = 0;
values.amount_of_offset_change_value = 0;
if (offset_is_whole_number_of_hours_during_epoch && start_of_day > 0 && start_of_day % 3600)
offset_is_whole_number_of_hours_during_epoch = false;
if (offset_is_whole_number_of_minutes_during_epoch && start_of_day > 0 && start_of_day % 60)
offset_is_whole_number_of_minutes_during_epoch = false;
/// If UTC offset was changed this day.
/// Change in time zone without transition is possible, e.g. Moscow 1991 Sun, 31 Mar, 02:00 MSK to EEST
cctz::time_zone::civil_transition transition{};
if (cctz_time_zone.next_transition(start_of_day_time_point - std::chrono::seconds(1), &transition)
&& (cctz::civil_day(transition.from) == date || cctz::civil_day(transition.to) == date)
&& transition.from != transition.to)
{
values.time_at_offset_change_value = (transition.from - cctz::civil_second(date)) / Values::OffsetChangeFactor;
values.amount_of_offset_change_value = (transition.to - transition.from) / Values::OffsetChangeFactor;
/// We don't support too large changes.
if (values.amount_of_offset_change_value > 24 * 4)
values.amount_of_offset_change_value = 24 * 4;
else if (values.amount_of_offset_change_value < -24 * 4)
values.amount_of_offset_change_value = -24 * 4;
/// We don't support cases when time change results in switching to previous day.
/// Shift the point of time change later.
if (values.time_at_offset_change_value + values.amount_of_offset_change_value < 0)
values.time_at_offset_change_value = -values.amount_of_offset_change_value;
}
/// Going to next day.
start_of_day_time_point_if_no_transitions += std::chrono::hours(24);
++date;
++i;
}
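The restructuring above is an optimization: the full cctz lookup and transition search now run only when the iteration reaches the next transition date; on all other days the start of day simply advances by 24 hours. A self-contained sketch of the same pattern (simplified: it always takes `.pre` and omits the offset-change bookkeeping done in the diff):

```cpp
#include <cctz/time_zone.h>
#include <algorithm>
#include <chrono>
#include <ctime>
#include <vector>

std::vector<std::time_t> startsOfDays(const cctz::time_zone & tz, cctz::civil_day first, int days)
{
    std::vector<std::time_t> result;
    cctz::civil_day date = first;
    cctz::civil_day next_transition = date;   // forces a full lookup on the first iteration
    cctz::time_point<cctz::seconds> start_of_day{};

    for (int i = 0; i < days; ++i, ++date)
    {
        if (date >= next_transition)
        {
            /// Expensive path: re-resolve the start of day and find the next transition.
            /// (The diff's lookupTz() additionally disambiguates repeated midnights; .pre is enough for a sketch.)
            start_of_day = tz.lookup(date).pre;
            cctz::time_zone::civil_transition transition{};
            if (tz.next_transition(start_of_day, &transition))
                next_transition = std::min(cctz::civil_day(transition.from), cctz::civil_day(transition.to));
        }
        result.push_back(std::chrono::system_clock::to_time_t(start_of_day));
        start_of_day += std::chrono::hours(24);   // cheap path between transitions
    }
    return result;
}
```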

View File

@ -24,9 +24,8 @@ private:
unsigned char m_month;
unsigned char m_day;
void init(time_t time)
void init(time_t time, const DateLUTImpl & date_lut)
{
const auto & date_lut = DateLUT::instance();
const auto & values = date_lut.getValues(time);
m_year = values.year;
@ -56,22 +55,22 @@ private:
}
public:
explicit LocalDate(time_t time)
explicit LocalDate(time_t time, const DateLUTImpl & time_zone = DateLUT::instance())
{
init(time);
init(time, time_zone);
}
LocalDate(DayNum day_num) /// NOLINT
LocalDate(DayNum day_num, const DateLUTImpl & time_zone = DateLUT::instance()) /// NOLINT
{
const auto & values = DateLUT::instance().getValues(day_num);
const auto & values = time_zone.getValues(day_num);
m_year = values.year;
m_month = values.month;
m_day = values.day_of_month;
}
explicit LocalDate(ExtendedDayNum day_num)
explicit LocalDate(ExtendedDayNum day_num, const DateLUTImpl & time_zone = DateLUT::instance())
{
const auto & values = DateLUT::instance().getValues(day_num);
const auto & values = time_zone.getValues(day_num);
m_year = values.year;
m_month = values.month;
m_day = values.day_of_month;
@ -99,15 +98,13 @@ public:
LocalDate(const LocalDate &) noexcept = default;
LocalDate & operator= (const LocalDate &) noexcept = default;
DayNum getDayNum() const
DayNum getDayNum(const DateLUTImpl & lut = DateLUT::instance()) const
{
const auto & lut = DateLUT::instance();
return DayNum(lut.makeDayNum(m_year, m_month, m_day).toUnderType());
}
ExtendedDayNum getExtenedDayNum() const
ExtendedDayNum getExtenedDayNum(const DateLUTImpl & lut = DateLUT::instance()) const
{
const auto & lut = DateLUT::instance();
return ExtendedDayNum (lut.makeDayNum(m_year, m_month, m_day).toUnderType());
}
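A hypothetical usage sketch of the widened constructors (include paths assumed); omitting the new argument keeps the previous `DateLUT::instance()` behaviour:

```cpp
#include <Common/DateLUT.h>
#include <Common/LocalDate.h>

/// Hypothetical usage only: the second argument is the new optional DateLUTImpl parameter.
void localDateExample(time_t now)
{
    const DateLUTImpl & utc = DateLUT::instance("UTC");
    LocalDate in_utc(now, utc);        // interpret the timestamp in UTC
    LocalDate in_session(now);         // interpret it in the session/default timezone
    bool same_calendar_day = in_utc.getDayNum(utc) == in_session.getDayNum();
    (void)same_calendar_day;
}
```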

View File

@ -83,7 +83,8 @@ namespace Protocol
ProfileEvents = 14, /// Packet with profile events from server.
MergeTreeAllRangesAnnounecement = 15,
MergeTreeReadTaskRequest = 16, /// Request from a MergeTree replica to a coordinator
MAX = MergeTreeReadTaskRequest,
TimezoneUpdate = 17, /// Receive server's (session-wide) default timezone
MAX = TimezoneUpdate,
};
@ -111,6 +112,7 @@ namespace Protocol
"ProfileEvents",
"MergeTreeAllRangesAnnounecement",
"MergeTreeReadTaskRequest",
"TimezoneUpdate",
};
return packet <= MAX
? data[packet]
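A hedged sketch of decoding the new packet, assuming the payload is just the timezone name written as a binary string (the actual send/receive code is outside this hunk); the result would populate the `server_timezone` field of the client-side Packet shown earlier in this diff:

```cpp
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <string>

/// Sketch only: assumes the TimezoneUpdate payload is a single binary string.
std::string readTimezoneUpdatePayload(DB::ReadBuffer & in)
{
    std::string timezone_name;
    DB::readStringBinary(timezone_name, in);
    return timezone_name;
}
```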

View File

@ -53,7 +53,7 @@
/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing in common with VERSION_REVISION;
/// the latter is just a number for the server version (one number instead of a commit SHA)
/// for simplicity (sometimes it may be more convenient in some use cases).
#define DBMS_TCP_PROTOCOL_VERSION 54463
#define DBMS_TCP_PROTOCOL_VERSION 54464
#define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449
@ -75,3 +75,5 @@
#define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 54462
#define DBMS_MIN_PROTOCOL_VERSION_WITH_TOTAL_BYTES_IN_PROGRESS 54463
#define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54464

View File

@ -770,6 +770,7 @@ class IColumn;
M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
M(Timezone, session_timezone, "", "The default timezone for the current session or query. The server default timezone if empty.", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

View File

@ -13,7 +13,6 @@
#include <cmath>
namespace DB
{
namespace ErrorCodes
@ -451,6 +450,17 @@ String SettingFieldEnumHelpers::readBinary(ReadBuffer & in)
return str;
}
void SettingFieldTimezone::writeBinary(WriteBuffer & out) const
{
writeStringBinary(value, out);
}
void SettingFieldTimezone::readBinary(ReadBuffer & in)
{
String str;
readStringBinary(str, in);
*this = std::move(str);
}
String SettingFieldCustom::toString() const
{

View File

@ -6,6 +6,7 @@
#include <Core/Field.h>
#include <Core/MultiEnum.h>
#include <boost/range/adaptor/map.hpp>
#include <cctz/time_zone.h>
#include <chrono>
#include <unordered_map>
#include <string_view>
@ -565,6 +566,42 @@ void SettingFieldMultiEnum<EnumT, Traits>::readBinary(ReadBuffer & in)
return getEnumValues<EnumType>().size();\
}
/// Setting field for specifying user-defined timezone. It is basically a string, but it needs validation.
struct SettingFieldTimezone
{
String value;
bool changed = false;
explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; }
explicit SettingFieldTimezone(const String & str) { validateTimezone(str); value = str; }
explicit SettingFieldTimezone(String && str) { validateTimezone(str); value = std::move(str); }
explicit SettingFieldTimezone(const char * str) { validateTimezone(str); value = str; }
explicit SettingFieldTimezone(const Field & f) { const String & str = f.safeGet<const String &>(); validateTimezone(str); value = str; }
SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; }
SettingFieldTimezone & operator =(const String & str) { *this = std::string_view{str}; return *this; }
SettingFieldTimezone & operator =(String && str) { validateTimezone(str); value = std::move(str); changed = true; return *this; }
SettingFieldTimezone & operator =(const char * str) { *this = std::string_view{str}; return *this; }
SettingFieldTimezone & operator =(const Field & f) { *this = f.safeGet<const String &>(); return *this; }
operator const String &() const { return value; } /// NOLINT
explicit operator Field() const { return value; }
const String & toString() const { return value; }
void parseFromString(const String & str) { *this = str; }
void writeBinary(WriteBuffer & out) const;
void readBinary(ReadBuffer & in);
private:
void validateTimezone(const std::string & tz_str)
{
cctz::time_zone validated_tz;
if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz))
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str);
}
};
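The validation relies on cctz rejecting unknown names. A standalone sketch of the same check, mirroring `validateTimezone()` above:

```cpp
#include <cctz/time_zone.h>
#include <string>

/// Returns true for an empty string or any name present in the system tzdata;
/// cctz::load_time_zone() fails for names it cannot find.
bool isValidTimezoneName(const std::string & tz_str)
{
    cctz::time_zone tz;
    return tz_str.empty() || cctz::load_time_zone(tz_str, &tz);
}

/// Example: isValidTimezoneName("Europe/Berlin") -> true,
///          isValidTimezoneName("Mars/Olympus_Mons") -> false.
```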
/// Can keep a value of any type. Used for user-defined settings.
struct SettingFieldCustom
{

View File

@ -417,6 +417,8 @@ private:
{
SentryWriter::onFault(sig, error_message, stack_trace);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunreachable-code"
/// Advise the user to send it manually.
if constexpr (std::string_view(VERSION_OFFICIAL).contains("official build"))
{
@ -436,6 +438,8 @@ private:
{
LOG_FATAL(log, "This ClickHouse version is not official and should be upgraded to the official build.");
}
#pragma clang diagnostic pop
}
/// ClickHouse Keeper does not link to some part of Settings.
@ -1042,7 +1046,7 @@ void BaseDaemon::shouldSetupWatchdog(char * argv0_)
void BaseDaemon::setupWatchdog()
{
/// Initialize in advance to avoid double initialization in forked processes.
DateLUT::instance();
DateLUT::serverTimezoneInstance();
std::string original_process_name;
if (argv0)

View File

@ -21,7 +21,9 @@ namespace DB
* all types with different time zones are equivalent and may be used interchangeably.
* Time zone only affects parsing and displaying in text formats.
*
* If time zone is not specified (example: DateTime without parameter), then default time zone is used.
* If time zone is not specified (example: DateTime without parameter),
* then the `session_timezone` setting value is used.
* If `session_timezone` is not set (or is an empty string), the server default time zone is used.
* Default time zone is server time zone, if server is doing transformations
* and if client is doing transformations, unless 'use_client_time_zone' setting is passed to client;
* Server time zone is the time zone specified in 'timezone' parameter in configuration file,
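To make the comment concrete, a hedged example (helper names from ClickHouse's ReadHelpers are assumed; exact offsets depend on tzdata) of the same text parsing to different instants under different LUTs:

```cpp
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Common/DateLUT.h>

/// Sketch only: 12:00 in Berlin (UTC+2 in summer) corresponds to 10:00 UTC,
/// so its epoch value is 7200 seconds smaller than 12:00 UTC for this date.
void parseWithDifferentLuts()
{
    time_t as_utc = 0;
    time_t as_berlin = 0;

    DB::ReadBufferFromString buf_utc("2023-06-26 12:00:00");
    DB::readDateTimeText(as_utc, buf_utc, DateLUT::instance("UTC"));

    DB::ReadBufferFromString buf_berlin("2023-06-26 12:00:00");
    DB::readDateTimeText(as_berlin, buf_berlin, DateLUT::instance("Europe/Berlin"));

    bool differ_by_two_hours = (as_utc - as_berlin) == 7200;
    (void)differ_by_two_hours;
}
```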

View File

@ -13,7 +13,7 @@ namespace DB
void SerializationDate::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeDateText(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num]), ostr);
writeDateText(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num]), ostr, time_zone);
}
void SerializationDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
@ -26,7 +26,7 @@ void SerializationDate::deserializeWholeText(IColumn & column, ReadBuffer & istr
void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum x;
readDateText(x, istr);
readDateText(x, istr, time_zone);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
@ -46,7 +46,7 @@ void SerializationDate::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
{
DayNum x;
assertChar('\'', istr);
readDateText(x, istr);
readDateText(x, istr, time_zone);
assertChar('\'', istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
@ -62,7 +62,7 @@ void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
{
DayNum x;
assertChar('"', istr);
readDateText(x, istr);
readDateText(x, istr, time_zone);
assertChar('"', istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
@ -77,8 +77,12 @@ void SerializationDate::serializeTextCSV(const IColumn & column, size_t row_num,
void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum value;
readCSV(value, istr);
readCSV(value, istr, time_zone);
assert_cast<ColumnUInt16 &>(column).getData().push_back(value);
}
SerializationDate::SerializationDate(const DateLUTImpl & time_zone_) : time_zone(time_zone_)
{
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <DataTypes/Serializations/SerializationNumber.h>
#include <Common/DateLUT.h>
namespace DB
{
@ -8,6 +9,8 @@ namespace DB
class SerializationDate final : public SerializationNumber<UInt16>
{
public:
explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::instance());
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
@ -18,6 +21,9 @@ public:
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
protected:
const DateLUTImpl & time_zone;
};
}
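A hypothetical construction example for the new parameter; the default argument keeps the previous behaviour:

```cpp
#include <Common/DateLUT.h>
#include <DataTypes/Serializations/SerializationDate.h>

/// Hypothetical usage: pin the serialization to an explicit LUT instead of the
/// session-resolved DateLUT::instance() default.
void serializationDateExample()
{
    DB::SerializationDate utc_serialization(DateLUT::instance("UTC"));
    DB::SerializationDate session_serialization;   // uses DateLUT::instance()
    (void)utc_serialization;
    (void)session_serialization;
}
```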

View File

@ -11,7 +11,7 @@ namespace DB
void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeDateText(ExtendedDayNum(assert_cast<const ColumnInt32 &>(column).getData()[row_num]), ostr);
writeDateText(ExtendedDayNum(assert_cast<const ColumnInt32 &>(column).getData()[row_num]), ostr, time_zone);
}
void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
@ -24,7 +24,7 @@ void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & is
void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
ExtendedDayNum x;
readDateText(x, istr);
readDateText(x, istr, time_zone);
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}
@ -44,7 +44,7 @@ void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & i
{
ExtendedDayNum x;
assertChar('\'', istr);
readDateText(x, istr);
readDateText(x, istr, time_zone);
assertChar('\'', istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
@ -60,7 +60,7 @@ void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & ist
{
ExtendedDayNum x;
assertChar('"', istr);
readDateText(x, istr);
readDateText(x, istr, time_zone);
assertChar('"', istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}
@ -78,4 +78,8 @@ void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr
readCSV(value, istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(value.getExtenedDayNum());
}
SerializationDate32::SerializationDate32(const DateLUTImpl & time_zone_) : time_zone(time_zone_)
{
}
}

View File

@ -1,12 +1,15 @@
#pragma once
#include <DataTypes/Serializations/SerializationNumber.h>
#include <Common/DateLUT.h>
namespace DB
{
class SerializationDate32 final : public SerializationNumber<Int32>
{
public:
explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::instance());
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
@ -17,5 +20,8 @@ public:
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
protected:
const DateLUTImpl & time_zone;
};
}

View File

@ -985,7 +985,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
const auto & create_query_string = metadata_it->second;
if (isTableExist(table_name, getContext()))
{
assert(create_query_string == readMetadataFile(table_name));
assert(create_query_string == readMetadataFile(table_name) || getTableUUIDIfReplicated(create_query_string, getContext()) != UUIDHelpers::Nil);
continue;
}
@ -1274,7 +1274,7 @@ void DatabaseReplicated::commitAlterTable(const StorageID & table_id,
const String & statement, ContextPtr query_context)
{
auto txn = query_context->getZooKeeperMetadataTransaction();
assert(!ddl_worker->isCurrentlyActive() || txn);
assert(!ddl_worker || !ddl_worker->isCurrentlyActive() || txn);
if (txn && txn->isInitialQuery())
{
String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);

View File

@ -91,6 +91,7 @@ void DatabaseReplicatedDDLWorker::initializeReplication()
if (zookeeper->tryGet(database->replica_path + "/digest", digest_str))
{
digest = parse<UInt64>(digest_str);
LOG_TRACE(log, "Metadata digest in ZooKeeper: {}", digest);
std::lock_guard lock{database->metadata_mutex};
local_digest = database->tables_metadata_digest;
}

View File

@ -114,7 +114,10 @@ QueryPipeline ExecutableDictionarySource::loadAll()
auto command = configuration.command;
updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context);
return QueryPipeline(coordinator->createPipe(command, configuration.command_arguments, sample_block, context));
ShellCommandSourceConfiguration command_configuration {
.check_exit_code = true,
};
return QueryPipeline(coordinator->createPipe(command, configuration.command_arguments, {}, sample_block, context, command_configuration));
}
QueryPipeline ExecutableDictionarySource::loadUpdatedAll()
@ -148,7 +151,11 @@ QueryPipeline ExecutableDictionarySource::loadUpdatedAll()
update_time = new_update_time;
LOG_TRACE(log, "loadUpdatedAll {}", command);
return QueryPipeline(coordinator->createPipe(command, command_arguments, sample_block, context));
ShellCommandSourceConfiguration command_configuration {
.check_exit_code = true,
};
return QueryPipeline(coordinator->createPipe(command, command_arguments, {}, sample_block, context, command_configuration));
}
QueryPipeline ExecutableDictionarySource::loadIds(const std::vector<UInt64> & ids)
@ -179,7 +186,11 @@ QueryPipeline ExecutableDictionarySource::getStreamForBlock(const Block & block)
Pipes shell_input_pipes;
shell_input_pipes.emplace_back(std::move(shell_input_pipe));
auto pipe = coordinator->createPipe(command, configuration.command_arguments, std::move(shell_input_pipes), sample_block, context);
ShellCommandSourceConfiguration command_configuration {
.check_exit_code = true,
};
auto pipe = coordinator->createPipe(command, configuration.command_arguments, std::move(shell_input_pipes), sample_block, context, command_configuration);
if (configuration.implicit_key)
pipe.addTransform(std::make_shared<TransformWithAdditionalColumns>(block, pipe.getHeader()));

View File

@ -132,6 +132,7 @@ QueryPipeline ExecutablePoolDictionarySource::getStreamForBlock(const Block & bl
ShellCommandSourceConfiguration command_configuration;
command_configuration.read_fixed_number_of_rows = true;
command_configuration.number_of_rows_to_read = block.rows();
command_configuration.check_exit_code = true;
Pipes shell_input_pipes;
shell_input_pipes.emplace_back(std::move(shell_input_pipe));

View File

@ -69,7 +69,6 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile(
, allow_seeks_after_first_read(allow_seeks_after_first_read_)
, use_external_buffer(use_external_buffer_)
, query_context_holder(cache_->getQueryContextHolder(query_id, settings_))
, is_persistent(settings_.is_file_cache_persistent)
, cache_log(cache_log_)
{
}
@ -125,7 +124,7 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
}
else
{
CreateFileSegmentSettings create_settings(is_persistent ? FileSegmentKind::Persistent : FileSegmentKind::Regular);
CreateFileSegmentSettings create_settings(FileSegmentKind::Regular);
file_segments = cache->getOrSet(cache_key, offset, size, file_size.value(), create_settings);
}
@ -149,8 +148,6 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::CachedReadBufferCreateBufferMicroseconds);
/// Use is_persistent flag from in-memory state of the filesegment,
/// because it is consistent with what is written on disk.
auto path = file_segment.getPathInLocalCache();
ReadSettings local_read_settings{settings};

View File

@ -147,8 +147,6 @@ private:
FileCache::QueryContextHolderPtr query_context_holder;
bool is_persistent;
std::shared_ptr<FilesystemCacheLog> cache_log;
};

View File

@ -194,7 +194,6 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile(
FileCachePtr cache_,
const String & source_path_,
const FileCache::Key & key_,
bool is_persistent_cache_file_,
const String & query_id_,
const WriteSettings & settings_)
: WriteBufferFromFileDecorator(std::move(impl_))
@ -202,7 +201,6 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile(
, cache(cache_)
, source_path(source_path_)
, key(key_)
, is_persistent_cache_file(is_persistent_cache_file_)
, query_id(query_id_)
, enable_cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log)
, throw_on_error_from_cache(settings_.throw_on_error_from_cache)
@ -255,8 +253,7 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size, bool t
try
{
auto segment_kind = is_persistent_cache_file ? FileSegmentKind::Persistent : FileSegmentKind::Regular;
if (!cache_writer->write(data, size, current_download_offset, segment_kind))
if (!cache_writer->write(data, size, current_download_offset, FileSegmentKind::Regular))
{
LOG_INFO(log, "Write-through cache is stopped as cache limit is reached and nothing can be evicted");
return;

Some files were not shown because too many files have changed in this diff.