Merge branch 'master' into errorCode-v2

commit 8a3f38f332
alexey-milovidov, 2020-11-04 01:40:41 +03:00, committed by GitHub
315 changed files with 2907 additions and 1214 deletions

View File

@ -16,8 +16,4 @@ endif ()
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)")
set (ARCH_PPC64LE 1)
# FIXME: move this check into tools.cmake
if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))
message(FATAL_ERROR "Only gcc-8 or higher is supported for powerpc architecture")
endif ()
endif ()

View File

@ -84,3 +84,9 @@ if (LINKER_NAME)
message(STATUS "Using custom linker by name: ${LINKER_NAME}")
endif ()
if (ARCH_PPC64LE)
if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))
message(FATAL_ERROR "Only gcc-8 or higher is supported for powerpc architecture")
endif ()
endif ()

View File

@ -164,7 +164,7 @@ case "$stage" in
# Lost connection to the server. This probably means that the server died
# with abort.
echo "failure" > status.txt
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed" server.log > description.txt
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*" server.log > description.txt
then
echo "Lost connection to server. See the logs" > description.txt
fi

View File

@ -17,7 +17,8 @@ RUN apt-get update \
sqlite3 \
curl \
tar \
krb5-user
krb5-user \
iproute2
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \

View File

@ -189,7 +189,7 @@ Replication is implemented in the `ReplicatedMergeTree` storage engine. The path
Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesn't support UPDATEs, replication is conflict-free. As there is no quorum acknowledgment of inserts, just-inserted data might be lost if one node fails.
Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. Actions are: get part; merge parts; drop a partition, and so on. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. It is achieved by electing one replica as the leader, and that replica initiates merges and writes “merge parts” actions to the log.
Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. Actions are: get part; merge parts; drop a partition, and so on. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. One of the leaders initiates a new merge first and writes “merge parts” actions to the log. Multiple replicas (or all) can be leaders at the same time. A replica can be prevented from becoming a leader using the `merge_tree` setting `replicated_can_become_leader`. The leaders are responsible for scheduling background merges.
Replication is physical: only compressed parts are transferred between nodes, not queries. Merges are processed on each replica independently in most cases to lower the network costs by avoiding network amplification. Large merged parts are sent over the network only in cases of significant replication lag.
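As a hedged illustration of the leader mechanics described above, leadership can be disabled per table through the `replicated_can_become_leader` merge tree setting (the table name and ZooKeeper path below are hypothetical):
``` sql
-- Sketch: this replica will not schedule background merges for the table.
CREATE TABLE t_replicated
(
    x UInt32
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t_replicated', '{replica}')
ORDER BY x
SETTINGS replicated_can_become_leader = 0;
```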

View File

@ -88,7 +88,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage).
- `index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
- `min_index_granularity_bytes` — Min allowed size of data granules in bytes. Default value: 1024b. To provide safeguard against accidentally creating tables with very low index_granularity_bytes. See [Data Storage](#mergetree-data-storage).
- `min_index_granularity_bytes` — Min allowed size of data granules in bytes. Default value: 1024b. To provide a safeguard against accidentally creating tables with very low index_granularity_bytes. See [Data Storage](#mergetree-data-storage).
- `enable_mixed_granularity_parts` — Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. Before version 19.11, there was only the `index_granularity` setting for restricting granule size. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting for the tables to improve the efficiency of `SELECT` queries.
- `use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in “Server configuration parameters”.
- `min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
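Taken together, a minimal sketch of a table definition that sets several of the settings above explicitly (the table and columns are hypothetical; the values are the documented defaults):
``` sql
CREATE TABLE t_example
(
    id UInt64,
    payload String
)
ENGINE = MergeTree()
ORDER BY id
SETTINGS
    index_granularity = 8192,                       -- maximum rows per granule
    min_index_granularity_bytes = 1024,             -- safeguard against tiny granules
    min_merge_bytes_to_use_direct_io = 10737418240; -- 10 GiB threshold for O_DIRECT merges
```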

View File

@ -148,6 +148,31 @@ You can define the parameters explicitly instead of using substitutions. This mi
When working with large clusters, we recommend using substitutions because they reduce the probability of error.
You can specify default arguments for `Replicated` table engine in the server configuration file. For instance:
```xml
<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path>
<default_replica_name>{replica}</default_replica_name>
```
In this case, you can omit arguments when creating tables:
``` sql
CREATE TABLE table_name (
x UInt32
) ENGINE = ReplicatedMergeTree
ORDER BY x;
```
It is equivalent to:
``` sql
CREATE TABLE table_name (
x UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/table_name', '{replica}')
ORDER BY x;
```
Run the `CREATE TABLE` query on each replica. This query creates a new replicated table, or adds a new replica to an existing one.
If you add a new replica after the table already contains some data on other replicas, the data will be copied from the other replicas to the new one after running the query. In other words, the new replica syncs itself with the others.
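To wait until a replica has caught up (for example, right after adding it), the `SYSTEM SYNC REPLICA` statement can be used; a minimal sketch with a hypothetical table name:
``` sql
-- Blocks until the replica has fetched and applied its replication queue.
SYSTEM SYNC REPLICA table_name;
```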

View File

@ -79,7 +79,7 @@ By default, data is returned in TabSeparated format (for more information, see t
You use the FORMAT clause of the query to request any other format.
Also, you can use the default_format URL parameter or X-ClickHouse-Format header to specify a default format other than TabSeparated.
Also, you can use the default_format URL parameter or the X-ClickHouse-Format header to specify a default format other than TabSeparated.
``` bash
$ echo 'SELECT 1 FORMAT Pretty' | curl 'http://localhost:8123/?' --data-binary @-
@ -170,7 +170,7 @@ $ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gz
!!! note "Note"
Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly.
You can use the database URL parameter or X-ClickHouse-Database header to specify the default database.
You can use the database URL parameter or the X-ClickHouse-Database header to specify the default database.
``` bash
$ echo 'SELECT number FROM numbers LIMIT 10' | curl 'http://localhost:8123/?database=system' --data-binary @-

View File

@ -90,6 +90,7 @@ toc_title: Adopters
| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
| <a href="https://www.teralytics.net/" class="favicon">Teralytics</a> | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |

View File

@ -2148,7 +2148,34 @@ Result:
└───────────────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
## output_format_pretty_row_numbers {#output_format_pretty_row_numbers}
Adds row numbers to output in the [Pretty](../../interfaces/formats.md#pretty) format.
Possible values:
- 0 — Output without row numbers.
- 1 — Output with row numbers.
Default value: `0`.
**Example**
Query:
```sql
SET output_format_pretty_row_numbers = 1;
SELECT TOP 3 name, value FROM system.settings;
```
Result:
```text
┌─name────────────────────┬─value───┐
1. │ min_compress_block_size │ 65536 │
2. │ max_compress_block_size │ 1048576 │
3. │ max_block_size │ 65505 │
└─────────────────────────┴─────────┘
```
## allow_experimental_bigint_types {#allow_experimental_bigint_types}
@ -2160,3 +2187,5 @@ Possible values:
- 0 — The bigint data type is disabled.
Default value: `0`.
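A minimal sketch of enabling the setting and using a big integer type (the table name is hypothetical):
``` sql
SET allow_experimental_bigint_types = 1;

CREATE TABLE t_big (x Int256) ENGINE = Memory;
INSERT INTO t_big VALUES (1000000000000000000000000000000);
SELECT x * 2 FROM t_big;
```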
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

View File

@ -6,19 +6,21 @@ You can use this table to get information similar to the [DESCRIBE TABLE](../../
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
- `marks_bytes` (UInt64) — The size of marks, in bytes.
- `comment` (String) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression.
- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression.
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `name` ([String](../../sql-reference/data-types/string.md)) — Column name.
- `type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1.
- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes.
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes.
- `comment` ([String](../../sql-reference/data-types/string.md)) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the partition expression.
- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sorting key expression.
- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
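As a sketch, the columns listed above can be queried directly (the filter values are illustrative):
``` sql
SELECT name, type, position, compression_codec
FROM system.columns
WHERE database = 'system' AND table = 'settings'
LIMIT 3;
```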
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) <!--hide-->

View File

@ -0,0 +1,148 @@
# system.parts_columns {#system_tables-parts_columns}
Contains information about parts and columns of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
Each row describes one data part.
Columns:
- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
Formats:
- `YYYYMM` for automatic partitioning by month.
- `any_string` when partitioning manually.
- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
Possible values:
- `Wide` — Each column is stored in a separate file in a filesystem.
- `Compact` — All columns are stored in one file in a filesystem.
Data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.
- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's deleted. Inactive data parts remain after merging.
- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint doesn't work for adaptive granularity).
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows.
- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of all the data part files in bytes.
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the file with marks.
- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the data part became inactive.
- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
- `min_date` ([Date](../../sql-reference/data-types/date.md)) — The minimum value of the date key in the data part.
- `max_date` ([Date](../../sql-reference/data-types/date.md)) — The maximum value of the date key in the data part.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The minimum number of data parts that make up the current part after merging.
- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The maximum number of data parts that make up the current part after merging.
- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts.
- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) used by primary key values.
- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) reserved for primary key values.
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Name of the table engine without parameters.
- `disk_name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk that stores the data part.
- `path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
- `column` ([String](../../sql-reference/data-types/string.md)) — Name of the column.
- `type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `column_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1.
- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
- `column_bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the column in bytes.
- `column_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the column, in bytes.
- `column_data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the decompressed data in the column, in bytes.
- `column_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the column with marks, in bytes.
- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `bytes_on_disk`.
- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `marks_bytes`.
**Example**
``` sql
SELECT * FROM system.parts_columns LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
partition: tuple()
name: all_1_2_1
part_type: Wide
active: 1
marks: 2
rows: 2
bytes_on_disk: 155
data_compressed_bytes: 56
data_uncompressed_bytes: 4
marks_bytes: 96
modification_time: 2020-09-23 10:13:36
remove_time: 2106-02-07 06:28:15
refcount: 1
min_date: 1970-01-01
max_date: 1970-01-01
partition_id: all
min_block_number: 1
max_block_number: 2
level: 1
data_version: 1
primary_key_bytes_in_memory: 2
primary_key_bytes_in_memory_allocated: 64
database: default
table: 53r93yleapyears
engine: MergeTree
disk_name: default
path: /var/lib/clickhouse/data/default/53r93yleapyears/all_1_2_1/
column: id
type: Int8
column_position: 1
default_kind:
default_expression:
column_bytes_on_disk: 76
column_data_compressed_bytes: 28
column_data_uncompressed_bytes: 2
column_marks_bytes: 48
```
**See Also**
- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/parts_columns) <!--hide-->

View File

@ -53,9 +53,9 @@ Columns:
- `table` (`String`) - Table name
- `engine` (`String`) - Table engine name
- `is_leader` (`UInt8`) - Whether the replica is the leader.
Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform.
Multiple replicas can be leaders at the same time. A replica can be prevented from becoming a leader using the `merge_tree` setting `replicated_can_become_leader`. The leaders are responsible for scheduling background merges.
Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader.
- `can_become_leader` (`UInt8`) - Whether the replica can be elected as a leader.
- `can_become_leader` (`UInt8`) - Whether the replica can be a leader.
- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode.
This mode is turned on if the config doesn't have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper.
- `is_session_expired` (`UInt8`) - the session with ZooKeeper has expired. Basically the same as `is_readonly`.
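A sketch of a monitoring query over the flags described above:
``` sql
SELECT database, table, is_leader, can_become_leader, is_readonly, is_session_expired
FROM system.replicas;
```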

View File

@ -16,7 +16,7 @@ By default `clickhouse-local` does not have access to data on the same host, but
!!! warning "Warning"
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
For temporary data an unique temporary data directory is created by default. If you want to override this behavior the data directory can be explicitly specified with the `-- --path` option.
For temporary data, a unique temporary data directory is created by default. If you want to override this behavior, the data directory can be explicitly specified with the `-- --path` option.
## Usage {#usage}

View File

@ -23,8 +23,6 @@ SELECT
└─────────────────────┴────────────┴────────────┴─────────────────────┘
```
Only time zones that differ from UTC by a whole number of hours are supported.
## toTimeZone {#totimezone}
Convert time or date and time to the specified time zone.
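A minimal sketch (the target time zone is illustrative):
``` sql
SELECT
    now() AS server_time,
    toTimeZone(now(), 'UTC') AS utc_time;
```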

View File

@ -780,4 +780,42 @@ Result:
└──────────────────────────────────┘
```
## formatRowNoNewline {#formatrownonewline}
Converts arbitrary expressions into a string using the given format. The function trims the last `\n` if any.
**Syntax**
``` sql
formatRowNoNewline(format, x, y, ...)
```
**Parameters**
- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
- `x`,`y`, ... — Expressions.
**Returned value**
- A formatted string.
**Example**
Query:
``` sql
SELECT formatRowNoNewline('CSV', number, 'good')
FROM numbers(3)
```
Result:
``` text
┌─formatRowNoNewline('CSV', number, 'good')─┐
│ 0,"good" │
│ 1,"good" │
│ 2,"good" │
└───────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) <!--hide-->

View File

@ -15,7 +15,7 @@ Syntax:
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
```
Normal views don't store any data, they just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
Normal views don't store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
As an example, assume you've created a view:
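A minimal sketch of such a view; the table and column names here are illustrative, not taken from the original page:
``` sql
CREATE VIEW hits_view AS
SELECT CounterID, count() AS cnt
FROM hits
GROUP BY CounterID;

-- Reading from the view runs the saved query as a subquery in FROM.
SELECT * FROM hits_view LIMIT 10;
```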

View File

@ -13,12 +13,61 @@ Basic query format:
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
The query can specify a list of columns to insert `[(c1, c2, c3)]`. In this case, the rest of the columns are filled with:
You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax.
Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax.
For example, consider the table:
``` sql
SHOW CREATE insert_select_testtable;
```
```
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE insert_select_testtable
(
`a` Int8,
`b` String,
`c` Int8
)
ENGINE = MergeTree()
ORDER BY a
SETTINGS index_granularity = 8192 │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
``` sql
INSERT INTO insert_select_testtable (*) VALUES (1, 'a', 1) ;
```
If you want to insert data into all the columns except `b`, you need to pass as many values as the number of columns listed in parentheses:
``` sql
INSERT INTO insert_select_testtable (* EXCEPT(b)) Values (2, 2);
```
``` sql
SELECT * FROM insert_select_testtable;
```
```
┌─a─┬─b─┬─c─┐
│ 2 │ │ 2 │
└───┴───┴───┘
┌─a─┬─b─┬─c─┐
│ 1 │ a │ 1 │
└───┴───┴───┘
```
In this example, we see that the second inserted row has its `a` and `c` columns filled with the passed values, and `b` filled with its default value.
If a list of columns doesn't include all existing columns, the rest of the columns are filled with:
- The values calculated from the `DEFAULT` expressions specified in the table definition.
- Zeros and empty strings, if `DEFAULT` expressions are not defined.
If [strict_insert_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query.
If [strict\_insert\_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query.
Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query:
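For instance, a minimal sketch reusing `insert_select_testtable` from above, with the format named explicitly (`Values` is itself a format):
``` sql
INSERT INTO insert_select_testtable (a, b, c) FORMAT Values (3, 'c', 3)
```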

View File

@ -4,13 +4,17 @@ toc_title: WITH
# WITH Clause {#with-clause}
This section provides support for Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), so the results of `WITH` clause can be used in the rest of `SELECT` query.
ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is, it allows you to use the results of the `WITH` clause in the rest of the `SELECT` query. Named subqueries can be included in the current and child query contexts in places where table objects are allowed. Recursion is prevented by hiding the CTEs of the current level from the `WITH` expression.
## Limitations {#limitations}
## Syntax
1. Recursive queries are not supported.
2. When subquery is used inside WITH section, its result should be scalar with exactly one row.
3. Expressions results are not available in subqueries.
``` sql
WITH <expression> AS <identifier>
```
or
``` sql
WITH <identifier> AS <subquery expression>
```
## Examples {#examples}
@ -22,10 +26,10 @@ SELECT *
FROM hits
WHERE
EventDate = toDate(ts_upper_bound) AND
EventTime <= ts_upper_bound
EventTime <= ts_upper_bound;
```
**Example 2:** Evicting sum(bytes) expression result from SELECT clause column list
**Example 2:** Evicting a sum(bytes) expression result from the SELECT clause column list
``` sql
WITH sum(bytes) as s
@ -34,10 +38,10 @@ SELECT
table
FROM system.parts
GROUP BY table
ORDER BY s
ORDER BY s;
```
**Example 3:** Using results of scalar subquery
**Example 3:** Using results of a scalar subquery
``` sql
/* this example would return TOP 10 of the largest tables */
@ -53,27 +57,14 @@ SELECT
FROM system.parts
GROUP BY table
ORDER BY table_disk_usage DESC
LIMIT 10
LIMIT 10;
```
**Example 4:** Re-using expression in subquery
As a workaround for current limitation for expression usage in subqueries, you may duplicate it.
**Example 4:** Reusing expression in a subquery
``` sql
WITH ['hello'] AS hello
SELECT
hello,
*
FROM
(
WITH ['hello'] AS hello
SELECT hello
)
WITH test1 AS (SELECT i + 1, j + 1 FROM test1)
SELECT * FROM test1;
```
``` text
┌─hello─────┬─hello─────┐
│ ['hello'] │ ['hello'] │
└───────────┴───────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/with/) <!--hide-->

View File

@ -190,7 +190,7 @@ ClickHouse is strongly typed, so there is no
Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has an open session in `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse does not support UPDATE, replication is conflict-free. Since quorum acknowledgment of inserts is not implemented, just-inserted data might be lost if one node fails.
Metadata for replication is stored in `ZooKeeper`. There is a replication log that lists the actions to perform. Among these actions: get the part; merge parts; drop a partition, and so on. Each replica copies the replication log into its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. This is achieved by electing one replica as the leader, and that replica initiates merges and writes “merge parts” actions to the log.
Metadata for replication is stored in `ZooKeeper`. There is a replication log that lists the actions to perform. Among these actions: get the part; merge parts; drop a partition, and so on. Each replica copies the replication log into its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. One of the leader replicas initiates a new merge first and writes “merge parts” actions to the log. Multiple replicas (or all of them) can be leaders at the same time. A replica can be prevented from becoming a leader using the `merge_tree` setting `replicated_can_become_leader`.
Replication is physical: only compressed parts are transferred between nodes, not queries. In most cases, merges are processed on each replica independently to lower network costs and avoid network amplification. Large merged parts are sent over the network only in cases of significant replication lag.

View File

@ -159,6 +159,22 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
The [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) lists the possible configuration options. Use underscores (`_`) instead of dots in the ClickHouse configuration. For example, `check.crcs=true` corresponds to `<check_crcs>true</check_crcs>`.
### Kerberos support {#kafka-kerberos-support}
To start working with Kerberos-aware Kafka, add a `security_protocol` child element with the value `sasl_plaintext`. This is enough if a Kerberos ticket-granting ticket has been obtained and is cached by the OS facilities.
ClickHouse can use keytab-based Kerberos credentials. Consider the `sasl_kerberos_service_name`, `sasl_kerberos_keytab`, `sasl_kerberos_principal` and `sasl.kerberos.kinit.cmd` child elements.
Example:
``` xml
<!-- Kerberos-aware Kafka -->
<kafka>
<security_protocol>SASL_PLAINTEXT</security_protocol>
<sasl_kerberos_keytab>/home/kafkauser/kafkauser.keytab</sasl_kerberos_keytab>
<sasl_kerberos_principal>kafkauser/kafkahost@EXAMPLE.COM</sasl_kerberos_principal>
</kafka>
```
## Virtual columns {#virtualnye-stolbtsy}
- `_topic` — Kafka topic.

View File

@ -79,6 +79,7 @@ ORDER BY expr
- `index_granularity` — The maximum number of data rows between index marks. Default value: 8192. See [Data Storage](#mergetree-data-storage).
- `index_granularity_bytes` — The maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by the number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
- `min_index_granularity_bytes` — The minimum allowed size of data granules in bytes. Default value: 1024b. A safeguard against accidentally creating tables with a very low `index_granularity_bytes` value. See [Data Storage](#mergetree-data-storage).
- `enable_mixed_granularity_parts` — Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big (tens and hundreds of megabytes) rows. If you have tables with big rows, you can enable this setting to improve the efficiency of `SELECT` queries.
- `use_minimalistic_part_header_in_zookeeper` — The storage method of data part headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper = 1`, ZooKeeper stores less data. For details, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in the “Server configuration parameters” section.
- `min_merge_bytes_to_use_direct_io` — The minimum data volume for a merge operation required for using direct (unbuffered) I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the total volume of all data to read exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse uses the `O_DIRECT` flag when reading data from disk. If `min_merge_bytes_to_use_direct_io = 0`, direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.

View File

@ -149,6 +149,31 @@ CREATE TABLE table_name
When working with large clusters, we recommend using substitutions because they reduce the probability of error.
You can specify default arguments for the replicated table engine in the server configuration file:
```xml
<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path>
<default_replica_name>{replica}</default_replica_name>
```
In this case, you can omit the arguments when creating tables:
``` sql
CREATE TABLE table_name (
x UInt32
) ENGINE = ReplicatedMergeTree
ORDER BY x;
```
This is equivalent to the following query:
``` sql
CREATE TABLE table_name (
x UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/table_name', '{replica}')
ORDER BY x;
```
Run the `CREATE TABLE` query on each replica. This query creates a new replicated table, or adds a new replica to an existing one.
If you add a new replica after the table already contains some data on other replicas, the data will be copied from the other replicas to the new one after running the query. In other words, the new replica syncs itself with the others.

View File

@ -76,8 +76,11 @@ ECT 1
```
By default, data is returned in the TabSeparated format (for more details, see the “Formats” section).
You can request any other format using the FORMAT clause of the query.
Also, you can use the `default_format` URL parameter or the `X-ClickHouse-Format` header to specify a default format other than `TabSeparated`.
``` bash
$ echo 'SELECT 1 FORMAT Pretty' | curl 'http://localhost:8123/?' --data-binary @-
┏━━━┓
@ -168,7 +171,7 @@ $ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gz
!!! note "Note"
Some HTTP clients might decompress data (`gzip` and `deflate`) from the server by default, and you might get decompressed data even if you use the compression settings correctly.
The `database` URL parameter can specify the default database.
You can use the `database` URL parameter or the `X-ClickHouse-Database` header to specify the default database.
``` bash
$ echo 'SELECT number FROM numbers LIMIT 10' | curl 'http://localhost:8123/?database=system' --data-binary @-

View File

@ -1977,6 +1977,48 @@ SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes;
└───────────────┘
```
## output_format_pretty_row_numbers {#output_format_pretty_row_numbers}
Enables row number display for queries output in the [Pretty](../../interfaces/formats.md#pretty) format.
Possible values:
- 0 — Row numbers are not output.
- 1 — Row numbers are output.
Default value: `0`.
**Example**
Query:
```sql
SET output_format_pretty_row_numbers = 1;
SELECT TOP 3 name, value FROM system.settings;
```
Result:
```text
┌─name────────────────────┬─value───┐
1. │ min_compress_block_size │ 65536 │
2. │ max_compress_block_size │ 1048576 │
3. │ max_block_size │ 65505 │
└─────────────────────────┴─────────┘
```
## allow_experimental_bigint_types {#allow_experimental_bigint_types}
Enables or disables support for integer values exceeding the maximum value allowed for the `int` type.
Possible values:
- 1 — Big integer values are supported.
- 0 — Big integer values are not supported.
Default value: `0`.
## lock_acquire_timeout {#lock_acquire_timeout}
Sets the number of seconds the server waits to acquire a table lock.
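A minimal sketch of setting it for a session (the value is illustrative):
``` sql
-- Fail after 20 seconds instead of waiting with the default timeout.
SET lock_acquire_timeout = 20;
```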

View File

@ -6,19 +6,21 @@
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string.
- `default_expression` (String) — Expression for the default value, or an empty string.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
- `marks_bytes` (UInt64) — The size of marks, in bytes.
- `comment` (String) — Comment on the column, or an empty string.
- `is_in_partition_key` (UInt8) — Flag showing whether the column is in the partition key.
- `is_in_sorting_key` (UInt8) — Flag showing whether the column is in the sorting key.
- `is_in_primary_key` (UInt8) — Flag showing whether the column is in the primary key.
- `is_in_sampling_key` (UInt8) — Flag showing whether the column is in the sampling key.
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `name` ([String](../../sql-reference/data-types/string.md)) — Column name.
- `type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table (numbering starts with 1).
- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string.
- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string.
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes.
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes.
- `comment` ([String](../../sql-reference/data-types/string.md)) — Comment on the column, or an empty string.
- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag showing whether the column is in the partition key.
- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag showing whether the column is in the sorting key.
- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag showing whether the column is in the primary key.
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag showing whether the column is in the sampling key.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
[Original article](https://clickhouse.tech/docs/ru/operations/system_tables/columns) <!--hide-->

View File

@ -0,0 +1,148 @@
# system.parts_columns {#system_tables-parts_columns}
Contains information about parts and columns of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
Each row describes one data part.
Columns:
- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
Formats:
- `YYYYMM` for automatic partitioning by month.
- `any_string` when partitioning manually.
- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
Possible values:
- `Wide` — Each column is stored in a separate file.
- `Compact` — All columns are stored in one file.
The data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.
- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the data part is active. If a data part is active, it's used by the table; otherwise, it will be deleted. Inactive data parts remain after merging.
- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192).
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows.
- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of all data part files in bytes.
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the data part. Auxiliary files (for example, files with marks) are not included.
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data in the data part. Auxiliary files (for example, files with marks) are not included.
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the file with marks.
- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time the directory with the data part was modified. Usually corresponds to the time of data part creation.
- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the data part became inactive.
- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
- `min_date` ([Date](../../sql-reference/data-types/date.md)) — The minimum value of the date key in the data part.
- `max_date` ([Date](../../sql-reference/data-types/date.md)) — The maximum value of the date key in the data part.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The minimum number of data parts that make up the current part after merging.
- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The maximum number of data parts that make up the current part after merging.
- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Depth of the merge tree. If there were no merges, `level=0`.
- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) used by primary key values.
- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) reserved for primary key values.
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Name of the table engine without parameters.
- `disk_name` ([String](../../sql-reference/data-types/string.md)) — Name of the disk that stores the data part.
- `path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
- `column` ([String](../../sql-reference/data-types/string.md)) — Name of the column.
- `type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `column_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of the column (numbering starts with 1).
- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string.
- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string.
- `column_bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the column in bytes.
- `column_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the column, in bytes.
- `column_data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data in the column, in bytes.
- `column_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the column with marks, in bytes.
- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `bytes_on_disk`.
- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `marks_bytes`.
**Example**
``` sql
SELECT * FROM system.parts_columns LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
partition: tuple()
name: all_1_2_1
part_type: Wide
active: 1
marks: 2
rows: 2
bytes_on_disk: 155
data_compressed_bytes: 56
data_uncompressed_bytes: 4
marks_bytes: 96
modification_time: 2020-09-23 10:13:36
remove_time: 2106-02-07 06:28:15
refcount: 1
min_date: 1970-01-01
max_date: 1970-01-01
partition_id: all
min_block_number: 1
max_block_number: 2
level: 1
data_version: 1
primary_key_bytes_in_memory: 2
primary_key_bytes_in_memory_allocated: 64
database: default
table: 53r93yleapyears
engine: MergeTree
disk_name: default
path: /var/lib/clickhouse/data/default/53r93yleapyears/all_1_2_1/
column: id
type: Int8
column_position: 1
default_kind:
default_expression:
column_bytes_on_disk: 76
column_data_compressed_bytes: 28
column_data_uncompressed_bytes: 2
column_marks_bytes: 48
```
**See Also**
- [MergeTree engine](../../engines/table-engines/mergetree-family/mergetree.md)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/parts_columns) <!--hide-->

View File

@ -53,9 +53,9 @@ active_replicas: 2
- `table` (`String`) - Table name.
- `engine` (`String`) - Table engine name.
- `is_leader` (`UInt8`) - Whether the replica is the leader.
At any given time, no more than one replica is the leader. The leader is responsible for selecting the background merges to perform.
Multiple replicas can be leaders at the same time. A replica can be prevented from becoming a leader using the `merge_tree` setting `replicated_can_become_leader`. The leaders schedule the background merges to perform.
Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader.
- `can_become_leader` (`UInt8`) - Whether the replica can be elected as a leader.
- `can_become_leader` (`UInt8`) - Whether the replica can be a leader.
- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode.
This mode is turned on if the config has no sections with ZooKeeper, if an unknown error occurred when reinitializing the session in ZooKeeper, and during session reinitialization in ZooKeeper.
- `is_session_expired` (`UInt8`) - Whether the session with ZooKeeper has expired. Basically the same as `is_readonly`.

View File

@ -14,6 +14,8 @@ toc_title: clickhouse-local
!!! warning "Warning"
We do not recommend loading the server configuration into `clickhouse-local`, because data can easily be damaged by careless actions.
For temporary data, a dedicated temporary directory is created by default. If you want to override this behavior, the data directory can be specified explicitly with the `-- --path` option.
## Invocation {#vyzov-programmy}
Basic invocation format:
@ -39,25 +41,51 @@ $ clickhouse-local --structure "table_structure" --input-format "format_of_incom
## Invocation examples {#primery-vyzova}
``` bash
$ echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table"
$ echo -e "1,2\n3,4" | clickhouse-local --structure "a Int64, b Int64" \
--input-format "CSV" --query "SELECT * FROM table"
Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
1 2
3 4
1 2
3 4
```
The call above is equivalent to the following:
``` bash
$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table"
$ echo -e "1,2\n3,4" | clickhouse-local --query "
CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin);
SELECT a, b FROM table;
DROP TABLE table"
Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
1 2
3 4
1 2
3 4
```
You don't have to use `stdin` or the `--file` key. You can open any number of files using the [`file` table function](../../sql-reference/table-functions/file.md):
``` bash
$ echo 1 | tee 1.tsv
1
$ echo 2 | tee 2.tsv
2
$ clickhouse-local --query "
select * from file('1.tsv', TSV, 'a int') t1
cross join file('2.tsv', TSV, 'b int') t2"
1 2
```
Now let's output the amount of RAM occupied by each user (Unix):
``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
| clickhouse-local --structure "user String, mem Float64" \
--query "SELECT user, round(sum(mem), 2) as memTotal
FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
```
``` text
Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
┏━━━━━━━━━━┳━━━━━━━━━━┓
┃ user ┃ memTotal ┃

View File

@ -772,4 +772,43 @@ FROM numbers(3)
└──────────────────────────────────┘
```
## formatRowNoNewline {#formatrownonewline}
Converts arbitrary expressions into a string in the given format, removing extra `\n` line breaks if they appear.
**Syntax**
``` sql
formatRowNoNewline(format, x, y, ...)
```
**Parameters**
- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
- `x`,`y`, ... — Expressions.
**Returned value**
- The formatted string (without a trailing newline in text formats).
**Example**
Query:
``` sql
SELECT formatRowNoNewline('CSV', number, 'good')
FROM numbers(3)
```
Result:
``` text
┌─formatRowNoNewline('CSV', number, 'good')─┐
│ 0,"good" │
│ 1,"good" │
│ 2,"good" │
└───────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/) <!--hide-->

View File

@ -13,7 +13,7 @@ toc_title: "\u041f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
```
Normal views don't store any data, they just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
Normal views don't store any data; they perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
As an example, assume you've created a view:

View File

@ -13,7 +13,55 @@ toc_title: INSERT INTO
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
The query can specify a list of columns to insert `[(c1, c2, c3)]`. In this case, the rest of the columns are filled with:
You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax.
Instead of listing all the required columns, you can use the `(* EXCEPT(column_list))` syntax.
As an example, consider the table:
``` sql
SHOW CREATE insert_select_testtable
```
```
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE insert_select_testtable
(
`a` Int8,
`b` String,
`c` Int8
)
ENGINE = MergeTree()
ORDER BY a
SETTINGS index_granularity = 8192 │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
``` sql
INSERT INTO insert_select_testtable (*) VALUES (1, 'a', 1)
```
If you want to insert data into all the columns except `b`, you need to pass as many values as the number of columns listed in parentheses:
``` sql
INSERT INTO insert_select_testtable (* EXCEPT(b)) Values (2, 2)
```
``` sql
SELECT * FROM insert_select_testtable
```
```
┌─a─┬─b─┬─c─┐
│ 2 │ │ 2 │
└───┴───┴───┘
┌─a─┬─b─┬─c─┐
│ 1 │ a │ 1 │
└───┴───┴───┘
```
In this example, we see that the second inserted row has its `a` and `c` columns filled with the passed values, and `b` filled with its default value.
If the list of columns doesn't include all existing columns, the rest of the columns are filled with:
- The values calculated from the `DEFAULT` expressions specified in the table definition.
- Zeros and empty strings, if `DEFAULT` expressions are not defined.

View File

@ -2,18 +2,21 @@
toc_title: WITH
---
# WITH Clause {#sektsiia-with}
# WITH Clause {#with-clause}
This clause provides [Common Table Expressions](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), that is, it allows you to use results of expressions from the `WITH` clause in the rest of the `SELECT` query.
ClickHouse supports [Common Table Expressions](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), that is, it allows you to use results of expressions from the `WITH` clause in the rest of the `SELECT` query. Named subqueries can be included in the current and child query contexts wherever table objects are allowed. Recursion is prevented by hiding the current-level common table expression from the `WITH` expression.
## Syntax
``` sql
WITH <expression> AS <identifier>
```
or
``` sql
WITH <identifier> AS <subquery expression>
```
### Limitations
1. Recursive queries are not supported.
2. When a subquery is used as an expression, the result must contain exactly one row.
3. Expression results cannot be reused in nested queries.
Later on, the expression results can be used in the SELECT clause.
### Examples
## Examples
**Example 1:** Using a constant expression as a "variable"
@ -23,7 +26,7 @@ SELECT *
FROM hits
WHERE
EventDate = toDate(ts_upper_bound) AND
EventTime <= ts_upper_bound
EventTime <= ts_upper_bound;
```
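For context, the complete query of this example looks roughly as follows (reconstructed; the `hits` table is assumed, as elsewhere in the docs):
``` sql
WITH '2019-08-01 15:23:00' AS ts_upper_bound
SELECT *
FROM hits
WHERE
    EventDate = toDate(ts_upper_bound) AND
    EventTime <= ts_upper_bound;
```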
**Example 2:** Evicting the sum(bytes) expression from the SELECT column list
@ -35,7 +38,7 @@ SELECT
table
FROM system.parts
GROUP BY table
ORDER BY s
ORDER BY s;
```
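The full query, for reference (the hunk above shows only its tail):
``` sql
WITH sum(bytes) AS s
SELECT
    formatReadableSize(s),
    table
FROM system.parts
GROUP BY table
ORDER BY s;
```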
**Example 3:** Using results of a scalar subquery
@ -54,27 +57,14 @@ SELECT
FROM system.parts
GROUP BY table
ORDER BY table_disk_usage DESC
LIMIT 10
LIMIT 10;
```
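Again the full query, for reference (reconstructed from the hunk):
``` sql
WITH
    (
        SELECT sum(bytes)
        FROM system.parts
        WHERE active
    ) AS total_disk_usage
SELECT
    (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage,
    table
FROM system.parts
GROUP BY table
ORDER BY table_disk_usage DESC
LIMIT 10;
```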
**Example 4:** Reusing an expression
Currently, reusing an expression from the WITH clause inside a subquery is only possible via duplication.
``` sql
WITH ['hello'] AS hello
SELECT
hello,
*
FROM
(
WITH ['hello'] AS hello
SELECT hello
)
WITH test1 AS (SELECT i + 1, j + 1 FROM test1)
SELECT * FROM test1;
```
``` text
┌─hello─────┬─hello─────┐
│ ['hello'] │ ['hello'] │
└───────────┴───────────┘
```
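For the new Example 4 query to run, a base table named `test1` has to exist; a minimal setup sketch (hypothetical schema):
``` sql
CREATE TABLE test1 (i Int64, j Int64) ENGINE = Memory;
INSERT INTO test1 VALUES (1, 10);

WITH test1 AS (SELECT i + 1, j + 1 FROM test1)
SELECT * FROM test1;
-- The outer SELECT reads from the CTE, while the CTE reads from the base table:
-- the current-level CTE is hidden from its own WITH expression, so there is no recursion.
```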
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/with/) <!--hide-->

View File

@ -33,6 +33,6 @@ singledispatch==3.4.0.3
six==1.15.0
soupsieve==2.0.1
termcolor==1.1.0
tornado==5.1.1
tornado==6.1
Unidecode==1.1.1
urllib3==1.25.10

View File

@ -3,7 +3,7 @@ machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# System. query_log {#system_tables-query_log}
# system.query_log {#system_tables-query_log}
Contains information about executed queries, for example, start time, processing duration, and error messages.
@ -140,4 +140,4 @@ Settings.Values: ['0','random','1','10000000000']
**See Also**
- [System. query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.

View File

@ -1502,7 +1502,7 @@ private:
ASTPtr parseQuery(const char * & pos, const char * end, bool allow_multi_statements)
{
ParserQuery parser(end, true);
ParserQuery parser(end);
ASTPtr res;
const auto & settings = context.getSettingsRef();

View File

@ -5,7 +5,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
AccessControlManager.cpp

View File

@ -4,7 +4,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | sed 's/^\.\// /' | sort ?>

View File

@ -8,7 +8,7 @@ namespace DB
{
AggregateFunctionPtr AggregateFunctionCount::getOwnNullAdapter(
const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const
const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const
{
return std::make_shared<AggregateFunctionCountNotNullUnary>(types[0], params);
}

View File

@ -69,7 +69,7 @@ public:
}
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const override;
const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const override;
};

View File

@ -1,7 +1,6 @@
#include <AggregateFunctions/AggregateFunctionIf.h>
#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
#include "registerAggregateFunctions.h"
#include "AggregateFunctionNull.h"
namespace DB
@ -9,7 +8,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -42,164 +40,6 @@ public:
}
};
/** There are two cases: for single argument and variadic.
* Code for single argument is much more efficient.
*/
template <bool result_is_nullable, bool serialize_flag>
class AggregateFunctionIfNullUnary final
: public AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>
{
private:
size_t num_arguments;
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>;
public:
String getName() const override
{
return Base::getName() + "If";
}
AggregateFunctionIfNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: Base(std::move(nested_function_), arguments, params), num_arguments(arguments.size())
{
if (num_arguments == 0)
throw Exception("Aggregate function " + getName() + " requires at least one argument",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
{
const IColumn * filter_column = columns[num_arguments - 1];
if (const ColumnNullable * nullable_column = typeid_cast<const ColumnNullable *>(filter_column))
filter_column = nullable_column->getNestedColumnPtr().get();
return assert_cast<const ColumnUInt8 &>(*filter_column).getData()[row_num];
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
const IColumn * nested_column = &column->getNestedColumn();
if (!column->isNullAt(row_num) && singleFilter(columns, row_num, num_arguments))
{
this->setFlag(place);
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
}
}
};
template <bool result_is_nullable, bool serialize_flag, bool null_is_skipped>
class AggregateFunctionIfNullVariadic final
: public AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>
{
public:
String getName() const override
{
return Base::getName() + "If";
}
AggregateFunctionIfNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: Base(std::move(nested_function_), arguments, params), number_of_arguments(arguments.size())
{
if (number_of_arguments == 1)
throw Exception("Logical error: single argument is passed to AggregateFunctionIfNullVariadic", ErrorCodes::LOGICAL_ERROR);
if (number_of_arguments > MAX_ARGS)
throw Exception("Maximum number of arguments for aggregate function with Nullable types is " + toString(size_t(MAX_ARGS)),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (size_t i = 0; i < number_of_arguments; ++i)
is_nullable[i] = arguments[i]->isNullable();
}
static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
{
return assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num];
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
/// This container stores the columns we really pass to the nested function.
const IColumn * nested_columns[number_of_arguments];
for (size_t i = 0; i < number_of_arguments; ++i)
{
if (is_nullable[i])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
if (null_is_skipped && nullable_col.isNullAt(row_num))
{
/// If at least one column has a null value in the current row,
/// we don't process this row.
return;
}
nested_columns[i] = &nullable_col.getNestedColumn();
}
else
nested_columns[i] = columns[i];
}
if (singleFilter(nested_columns, row_num, number_of_arguments))
{
this->setFlag(place);
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
}
}
private:
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>;
enum { MAX_ARGS = 8 };
size_t number_of_arguments = 0;
std::array<char, MAX_ARGS> is_nullable; /// A plain array is better than std::vector because it has one less indirection.
};
AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter(
const AggregateFunctionPtr & nested_function, const DataTypes & arguments,
const Array & params, const AggregateFunctionProperties & properties) const
{
bool return_type_is_nullable = !properties.returns_default_when_only_null && getReturnType()->canBeInsideNullable();
size_t nullable_size = std::count_if(arguments.begin(), arguments.end(), [](const auto & element) { return element->isNullable(); });
return_type_is_nullable &= nullable_size != 1 || !arguments.back()->isNullable(); /// If only the condition is nullable, we should use a non-nullable return type.
bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null;
if (arguments.size() <= 2 && arguments.front()->isNullable())
{
if (return_type_is_nullable)
{
return std::make_shared<AggregateFunctionIfNullUnary<true, true>>(nested_func, arguments, params);
}
else
{
if (serialize_flag)
return std::make_shared<AggregateFunctionIfNullUnary<false, true>>(nested_func, arguments, params);
else
return std::make_shared<AggregateFunctionIfNullUnary<false, false>>(nested_func, arguments, params);
}
}
else
{
if (return_type_is_nullable)
{
return std::make_shared<AggregateFunctionIfNullVariadic<true, true, true>>(nested_function, arguments, params);
}
else
{
if (serialize_flag)
return std::make_shared<AggregateFunctionIfNullVariadic<false, true, true>>(nested_function, arguments, params);
else
return std::make_shared<AggregateFunctionIfNullVariadic<false, true, false>>(nested_function, arguments, params);
}
}
}
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory & factory)
{
factory.registerCombinator(std::make_shared<AggregateFunctionCombinatorIf>());

View File

@ -109,10 +109,6 @@ public:
{
return nested_func->isState();
}
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & nested_function, const DataTypes & arguments,
const Array & params, const AggregateFunctionProperties & properties) const override;
};
}

View File

@ -72,7 +72,7 @@ public:
assert(nested_function);
if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params, properties))
if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params))
return adapter;
/// If applied to an aggregate function with the -State combinator, we apply the -Null combinator to its nested_function instead of itself.

View File

@ -241,8 +241,7 @@ public:
}
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params,
const AggregateFunctionProperties & /*properties*/) const override
const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override
{
return std::make_shared<AggregateFunctionNullVariadic<false, false, false>>(nested_function, arguments, params);
}

View File

@ -33,7 +33,6 @@ using ConstAggregateDataPtr = const char *;
class IAggregateFunction;
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
struct AggregateFunctionProperties;
/** Aggregate functions interface.
* Instances of classes with this interface do not contain the data itself for aggregation,
@ -186,8 +185,7 @@ public:
* arguments and params are for nested_function.
*/
virtual AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/,
const Array & /*params*/, const AggregateFunctionProperties & /*properties*/) const
const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/, const Array & /*params*/) const
{
return nullptr;
}

View File

@ -200,9 +200,6 @@ public:
}
private:
friend void qdigest_test(int normal_size, UInt64 value_limit, const std::vector<UInt64> & values, int queries_count, bool verbose);
friend void rs_perf_test();
/// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements.
using Array = DB::PODArrayWithStackMemory<T, 64>;

View File

@ -5,7 +5,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
AggregateFunctionAggThrow.cpp

View File

@ -4,7 +4,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F GroupBitmap | sed 's/^\.\// /' | sort ?>

View File

@ -6,7 +6,6 @@ PEERDIR(
contrib/libs/poco/NetSSL_OpenSSL
)
CFLAGS(-g0)
SRCS(
Connection.cpp

View File

@ -5,7 +5,6 @@ PEERDIR(
contrib/libs/poco/NetSSL_OpenSSL
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>

View File

@ -13,7 +13,6 @@ PEERDIR(
contrib/libs/pdqsort
)
CFLAGS(-g0)
SRCS(
Collator.cpp

View File

@ -515,15 +515,16 @@
M(546, NO_ROW_DELIMITER) \
M(547, INVALID_RAID_TYPE) \
M(548, UNKNOWN_VOLUME) \
M(549, DATA_TYPE_CANNOT_BE_USED_IN_KEY) \
M(550, CONDITIONAL_TREE_PARENT_NOT_FOUND) \
M(551, ILLEGAL_PROJECTION_MANIPULATOR) \
M(552, UNRECOGNIZED_ARGUMENTS) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \
M(1001, STD_EXCEPTION) \
M(1002, UNKNOWN_EXCEPTION) \
\
M(2001, CONDITIONAL_TREE_PARENT_NOT_FOUND) \
M(2002, ILLEGAL_PROJECTION_MANIPULATOR) \
M(2003, UNRECOGNIZED_ARGUMENTS)
/* See END */
namespace DB
@ -536,11 +537,11 @@ namespace ErrorCodes
#undef M
constexpr Value END = 3000;
std::atomic<Value> values[END+1] {};
std::atomic<Value> values[END + 1] {};
struct ErrorCodesNames
{
std::string_view names[END+1];
std::string_view names[END + 1];
ErrorCodesNames()
{
#define M(VALUE, NAME) names[VALUE] = std::string_view(#NAME);

View File

@ -4,6 +4,7 @@
#include <math.h>
#include <new>
#include <utility>
#include <boost/noncopyable.hpp>
@ -314,8 +315,8 @@ public:
zeroValue()->~Cell();
}
Cell * zeroValue() { return reinterpret_cast<Cell*>(&zero_value_storage); }
const Cell * zeroValue() const { return reinterpret_cast<const Cell*>(&zero_value_storage); }
Cell * zeroValue() { return std::launder(reinterpret_cast<Cell*>(&zero_value_storage)); }
const Cell * zeroValue() const { return std::launder(reinterpret_cast<const Cell*>(&zero_value_storage)); }
};
template <typename Cell>

View File

@ -3,8 +3,10 @@
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTable.h>
#include <new>
#include <variant>
using StringKey8 = UInt64;
using StringKey16 = DB::UInt128;
struct StringKey24
@ -106,8 +108,8 @@ public:
zeroValue()->~Cell();
}
Cell * zeroValue() { return reinterpret_cast<Cell *>(&zero_value_storage); }
const Cell * zeroValue() const { return reinterpret_cast<const Cell *>(&zero_value_storage); }
Cell * zeroValue() { return std::launder(reinterpret_cast<Cell *>(&zero_value_storage)); }
const Cell * zeroValue() const { return std::launder(reinterpret_cast<const Cell *>(&zero_value_storage)); }
using LookupResult = Cell *;
using ConstLookupResult = const Cell *;

View File

@ -75,13 +75,6 @@ inline TUInt safeDiff(TUInt prev, TUInt curr)
}
inline UInt64 getCurrentTimeNanoseconds(clockid_t clock_type = CLOCK_MONOTONIC)
{
struct timespec ts;
clock_gettime(clock_type, &ts);
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
struct RUsageCounters
{
/// In nanoseconds
@ -108,13 +101,13 @@ struct RUsageCounters
hard_page_faults = static_cast<UInt64>(rusage.ru_majflt);
}
static RUsageCounters current(UInt64 real_time_ = getCurrentTimeNanoseconds())
static RUsageCounters current()
{
::rusage rusage {};
#if !defined(__APPLE__)
::getrusage(RUSAGE_THREAD, &rusage);
#endif
return RUsageCounters(rusage, real_time_);
return RUsageCounters(rusage, getClockMonotonic());
}
static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events)
@ -133,6 +126,14 @@ struct RUsageCounters
incrementProfileEvents(last_counters, current_counters, profile_events);
last_counters = current_counters;
}
private:
static inline UInt64 getClockMonotonic()
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
};
// thread_local is disabled in Arcadia, so we have to use a dummy implementation

View File

@ -172,7 +172,7 @@ protected:
void finalizeQueryProfiler();
void logToQueryThreadLog(QueryThreadLog & thread_log);
void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database);
void assertState(const std::initializer_list<int> & permitted_states, const char * description = nullptr) const;

View File

@ -21,7 +21,6 @@ PEERDIR(
INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc)
CFLAGS(-g0)
SRCS(
ActionLock.cpp

View File

@ -20,7 +20,6 @@ PEERDIR(
INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>

View File

@ -12,7 +12,6 @@ PEERDIR(
contrib/libs/zstd
)
CFLAGS(-g0)
SRCS(
CachedCompressedReadBuffer.cpp

View File

@ -11,7 +11,6 @@ PEERDIR(
contrib/libs/zstd
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>

View File

@ -767,9 +767,10 @@ T & Field::get()
#ifndef NDEBUG
// Disregard signedness when converting between int64 types.
constexpr Field::Types::Which target = TypeToEnum<NearestFieldType<ValueType>>::value;
assert(target == which
|| (isInt64FieldType(target) && isInt64FieldType(which))
|| target == Field::Types::Decimal64 /* DateTime64 fields */);
if (target != which
&& (!isInt64FieldType(target) || !isInt64FieldType(which))
&& target != Field::Types::Decimal64 /* DateTime64 fields */)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Field get from type {} to type {}", Types::toString(which), Types::toString(target));
#endif
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);

View File

@ -85,6 +85,9 @@ void GTIDSets::update(const GTID & other)
ErrorCodes::LOGICAL_ERROR);
}
/// Try to shrink the Sequence interval.
GTIDSet::tryShrink(set, i, current);
/// Sequence, extend the interval.
if (other.seq_no == current.end)
{
@ -116,6 +119,16 @@ void GTIDSets::update(const GTID & other)
sets.emplace_back(set);
}
void GTIDSet::tryShrink(GTIDSet & set, unsigned int i, GTIDSet::Interval & current)
{
if (i != set.intervals.size() - 1)
{
auto & next = set.intervals[i + 1];
if (current.end == next.start)
set.tryMerge(i);
}
}
String GTIDSets::toString() const
{
WriteBufferFromOwnString buffer;

View File

@ -26,6 +26,8 @@ public:
std::vector<Interval> intervals;
void tryMerge(size_t i);
static void tryShrink(GTIDSet & set, unsigned int i, Interval & current);
};
class GTIDSets

View File

@ -126,6 +126,7 @@ class IColumn;
M(UInt64, merge_tree_coarse_index_granularity, 8, "If the index segment can contain the required keys, divide it into as many parts and recursively check them.", 0) \
M(UInt64, merge_tree_max_rows_to_use_cache, (128 * 8192), "The maximum number of rows per request, to use the cache of uncompressed data. If the request is large, the cache is not used. (For large queries not to flush out the cache.)", 0) \
M(UInt64, merge_tree_max_bytes_to_use_cache, (192 * 10 * 1024 * 1024), "The maximum number of bytes per request, to use the cache of uncompressed data. If the request is large, the cache is not used. (For large queries not to flush out the cache.)", 0) \
M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \
\
M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \
\
@ -169,6 +170,8 @@ class IColumn;
M(Milliseconds, read_backoff_min_interval_between_events_ms, 1000, "Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time.", 0) \
M(UInt64, read_backoff_min_events, 2, "Settings to reduce the number of threads in case of slow reads. The number of events after which the number of threads will be reduced.", 0) \
\
M(UInt64, read_backoff_min_concurrency, 1, "Settings to try keeping the minimal number of threads in case of slow reads.", 0) \
\
M(Float, memory_tracker_fault_probability, 0., "For testing of `exception safety` - throw an exception every time you allocate memory with the specified probability.", 0) \
\
M(Bool, enable_http_compression, 0, "Compress the result if the client over HTTP said that it understands data compressed by gzip or deflate.", 0) \
@ -329,7 +332,6 @@ class IColumn;
M(Bool, calculate_text_stack_trace, 1, "Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when huge amount of wrong queries are executed. In normal cases you should not disable this option.", 0) \
M(Bool, allow_ddl, true, "If it is set to true, then a user is allowed to executed DDL queries.", 0) \
M(Bool, parallel_view_processing, false, "Enables pushing to attached views concurrently instead of sequentially.", 0) \
M(Bool, enable_debug_queries, false, "Enables debug queries such as AST.", 0) \
M(Bool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.", 0) \
M(Bool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \
M(Bool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \
@ -403,7 +405,8 @@ class IColumn;
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0)
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, enable_debug_queries, false, "Obsolete setting; enabling debug queries no longer has any effect", 0)
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.

View File

@ -260,6 +260,17 @@ int main(int argc, char ** argv)
"10662d71-9d91-11ea-bbc2-0242ac110003:6-7",
"20662d71-9d91-11ea-bbc2-0242ac110003:9",
"10662d71-9d91-11ea-bbc2-0242ac110003:6-7,20662d71-9d91-11ea-bbc2-0242ac110003:9"},
{"shrink-sequence",
"10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:7",
"10662d71-9d91-11ea-bbc2-0242ac110003:6",
"10662d71-9d91-11ea-bbc2-0242ac110003:1-7"},
{"shrink-sequence",
"10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:10",
"10662d71-9d91-11ea-bbc2-0242ac110003:8",
"10662d71-9d91-11ea-bbc2-0242ac110003:1-5:8:10"
}
};
for (auto & tc : cases)

View File

@ -7,7 +7,6 @@ PEERDIR(
contrib/restricted/boost/libs
)
CFLAGS(-g0)
SRCS(
BackgroundSchedulePool.cpp

View File

@ -6,7 +6,6 @@ PEERDIR(
contrib/restricted/boost/libs
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>

View File

@ -8,7 +8,6 @@ PEERDIR(
NO_COMPILER_WARNINGS()
CFLAGS(-g0)
SRCS(
AddingDefaultBlockOutputStream.cpp

View File

@ -7,7 +7,6 @@ PEERDIR(
NO_COMPILER_WARNINGS()
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>

View File

@ -6,7 +6,6 @@ PEERDIR(
clickhouse/src/Formats
)
CFLAGS(-g0)
SRCS(
convertMySQLDataType.cpp

View File

@ -5,7 +5,6 @@ PEERDIR(
clickhouse/src/Formats
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>

View File

@ -114,7 +114,8 @@ void DatabaseAtomic::dropTable(const Context &, const String & table_name, bool
DatabaseWithDictionaries::detachTableUnlocked(table_name, lock); /// Should never throw
table_name_to_path.erase(table_name);
}
tryRemoveSymlink(table_name);
if (table->storesDataOnDisk())
tryRemoveSymlink(table_name);
/// Remove the inner table (if any) to avoid deadlock
/// (due to attempt to execute DROP from the worker thread)
if (auto * mv = dynamic_cast<StorageMaterializedView *>(table.get()))
@ -145,7 +146,7 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
String old_metadata_path = getObjectMetadataPath(table_name);
String new_metadata_path = to_database.getObjectMetadataPath(to_table_name);
auto detach = [](DatabaseAtomic & db, const String & table_name_)
auto detach = [](DatabaseAtomic & db, const String & table_name_, bool has_symlink)
{
auto it = db.table_name_to_path.find(table_name_);
String table_data_path_saved;
@ -155,7 +156,7 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
assert(!table_data_path_saved.empty() || db.dictionaries.find(table_name_) != db.dictionaries.end());
db.tables.erase(table_name_);
db.table_name_to_path.erase(table_name_);
if (!table_data_path_saved.empty())
if (has_symlink)
db.tryRemoveSymlink(table_name_);
return table_data_path_saved;
};
@ -166,7 +167,8 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
if (table_data_path_.empty())
return;
db.table_name_to_path.emplace(table_name_, table_data_path_);
db.tryCreateSymlink(table_name_, table_data_path_);
if (table_->storesDataOnDisk())
db.tryCreateSymlink(table_name_, table_data_path_);
};
auto assert_can_move_mat_view = [inside_database](const StoragePtr & table_)
@ -228,9 +230,9 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
renameNoReplace(old_metadata_path, new_metadata_path);
/// After the metadata was successfully moved, the following methods should not throw (if they do, it's a logical error)
table_data_path = detach(*this, table_name);
table_data_path = detach(*this, table_name, table->storesDataOnDisk());
if (exchange)
other_table_data_path = detach(other_db, to_table_name);
other_table_data_path = detach(other_db, to_table_name, other_table->storesDataOnDisk());
auto old_table_id = table->getStorageID();
@ -286,7 +288,8 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
DatabaseCatalog::instance().removeUUIDMappingFinally(query.uuid);
throw;
}
tryCreateSymlink(query.table, table_data_path);
if (table->storesDataOnDisk())
tryCreateSymlink(query.table, table_data_path);
}
void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path)
@ -383,17 +386,18 @@ void DatabaseAtomic::loadStoredObjects(Context & context, bool has_force_restore
Poco::File(path_to_table_symlinks).createDirectories();
for (const auto & table : table_names)
tryCreateSymlink(table.first, table.second);
tryCreateSymlink(table.first, table.second, true);
}
}
void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & actual_data_path)
void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & actual_data_path, bool if_data_path_exist)
{
try
{
String link = path_to_table_symlinks + escapeForFileName(table_name);
String data = Poco::Path(global_context.getPath()).makeAbsolute().toString() + actual_data_path;
Poco::File{data}.linkTo(link, Poco::File::LINK_SYMBOLIC);
Poco::File data = Poco::Path(global_context.getPath()).makeAbsolute().toString() + actual_data_path;
if (!if_data_path_exist || data.exists())
data.linkTo(link, Poco::File::LINK_SYMBOLIC);
}
catch (...)
{

View File

@ -55,7 +55,7 @@ public:
UUID tryGetTableUUID(const String & table_name) const override;
void tryCreateSymlink(const String & table_name, const String & actual_data_path);
void tryCreateSymlink(const String & table_name, const String & actual_data_path, bool if_data_path_exist = false);
void tryRemoveSymlink(const String & table_name);
void waitDetachedTableNotInUse(const UUID & uuid);

View File

@ -321,7 +321,7 @@ void DatabaseOnDisk::renameTable(
/// Special case: usually no actions with symlinks are required when detaching/attaching table,
/// but not when moving from Atomic database to Ordinary
if (from_atomic_to_ordinary)
if (from_atomic_to_ordinary && table->storesDataOnDisk())
{
auto & atomic_db = assert_cast<DatabaseAtomic &>(*this);
atomic_db.tryRemoveSymlink(table_name);

View File

@ -5,7 +5,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
DatabaseAtomic.cpp

View File

@ -4,7 +4,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | sed 's/^\.\// /' | sort ?>

View File

@ -1467,7 +1467,6 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
{
assert(dict_struct.key);
assert(key_columns.size() == key_types.size());
assert(key_columns.size() == dict_struct.key->size());
dict_struct.validateKeyTypes(key_types);

View File

@ -12,7 +12,6 @@ PEERDIR(
NO_COMPILER_WARNINGS()
CFLAGS(-g0)
SRCS(
CacheDictionary.cpp

View File

@ -11,7 +11,6 @@ PEERDIR(
NO_COMPILER_WARNINGS()
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F Trie | sed 's/^\.\// /' | sort ?>

View File

@ -4,7 +4,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
DiskS3.cpp

View File

@ -5,7 +5,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
createVolume.cpp

View File

@ -4,7 +4,6 @@ PEERDIR(
clickhouse/src/Common
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F S3 | sed 's/^\.\// /' | sort ?>

View File

@ -7,7 +7,6 @@ PEERDIR(
contrib/libs/protoc
)
CFLAGS(-g0)
SRCS(
FormatFactory.cpp

View File

@ -6,7 +6,6 @@ PEERDIR(
contrib/libs/protoc
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>

View File

@ -31,6 +31,7 @@ namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
}
@ -84,6 +85,9 @@ enum class TieBreakingMode
Bankers, // use banker's rounding
};
/// The scale N is never larger than the number of digits in the largest type, so Int16 is enough to hold it.
using Scale = Int16;
/** Rounding functions for integer values.
*/
@ -416,7 +420,7 @@ private:
using Container = typename ColumnDecimal<T>::Container;
public:
static NO_INLINE void apply(const Container & in, Container & out, Int64 scale_arg)
static NO_INLINE void apply(const Container & in, Container & out, Scale scale_arg)
{
scale_arg = in.getScale() - scale_arg;
if (scale_arg > 0)
@ -458,7 +462,7 @@ class Dispatcher
FloatRoundingImpl<T, rounding_mode, scale_mode>,
IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;
static ColumnPtr apply(const ColumnVector<T> * col, Int64 scale_arg)
static ColumnPtr apply(const ColumnVector<T> * col, Scale scale_arg)
{
auto col_res = ColumnVector<T>::create();
@ -487,7 +491,7 @@ class Dispatcher
return col_res;
}
static ColumnPtr apply(const ColumnDecimal<T> * col, Int64 scale_arg)
static ColumnPtr apply(const ColumnDecimal<T> * col, Scale scale_arg)
{
const typename ColumnDecimal<T>::Container & vec_src = col->getData();
@ -501,7 +505,7 @@ class Dispatcher
}
public:
static ColumnPtr apply(const IColumn * column, Int64 scale_arg)
static ColumnPtr apply(const IColumn * column, Scale scale_arg)
{
if constexpr (IsNumber<T>)
return apply(checkAndGetColumn<ColumnVector<T>>(column), scale_arg);
@ -544,20 +548,25 @@ public:
return arguments[0];
}
static Int64 getScaleArg(ColumnsWithTypeAndName & arguments)
static Scale getScaleArg(ColumnsWithTypeAndName & arguments)
{
if (arguments.size() == 2)
{
const IColumn & scale_column = *arguments[1].column;
if (!isColumnConst(scale_column))
throw Exception("Scale argument for rounding functions must be constant.", ErrorCodes::ILLEGAL_COLUMN);
throw Exception("Scale argument for rounding functions must be constant", ErrorCodes::ILLEGAL_COLUMN);
Field scale_field = assert_cast<const ColumnConst &>(scale_column).getField();
if (scale_field.getType() != Field::Types::UInt64
&& scale_field.getType() != Field::Types::Int64)
throw Exception("Scale argument for rounding functions must have integer type.", ErrorCodes::ILLEGAL_COLUMN);
throw Exception("Scale argument for rounding functions must have integer type", ErrorCodes::ILLEGAL_COLUMN);
return scale_field.get<Int64>();
Int64 scale64 = scale_field.get<Int64>();
if (scale64 > std::numeric_limits<Scale>::max()
|| scale64 < std::numeric_limits<Scale>::min())
throw Exception("Scale argument for rounding function is too large", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
return scale64;
}
return 0;
}
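In SQL terms, the new bound rejects absurd scale values instead of silently truncating them; a sketch of the user-visible behavior (result values are illustrative):
``` sql
SELECT round(123.456, 2);   -- 123.46
SELECT round(123.456, -1);  -- 120
SELECT round(1.5, 100000);  -- now throws ARGUMENT_OUT_OF_BOUND: 100000 does not fit into Int16
```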
@ -568,7 +577,7 @@ public:
ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
const ColumnWithTypeAndName & column = arguments[0];
Int64 scale_arg = getScaleArg(arguments);
Scale scale_arg = getScaleArg(arguments);
ColumnPtr res;
auto call = [&](const auto & types) -> bool

View File

@ -32,7 +32,6 @@ PEERDIR(
)
# "Arcadia" build is slightly deficient. It lacks many libraries that we need.
CFLAGS(-g0)
SRCS(
abs.cpp

View File

@ -31,7 +31,6 @@ PEERDIR(
)
# "Arcadia" build is slightly deficient. It lacks many libraries that we need.
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|sumbur|abtesting' | sed 's/^\.\// /' | sort ?>

View File

@ -480,7 +480,7 @@ void readEscapedString(String & s, ReadBuffer & buf)
}
template void readEscapedStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template void readEscapedStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
template void readEscapedStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf);
/** If enable_sql_style_quoting == true,
@ -562,7 +562,7 @@ void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
template void readQuotedStringInto<true>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template void readDoubleQuotedStringInto<false>(NullSink & s, ReadBuffer & buf);
template void readDoubleQuotedStringInto<false>(NullOutput & s, ReadBuffer & buf);
void readDoubleQuotedString(String & s, ReadBuffer & buf)
{
@ -742,7 +742,7 @@ void readJSONString(String & s, ReadBuffer & buf)
template void readJSONStringInto<PaddedPODArray<UInt8>, void>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template bool readJSONStringInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template void readJSONStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
template void readJSONStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf);
template void readJSONStringInto<String>(String & s, ReadBuffer & buf);
@ -891,7 +891,7 @@ void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field)
throw Exception("Unexpected EOF for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
else if (*buf.position() == '"') /// skip double-quoted string
{
NullSink sink;
NullOutput sink;
readJSONStringInto(sink, buf);
}
else if (isNumericASCII(*buf.position()) || *buf.position() == '-' || *buf.position() == '+' || *buf.position() == '.') /// skip number
@ -955,7 +955,7 @@ void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field)
// field name
if (*buf.position() == '"')
{
NullSink sink;
NullOutput sink;
readJSONStringInto(sink, buf);
}
else

View File

@ -527,7 +527,7 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
}
/// This could be used as a template parameter for the functions above, if you just want to skip data.
struct NullSink
struct NullOutput
{
void append(const char *, size_t) {}
void push_back(char) {}

View File

@ -106,6 +106,7 @@ namespace detail
std::vector<Poco::Net::HTTPCookie> cookies;
HTTPHeaderEntries http_header_entries;
RemoteHostFilter remote_host_filter;
std::function<void(size_t)> next_callback;
std::istream * call(const Poco::URI uri_, Poco::Net::HTTPResponse & response)
{
@ -154,6 +155,7 @@ namespace detail
}
public:
using NextCallback = std::function<void(size_t)>;
using OutStreamCallback = std::function<void(std::ostream &)>;
explicit ReadWriteBufferFromHTTPBase(
@ -185,7 +187,7 @@ namespace detail
session->updateSession(uri_redirect);
istr = call(uri_redirect,response);
istr = call(uri_redirect, response);
}
try
@ -204,6 +206,8 @@ namespace detail
bool nextImpl() override
{
if (next_callback)
next_callback(count());
if (!impl->next())
return false;
internal_buffer = impl->buffer();
@ -218,6 +222,17 @@ namespace detail
return cookie.getValue();
return def;
}
/// Set function to call on each nextImpl, useful when you need to track
/// progress.
/// NOTE: the parameter passed on each call is not incremental; it is the total
/// number of bytes passed through the buffer so far
void setNextCallback(NextCallback next_callback_)
{
next_callback = next_callback_;
/// Some data may have already been read
next_callback(count());
}
};
}
@ -226,7 +241,8 @@ class UpdatableSession : public UpdatableSessionBase<HTTPSessionPtr>
using Parent = UpdatableSessionBase<HTTPSessionPtr>;
public:
explicit UpdatableSession(const Poco::URI uri,
explicit UpdatableSession(
const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const UInt64 max_redirects_)
: Parent(uri, timeouts_, max_redirects_)
@ -245,7 +261,8 @@ class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
public:
explicit ReadWriteBufferFromHTTP(Poco::URI uri_,
explicit ReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_,
OutStreamCallback out_stream_callback_,
const ConnectionTimeouts & timeouts,
@ -254,7 +271,8 @@ public:
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const HTTPHeaderEntries & http_header_entries_ = {},
const RemoteHostFilter & remote_host_filter_ = {})
: Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_)
: Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_)
{
}
};

View File

@ -533,6 +533,10 @@ ReturnType parseDateTimeBestEffortImpl(
}
}
/// If neither Date nor Time is parsed successfully, it should fail
if (!year && !month && !day_of_month && !has_time)
return on_error("Cannot read DateTime: neither Date nor Time was parsed successfully", ErrorCodes::CANNOT_PARSE_DATETIME);
if (!year)
year = 2000;
if (!month)

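The user-visible effect, sketched with the OrNull variant of the function (outputs are illustrative):
``` sql
SELECT parseDateTimeBestEffortOrNull('2020-11-04 01:40:41'); -- parses as before
SELECT parseDateTimeBestEffortOrNull('not a datetime');      -- NULL: neither Date nor Time was parsed
```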
View File

@ -8,7 +8,6 @@ PEERDIR(
contrib/libs/poco/NetSSL_OpenSSL
)
CFLAGS(-g0)
SRCS(
AIOContextPool.cpp

View File

@ -7,7 +7,6 @@ PEERDIR(
contrib/libs/poco/NetSSL_OpenSSL
)
CFLAGS(-g0)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | grep -v -P 'S3|HDFS' | sed 's/^\.\// /' | sort ?>

View File

@ -207,8 +207,22 @@ void AsynchronousMetrics::update()
/// We must update the value of total_memory_tracker periodically.
/// Otherwise it might be calculated incorrectly - it can include a "drift" of memory amount.
/// See https://github.com/ClickHouse/ClickHouse/issues/10293
total_memory_tracker.set(data.resident);
CurrentMetrics::set(CurrentMetrics::MemoryTracking, data.resident);
{
Int64 amount = total_memory_tracker.get();
Int64 peak = total_memory_tracker.getPeak();
Int64 new_peak = data.resident;
LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"),
"MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}",
ReadableSize(amount),
ReadableSize(peak),
ReadableSize(new_peak),
ReadableSize(new_peak - peak)
);
total_memory_tracker.set(new_peak);
CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_peak);
}
}
#endif

View File

@ -20,6 +20,8 @@
#include <Storages/MarkCache.h>
#include <Storages/MergeTree/BackgroundProcessingPool.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/ReplicatedFetchList.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/CompressionCodecSelector.h>
#include <Storages/StorageS3Settings.h>
@ -328,6 +330,7 @@ struct ContextShared
mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files.
ProcessList process_list; /// Executing queries at the moment.
MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree)
ReplicatedFetchList replicated_fetch_list;
ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections.
InterserverIOHandler interserver_io_handler; /// Handler for interserver communication.
std::optional<BackgroundSchedulePool> buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
@ -505,6 +508,8 @@ ProcessList & Context::getProcessList() { return shared->process_list; }
const ProcessList & Context::getProcessList() const { return shared->process_list; }
MergeList & Context::getMergeList() { return shared->merge_list; }
const MergeList & Context::getMergeList() const { return shared->merge_list; }
ReplicatedFetchList & Context::getReplicatedFetchList() { return shared->replicated_fetch_list; }
const ReplicatedFetchList & Context::getReplicatedFetchList() const { return shared->replicated_fetch_list; }
void Context::enableNamedSessions()

View File

@ -65,6 +65,7 @@ class InterserverIOHandler;
class BackgroundProcessingPool;
class BackgroundSchedulePool;
class MergeList;
class ReplicatedFetchList;
class Cluster;
class Compiler;
class MarkCache;
@ -478,6 +479,9 @@ public:
MergeList & getMergeList();
const MergeList & getMergeList() const;
ReplicatedFetchList & getReplicatedFetchList();
const ReplicatedFetchList & getReplicatedFetchList() const;
/// If the current session is expired at the time of the call, synchronously creates and returns a new session with the startNewSession() call.
/// If no ZooKeeper configured, throws an exception.
std::shared_ptr<zkutil::ZooKeeper> getZooKeeper() const;

View File

@ -134,7 +134,10 @@ void DatabaseCatalog::loadDatabases()
loadMarkedAsDroppedTables();
auto task_holder = global_context->getSchedulePool().createTask("DatabaseCatalog", [this](){ this->dropTableDataTask(); });
drop_task = std::make_unique<BackgroundSchedulePoolTaskHolder>(std::move(task_holder));
(*drop_task)->activateAndSchedule();
(*drop_task)->activate();
std::lock_guard lock{tables_marked_dropped_mutex};
if (!tables_marked_dropped.empty())
(*drop_task)->schedule();
}
void DatabaseCatalog::shutdownImpl()
@ -760,14 +763,15 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr
std::lock_guard lock(tables_marked_dropped_mutex);
if (ignore_delay)
tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, 0});
tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, drop_time});
else
tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time});
tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time + drop_delay_sec});
tables_marked_dropped_ids.insert(table_id.uuid);
CurrentMetrics::add(CurrentMetrics::TablesToDropQueueSize, 1);
/// If list of dropped tables was empty, start a drop task
if (drop_task && tables_marked_dropped.size() == 1)
/// If list of dropped tables was empty, start a drop task.
/// If ignore_delay is set, schedule drop task as soon as possible.
if (drop_task && (tables_marked_dropped.size() == 1 || ignore_delay))
(*drop_task)->schedule();
}
@ -777,26 +781,40 @@ void DatabaseCatalog::dropTableDataTask()
/// A table can be removed when it's not used by queries and drop_delay_sec has elapsed since it was marked as dropped.
bool need_reschedule = true;
/// Default reschedule time for the case when we are waiting for reference count to become 1.
size_t schedule_after_ms = reschedule_time_ms;
TableMarkedAsDropped table;
try
{
std::lock_guard lock(tables_marked_dropped_mutex);
assert(!tables_marked_dropped.empty());
time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
time_t min_drop_time = std::numeric_limits<time_t>::max();
size_t tables_in_use_count = 0;
auto it = std::find_if(tables_marked_dropped.begin(), tables_marked_dropped.end(), [&](const auto & elem)
{
bool not_in_use = !elem.table || elem.table.unique();
bool old_enough = elem.drop_time + drop_delay_sec < current_time;
bool old_enough = elem.drop_time <= current_time;
min_drop_time = std::min(min_drop_time, elem.drop_time);
tables_in_use_count += !not_in_use;
return not_in_use && old_enough;
});
if (it != tables_marked_dropped.end())
{
table = std::move(*it);
LOG_INFO(log, "Will try drop {}", table.table_id.getNameForLogs());
LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}",
tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs());
tables_marked_dropped.erase(it);
/// Schedule the task as soon as possible, while there are suitable tables to drop.
schedule_after_ms = 0;
}
else
else if (current_time < min_drop_time)
{
LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue", tables_marked_dropped.size());
/// We are waiting for drop_delay_sec to elapse; there is no point in waking up before min_drop_time.
/// If a new table is added to the queue with the ignore_delay flag, schedule() is called to wake the task up earlier.
schedule_after_ms = (min_drop_time - current_time) * 1000;
LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue ({} of them are in use). "
"Will check again after {} seconds", tables_marked_dropped.size(), tables_in_use_count, min_drop_time - current_time);
}
need_reschedule = !tables_marked_dropped.empty();
}
@ -820,11 +838,15 @@ void DatabaseCatalog::dropTableDataTask()
tryLogCurrentException(log, "Cannot drop table " + table.table_id.getNameForLogs() +
". Will retry later.");
{
table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec;
std::lock_guard lock(tables_marked_dropped_mutex);
tables_marked_dropped.emplace_back(std::move(table));
/// If list of dropped tables was empty, schedule a task to retry deletion.
if (tables_marked_dropped.size() == 1)
{
need_reschedule = true;
schedule_after_ms = drop_error_cooldown_sec * 1000;
}
}
}
@ -833,7 +855,7 @@ void DatabaseCatalog::dropTableDataTask()
/// Do not schedule a task if there is no tables to drop
if (need_reschedule)
(*drop_task)->scheduleAfter(reschedule_time_ms);
(*drop_task)->scheduleAfter(schedule_after_ms);
}
void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table)

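On the SQL side, the ignore_delay path corresponds to dropping a table without the usual grace period; a sketch, assuming the NO DELAY modifier is what sets this flag:
``` sql
DROP TABLE IF EXISTS t NO DELAY;
-- The drop task is scheduled immediately instead of waiting
-- for database_atomic_delay_before_drop_table_sec to elapse.
```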
View File

@ -234,6 +234,7 @@ private:
void dropTableFinally(const TableMarkedAsDropped & table);
static constexpr size_t reschedule_time_ms = 100;
static constexpr time_t drop_error_cooldown_sec = 5;
private:
using UUIDToDatabaseMap = std::unordered_map<UUID, DatabasePtr>;

View File

@ -6,6 +6,7 @@
#include <Common/escapeForFileName.h>
#include <Common/typeid_cast.h>
#include <Common/Macros.h>
#include <Common/randomSeed.h>
#include <Core/Defines.h>
#include <Core/Settings.h>
@ -362,7 +363,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
if (col_decl.type)
{
const auto & final_column_name = col_decl.name;
const auto tmp_column_name = final_column_name + "_tmp";
const auto tmp_column_name = final_column_name + "_tmp_alter" + toString(randomSeed());
const auto * data_type_ptr = column_names_and_types.back().type.get();
default_expr_list->children.emplace_back(

View File

@ -250,7 +250,7 @@ BlockIO InterpreterDropQuery::executeToDatabase(const ASTDropQuery & query)
{
if (query.no_delay)
{
for (const auto table_uuid : tables_to_wait)
for (const auto & table_uuid : tables_to_wait)
waitForTableToBeActuallyDroppedOrDetached(query, database, table_uuid);
}
throw;
@ -258,7 +258,7 @@ BlockIO InterpreterDropQuery::executeToDatabase(const ASTDropQuery & query)
if (query.no_delay)
{
for (const auto table_uuid : tables_to_wait)
for (const auto & table_uuid : tables_to_wait)
waitForTableToBeActuallyDroppedOrDetached(query, database, table_uuid);
}
return res;

Some files were not shown because too many files have changed in this diff.