Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-18 21:51:57 +00:00)

Commit d8c5c7c503: Merge branch 'master' into parameters-rename-query

.gitmodules (vendored, 3 changes)

@@ -296,6 +296,9 @@
 [submodule "contrib/libdivide"]
     path = contrib/libdivide
     url = https://github.com/ridiculousfish/libdivide
+[submodule "contrib/ulid-c"]
+    path = contrib/ulid-c
+    url = https://github.com/ClickHouse/ulid-c.git
 [submodule "contrib/aws-crt-cpp"]
     path = contrib/aws-crt-cpp
     url = https://github.com/ClickHouse/aws-crt-cpp

contrib/CMakeLists.txt (vendored, 2 changes)

@@ -191,6 +191,8 @@ add_contrib (xxHash-cmake xxHash)
 
 add_contrib (google-benchmark-cmake google-benchmark)
 
+add_contrib (ulid-c-cmake ulid-c)
+
 # Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
 # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,

contrib/aws (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit 06a6610e6fb3385e22ad85014a67aa307825ffb1
+Subproject commit ecccfc026a42b30023289410a67024d561f4bf3e

contrib/ulid-c (new vendored submodule, 1 change)

@@ -0,0 +1 @@
+Subproject commit c433b6783cf918b8f996dacd014cb2b68c7de419

contrib/ulid-c-cmake/CMakeLists.txt (new file, 16 lines)

@@ -0,0 +1,16 @@
+option (ENABLE_ULID "Enable ulid" ${ENABLE_LIBRARIES})
+
+if (NOT ENABLE_ULID)
+    message(STATUS "Not using ulid")
+    return()
+endif()
+
+set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/ulid-c")
+
+set (SRCS
+    "${LIBRARY_DIR}/src/ulid.c"
+)
+
+add_library(_ulid ${SRCS})
+target_include_directories(_ulid SYSTEM PUBLIC "${LIBRARY_DIR}/include")
+add_library(ch_contrib::ulid ALIAS _ulid)

contrib/unixodbc (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit a2cd5395e8c7f7390025ec93af5bfebef3fb5fcd
+Subproject commit 18e0ebe2a1fb53b9072ff60a558f6bd6ad2a0551

@@ -645,7 +645,7 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
         -e "} <Error> TCPHandler: Code:" \
         -e "} <Error> executeQuery: Code:" \
         -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
-        -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \
+        -e "[Queue = DB::DynamicRuntimeQueue]: Code: 235. DB::Exception: Part" \
        -e "The set of parts restored in place of" \
        -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
        -e "Code: 269. DB::Exception: Destination table is myself" \

docs/changelogs/v22.3.18.37-lts.md (new file, 33 lines)

@@ -0,0 +1,33 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v22.3.18.37-lts (fe512717551) FIXME as compared to v22.3.17.13-lts (fcc4de7e805)
+
+#### Performance Improvement
+* Backported in [#46372](https://github.com/ClickHouse/ClickHouse/issues/46372): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#46357](https://github.com/ClickHouse/ClickHouse/issues/46357): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#45856](https://github.com/ClickHouse/ClickHouse/issues/45856): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#45620](https://github.com/ClickHouse/ClickHouse/issues/45620): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#45549](https://github.com/ClickHouse/ClickHouse/issues/45549): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).

@@ -76,6 +76,7 @@ Engines in the family:
 - [View](../../engines/table-engines/special/view.md#table_engines-view)
 - [Memory](../../engines/table-engines/special/memory.md#memory)
 - [Buffer](../../engines/table-engines/special/buffer.md#buffer)
+- [KeeperMap](../../engines/table-engines/special/keepermap.md)
 
 ## Virtual Columns {#table_engines-virtual_columns}
 

@@ -84,3 +84,39 @@ You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wi
     </tables>
 </rocksdb>
 ```
+
+## Supported operations {#table_engine-EmbeddedRocksDB-supported-operations}
+
+### Inserts
+
+When new rows are inserted into `EmbeddedRocksDB`, if the key already exists, the value will be updated, otherwise a new key is created.
+
+Example:
+
+```sql
+INSERT INTO test VALUES ('some key', 1, 'value', 3.2);
+```
+
+### Deletes
+
+Rows can be deleted using the `DELETE` query or `TRUNCATE`.
+
+```sql
+DELETE FROM test WHERE key LIKE 'some%' AND v1 > 1;
+```
+
+```sql
+ALTER TABLE test DELETE WHERE key LIKE 'some%' AND v1 > 1;
+```
+
+```sql
+TRUNCATE TABLE test;
+```
+
+### Updates
+
+Values can be updated using the `ALTER TABLE` query. The primary key cannot be updated.
+
+```sql
+ALTER TABLE test UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
+```

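A quick way to see the upsert semantics described in the Inserts subsection above is to write the same key twice and read it back. The following queries are only an illustration against the `test` table used in the examples; they are not part of the patch:

```sql
-- Illustrative only: EmbeddedRocksDB keeps one row per key, so a second insert
-- with an existing key overwrites the previous value rather than adding a row.
INSERT INTO test VALUES ('some key', 1, 'value', 3.2);
INSERT INTO test VALUES ('some key', 2, 'updated value', 4.5);

SELECT * FROM test WHERE key = 'some key';  -- returns a single row with the latest value
```
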
docs/en/engines/table-engines/special/keepermap.md (new file, 111 lines)

@@ -0,0 +1,111 @@
+---
+slug: /en/engines/table-engines/special/keeper-map
+sidebar_position: 150
+sidebar_label: KeeperMap
+---
+
+# KeeperMap {#keepermap}
+
+This engine allows you to use a Keeper/ZooKeeper cluster as a consistent key-value store with linearizable writes and sequentially consistent reads.
+
+To enable the KeeperMap storage engine, define a ZooKeeper path where the tables will be stored using the `<keeper_map_path_prefix>` config.
+
+For example:
+
+```xml
+<clickhouse>
+    <keeper_map_path_prefix>/keeper_map_tables</keeper_map_path_prefix>
+</clickhouse>
+```
+
+where the path can be any valid ZooKeeper path.
+
+## Creating a Table {#table_engine-KeeperMap-creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
+    ...
+) ENGINE = KeeperMap(root_path, [keys_limit]) PRIMARY KEY(primary_key_name)
+```
+
+Engine parameters:
+
+- `root_path` - the ZooKeeper path where the `table_name` will be stored.
+This path should not contain the prefix defined by the `<keeper_map_path_prefix>` config, because that prefix is appended to `root_path` automatically.
+Additionally, the `auxiliary_zookeeper_cluster_name:/some/path` format is also supported, where `auxiliary_zookeeper_cluster` is a ZooKeeper cluster defined inside the `<auxiliary_zookeepers>` config.
+By default, the ZooKeeper cluster defined inside the `<zookeeper>` config is used.
+- `keys_limit` - the number of keys allowed inside the table.
+This is a soft limit; in some edge cases more keys may end up in the table.
+- `primary_key_name` – any column name from the column list.
+- The primary key must be specified and supports only one column. The primary key is serialized in binary as a `node name` inside ZooKeeper.
+- Columns other than the primary key are serialized to binary in the corresponding order and stored as the value of the node defined by the serialized key.
+- Queries that filter the key with `equals` or `in` are optimized to a multi-key lookup from `Keeper`; otherwise all values are fetched.
+
+Example:
+
+``` sql
+CREATE TABLE keeper_map_table
+(
+    `key` String,
+    `v1` UInt32,
+    `v2` String,
+    `v3` Float32
+)
+ENGINE = KeeperMap(/keeper_map_table, 4)
+PRIMARY KEY key
+```
+
+with
+
+```xml
+<clickhouse>
+    <keeper_map_path_prefix>/keeper_map_tables</keeper_map_path_prefix>
+</clickhouse>
+```
+
+Each value, which is the binary serialization of `(v1, v2, v3)`, will be stored inside `/keeper_map_tables/keeper_map_table/data/serialized_key` in `Keeper`.
+Additionally, the number of keys has a soft limit of 4.
+
+If multiple tables are created on the same ZooKeeper path, the values are persisted until at least one table using the path exists.
+As a result, it is possible to use the `ON CLUSTER` clause when creating the table, sharing the data across multiple ClickHouse instances.
+It is also possible to manually run `CREATE TABLE` with the same path on unrelated ClickHouse instances to get the same data-sharing effect.
+
+## Supported operations {#table_engine-KeeperMap-supported-operations}
+
+### Inserts
+
+When new rows are inserted into `KeeperMap`, if the key already exists, the value will be updated, otherwise a new key is created.
+
+Example:
+
+```sql
+INSERT INTO keeper_map_table VALUES ('some key', 1, 'value', 3.2);
+```
+
+### Deletes
+
+Rows can be deleted using the `DELETE` query or `TRUNCATE`.
+
+```sql
+DELETE FROM keeper_map_table WHERE key LIKE 'some%' AND v1 > 1;
+```
+
+```sql
+ALTER TABLE keeper_map_table DELETE WHERE key LIKE 'some%' AND v1 > 1;
+```
+
+```sql
+TRUNCATE TABLE keeper_map_table;
+```
+
+### Updates
+
+Values can be updated using the `ALTER TABLE` query. The primary key cannot be updated.
+
+```sql
+ALTER TABLE keeper_map_table UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
+```

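The key-filtering behaviour described above is the main performance lever of this engine: when the `WHERE` clause pins the primary key with `=` or `IN`, only the matching Keeper nodes are fetched. A sketch against the `keeper_map_table` example, for illustration only and not part of the patch:

```sql
-- Illustrative only: an equality/IN filter on the primary key is served by a
-- multi-key lookup in Keeper, while other predicates require fetching all values.
SELECT key, v1, v2, v3
FROM keeper_map_table
WHERE key IN ('some key', 'another key');

-- No key filter: every value stored under the table's Keeper path is fetched.
SELECT count() FROM keeper_map_table WHERE v1 > 1;
```
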
@@ -1012,6 +1012,24 @@ Default value: 2.
 <background_merges_mutations_concurrency_ratio>3</background_merges_mutations_concurrency_ratio>
 ```
+
+## background_merges_mutations_scheduling_policy {#background_merges_mutations_scheduling_policy}
+
+The algorithm used to select the next merge or mutation to be executed by the background thread pool. The policy may be changed at runtime without a server restart.
+Could be applied from the `default` profile for backward compatibility.
+
+Possible values:
+
+- "round_robin" — Every concurrent merge and mutation is executed in round-robin order to ensure starvation-free operation. Smaller merges are completed faster than bigger ones just because they have fewer blocks to merge.
+- "shortest_task_first" — Always execute the smaller merge or mutation. Merges and mutations are assigned priorities based on their resulting size. Merges with smaller sizes are strictly preferred over bigger ones. This policy ensures the fastest possible merge of small parts but can lead to indefinite starvation of big merges in partitions heavily overloaded by INSERTs.
+
+Default value: "round_robin".
+
+**Example**
+
+```xml
+<background_merges_mutations_scheduling_policy>shortest_task_first</background_merges_mutations_scheduling_policy>
+```
+
 ## background_move_pool_size {#background_move_pool_size}
 
 Sets the number of threads performing background moves for tables with MergeTree engines. Could be increased at runtime and could be applied at server startup from the `default` profile for backward compatibility.

docs/en/sql-reference/functions/ulid-functions.md (new file, 53 lines)

@@ -0,0 +1,53 @@
+---
+slug: /en/sql-reference/functions/ulid-functions
+sidebar_position: 54
+sidebar_label: ULID
+---
+
+# Functions for Working with ULID
+
+## generateULID
+
+Generates a [ULID](https://github.com/ulid/spec).
+
+**Syntax**
+
+``` sql
+generateULID([x])
+```
+
+**Arguments**
+
+- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself is used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter.
+
+**Returned value**
+
+A value of [FixedString](../data-types/fixedstring.md) type.
+
+**Usage example**
+
+``` sql
+SELECT generateULID()
+```
+
+``` text
+┌─generateULID()─────────────┐
+│ 01GNB2S2FGN2P93QPXDNB4EN2R │
+└────────────────────────────┘
+```
+
+**Usage example for generating multiple values in one row**
+
+```sql
+SELECT generateULID(1), generateULID(2)
+```
+
+``` text
+┌─generateULID(1)────────────┬─generateULID(2)────────────┐
+│ 01GNB2SGG4RHKVNT9ZGA4FFMNP │ 01GNB2SGG4V0HMQVH4VBVPSSRB │
+└────────────────────────────┴────────────────────────────┘
+```
+
+## See Also
+
+- [UUID](../../sql-reference/functions/uuid-functions.md)

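Because a ULID begins with a timestamp component (the spec calls it lexicographically sortable), ordering the generated strings roughly follows generation order. An illustrative query, not part of the new documentation page:

```sql
-- Illustrative only: later-generated ULIDs compare greater than earlier ones,
-- so sorting by the ULID column approximates generation order.
SELECT generateULID(number) AS ulid
FROM numbers(5)
ORDER BY ulid;
```
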
@@ -26,3 +26,44 @@ The `PREWHERE` section is executed before `FINAL`, so the results of `FROM ... F
 ## Limitations
 
 `PREWHERE` is only supported by tables from the [*MergeTree](../../../engines/table-engines/mergetree-family/index.md) family.
+
+## Example
+
+```sql
+CREATE TABLE mydata
+(
+    `A` Int64,
+    `B` Int8,
+    `C` String
+)
+ENGINE = MergeTree
+ORDER BY A AS
+SELECT
+    number,
+    0,
+    if(number between 1000 and 2000, 'x', toString(number))
+FROM numbers(10000000);
+
+SELECT count()
+FROM mydata
+WHERE (B = 0) AND (C = 'x');
+
+1 row in set. Elapsed: 0.074 sec. Processed 10.00 million rows, 168.89 MB (134.98 million rows/s., 2.28 GB/s.)
+
+-- let's enable tracing to see which predicates are moved to PREWHERE
+set send_logs_level='debug';
+
+MergeTreeWhereOptimizer: condition "B = 0" moved to PREWHERE
+-- ClickHouse automatically moves `B = 0` to PREWHERE, but it makes no sense because B is always 0.
+
+-- Let's move the other predicate, `C = 'x'`
+
+SELECT count()
+FROM mydata
+PREWHERE C = 'x'
+WHERE B = 0;
+
+1 row in set. Elapsed: 0.069 sec. Processed 10.00 million rows, 158.89 MB (144.90 million rows/s., 2.30 GB/s.)
+
+-- This query with manual `PREWHERE` processes slightly less data: 158.89 MB vs 168.89 MB
+```

@@ -27,18 +27,21 @@ A table with the specified structure for reading or writing data in the specified file.
 
 **Examples**
 
-Select the data from all files in the cluster `cluster_simple`:
+Select the data from all the files in the `/root/data/clickhouse` and `/root/data/database/` folders, using all the nodes in the `cluster_simple` cluster:
 
 ``` sql
-SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon);
+SELECT * FROM s3Cluster(
+    'cluster_simple',
+    'http://minio1:9001/root/data/{clickhouse,database}/*',
+    'minio',
+    'minio123',
+    'CSV',
+    'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon
+);
 ```
 
 Count the total amount of rows in all files in the cluster `cluster_simple`:
 
-``` sql
-SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');
-```
-
 :::warning
 If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
 :::

@@ -21,6 +21,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
 | `lag/lead(value, offset)` | Not supported. Workarounds: |
 | | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` |
 | | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
+| ntile(buckets) | Supported. Specify a window like `(partition by x order by y rows between unbounded preceding and unbounded following)`. |
 
 ## ClickHouse-specific Window Functions
 

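For the newly documented `ntile(buckets)` entry, a minimal illustration (not part of the patch) using the window specification suggested in the table:

```sql
-- Illustrative only: distribute 9 rows into 3 equal buckets.
SELECT
    number,
    ntile(3) OVER (ORDER BY number ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS bucket
FROM numbers(9);
```
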
@@ -61,7 +61,7 @@ Materialized views in ClickHouse behave more like insert triggers. If the view query ...
 
 Note that materialized views are affected by the [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting: data is merged before being inserted into the view.
 
-Views look the same as normal tables. For example, they are listed in the result of the 1SHOW TABLES1 query.
+Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
 
 To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop#drop-view). `DROP TABLE` also works for views.
 

@@ -1225,8 +1225,8 @@ TaskStatus ClusterCopier::iterateThroughAllPiecesInPartition(const ConnectionTim
             std::this_thread::sleep_for(retry_delay_ms);
         }
 
-        was_active_pieces = (res == TaskStatus::Active);
-        was_failed_pieces = (res == TaskStatus::Error);
+        was_active_pieces |= (res == TaskStatus::Active);
+        was_failed_pieces |= (res == TaskStatus::Error);
     }
 
     if (was_failed_pieces)

@@ -1282,6 +1282,8 @@ try
             auto new_pool_size = config->getUInt64("background_pool_size", 16);
             auto new_ratio = config->getUInt64("background_merges_mutations_concurrency_ratio", 2);
             global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size * new_ratio);
+            auto new_scheduling_policy = config->getString("background_merges_mutations_scheduling_policy", "round_robin");
+            global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_scheduling_policy);
         }
 
         if (global_context->areBackgroundExecutorsInitialized() && config->has("background_move_pool_size"))

@@ -339,6 +339,7 @@
     <background_buffer_flush_schedule_pool_size>16</background_buffer_flush_schedule_pool_size>
     <background_pool_size>16</background_pool_size>
     <background_merges_mutations_concurrency_ratio>2</background_merges_mutations_concurrency_ratio>
+    <background_merges_mutations_scheduling_policy>round_robin</background_merges_mutations_scheduling_policy>
     <background_move_pool_size>8</background_move_pool_size>
     <background_fetches_pool_size>8</background_fetches_pool_size>
     <background_common_pool_size>8</background_common_pool_size>

@@ -92,7 +92,52 @@
 
     .chart div { position: absolute; }
 
-    .inputs { font-size: 14pt; }
+    .inputs {
+        height: auto;
+        width: 100%;
+
+        font-size: 14pt;
+
+        display: flex;
+        flex-flow: column nowrap;
+        justify-content: center;
+    }
+
+    .inputs.unconnected {
+        height: 100vh;
+    }
+    .unconnected #params {
+        display: flex;
+        flex-flow: column nowrap;
+        justify-content: center;
+        align-items: center;
+    }
+    .unconnected #connection-params {
+        width: 50%;
+
+        display: flex;
+        flex-flow: row wrap;
+    }
+    .unconnected #url {
+        width: 100%;
+    }
+    .unconnected #user {
+        width: 50%;
+    }
+    .unconnected #password {
+        width: 49.5%;
+    }
+    .unconnected input {
+        margin-bottom: 5px;
+    }
+
+    .inputs #chart-params {
+        display: block;
+    }
+
+    .inputs.unconnected #chart-params {
+        display: none;
+    }
+
     #connection-params {
         margin-bottom: 0.5rem;

@@ -223,6 +268,10 @@
         color: var(--chart-button-hover-color);
     }
 
+    .disabled {
+        opacity: 0.5;
+    }
+
     .query-editor {
         display: none;
         grid-template-columns: auto fit-content(10%);

@@ -286,7 +335,7 @@
 </style>
 </head>
 <body>
-<div class="inputs">
+<div class="inputs unconnected">
 <form id="params">
     <div id="connection-params">
         <input spellcheck="false" id="url" type="text" value="" placeholder="URL" />

@@ -294,8 +343,8 @@
         <input spellcheck="false" id="password" type="password" placeholder="password" />
     </div>
     <div>
-        <input id="reload" type="button" value="Reload" style="display: none;">
-        <input id="add" type="button" value="Add chart">
+        <input id="reload" type="button" value="Reload">
+        <input id="add" type="button" value="Add chart" style="display: none;">
         <span class="nowrap themes"><span id="toggle-dark">🌚</span><span id="toggle-light">🌞</span></span>
         <div id="chart-params"></div>
     </div>

@@ -845,7 +894,7 @@ async function draw(idx, chart, url_params, query) {
         error_div.firstChild.data = error;
         title_div.style.display = 'none';
         error_div.style.display = 'block';
-        return;
+        return false;
     } else {
         error_div.firstChild.data = '';
         error_div.style.display = 'none';

@@ -886,6 +935,7 @@ async function draw(idx, chart, url_params, query) {
     /// Set title
     const title = queries[idx] && queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : '';
     chart.querySelector('.title').firstChild.data = title;
+    return true
 }
 
 function showAuthError(message) {

@@ -902,8 +952,6 @@ function showAuthError(message) {
 function hideAuthError() {
     const charts = document.querySelector('#charts');
     charts.style.display = 'flex';
-    const add = document.querySelector('#add');
-    add.style.display = 'block';
 
     const authError = document.querySelector('#auth-error');
     authError.textContent = '';

@@ -924,9 +972,20 @@ async function drawAll() {
             if (!firstLoad) {
                 showAuthError(e.message);
             }
+            return false;
         });
-    })).then(() => {
-        firstLoad = false;
+    })).then((results) => {
+        if (firstLoad) {
+            firstLoad = false;
+        } else {
+            enableReloadButton();
+        }
+        if (!results.includes(false)) {
+            const element = document.querySelector('.inputs');
+            element.classList.remove('unconnected');
+            const add = document.querySelector('#add');
+            add.style.display = 'block';
+        }
     })
 }
 

@@ -941,11 +1000,25 @@ function resize() {
 
 new ResizeObserver(resize).observe(document.body);
 
+function disableReloadButton() {
+    const reloadButton = document.getElementById('reload')
+    reloadButton.value = 'Reloading...'
+    reloadButton.disabled = true
+    reloadButton.classList.add('disabled')
+}
+
+function enableReloadButton() {
+    const reloadButton = document.getElementById('reload')
+    reloadButton.value = 'Reload'
+    reloadButton.disabled = false
+    reloadButton.classList.remove('disabled')
+}
+
 function reloadAll() {
     updateParams();
     drawAll();
     saveState();
-    document.getElementById('reload').style.display = 'none';
+    disableReloadButton()
 }
 
 document.getElementById('params').onsubmit = function(event) {

@@ -550,6 +550,10 @@ if (ENABLE_NLP)
     dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data)
 endif()
 
+if (TARGET ch_contrib::ulid)
+    dbms_target_link_libraries (PUBLIC ch_contrib::ulid)
+endif()
+
 if (TARGET ch_contrib::bzip2)
     target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::bzip2)
 endif()

@@ -56,3 +56,4 @@
 #cmakedefine01 USE_BLAKE3
 #cmakedefine01 USE_SKIM
 #cmakedefine01 USE_OPENSSL_INTREE
+#cmakedefine01 USE_ULID

@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Processors/Transforms/MongoDBSource.h>
+#include <Processors/Sources/MongoDBSource.h>
 #include <Core/Block.h>
 
 #include "DictionaryStructure.h"

@@ -8,7 +8,7 @@
 #if USE_LIBPQXX
 #include <Columns/ColumnString.h>
 #include <DataTypes/DataTypeString.h>
-#include <Processors/Transforms/PostgreSQLSource.h>
+#include <Processors/Sources/PostgreSQLSource.h>
 #include "readInvalidateQuery.h"
 #include <Interpreters/Context.h>
 #include <QueryPipeline/QueryPipeline.h>

src/Functions/generateULID.cpp (new file, 94 lines)

@@ -0,0 +1,94 @@
+#include "config.h"
+
+#if USE_ULID
+
+#include <Columns/ColumnFixedString.h>
+#include <DataTypes/DataTypeFixedString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunction.h>
+#include <Interpreters/Context.h>
+
+#include <ulid.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+class FunctionGenerateULID : public IFunction
+{
+public:
+    static constexpr size_t ULID_LENGTH = 26;
+
+    static constexpr auto name = "generateULID";
+
+    static FunctionPtr create(ContextPtr /*context*/)
+    {
+        return std::make_shared<FunctionGenerateULID>();
+    }
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 0; }
+
+    bool isVariadic() const override { return true; }
+    bool isDeterministic() const override { return false; }
+    bool isDeterministicInScopeOfQuery() const override { return false; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (arguments.size() > 1)
+            throw Exception(
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.",
+                getName(), arguments.size());
+
+        return std::make_shared<DataTypeFixedString>(ULID_LENGTH);
+    }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        auto col_res = ColumnFixedString::create(ULID_LENGTH);
+        auto & vec_res = col_res->getChars();
+
+        vec_res.resize(input_rows_count * ULID_LENGTH);
+
+        ulid_generator generator;
+        ulid_generator_init(&generator, 0);
+
+        for (size_t offset = 0, size = vec_res.size(); offset < size; offset += ULID_LENGTH)
+            ulid_generate(&generator, reinterpret_cast<char *>(&vec_res[offset]));
+
+        return col_res;
+    }
+};
+
+
+REGISTER_FUNCTION(GenerateULID)
+{
+    factory.registerFunction<FunctionGenerateULID>(
+        {
+            R"(
+Generates a Universally Unique Lexicographically Sortable Identifier (ULID).
+This function takes an optional argument, the value of which is discarded to generate different values in case the function is called multiple times.
+The function returns a value of type FixedString(26).
+)",
+            Documentation::Examples{
+                {"ulid", "SELECT generateULID()"},
+                {"multiple", "SELECT generateULID(1), generateULID(2)"}},
+            Documentation::Categories{"ULID"}
+        },
+        FunctionFactory::CaseSensitive);
+}
+
+}
+
+#endif

@@ -77,8 +77,17 @@ public:
 
         return std::make_shared<DataTypeUInt8>();
     }
 
+    ColumnPtr executeImplDryRun(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
+    {
+        return execute(arguments, result_type, true);
+    }
+
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
+    {
+        return execute(arguments, result_type, false);
+    }
+
+    ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, bool dry_run) const
     {
         const IColumn * col = arguments[0].column.get();
 

@@ -99,11 +108,14 @@ public:
             if (seconds > 3.0)   /// The choice is arbitrary
                 throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds));
 
-            UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size);
-            UInt64 microseconds = static_cast<UInt64>(seconds * count * 1e6);
-            sleepForMicroseconds(microseconds);
-            ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
-            ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);
+            if (!dry_run)
+            {
+                UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size);
+                UInt64 microseconds = static_cast<UInt64>(seconds * count * 1e6);
+                sleepForMicroseconds(microseconds);
+                ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
+                ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);
+            }
         }
 
         /// convertToFullColumn needed, because otherwise (constant expression case) function will not get called on each columns.

@@ -27,6 +27,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int TOO_MANY_REDIRECTS;
 }
 
 namespace S3

@@ -80,6 +81,67 @@ void Client::RetryStrategy::RequestBookkeeping(const Aws::Client::HttpResponseOu
     return wrapped_strategy->RequestBookkeeping(httpResponseOutcome, lastError);
 }
 
+namespace
+{
+
+void verifyClientConfiguration(const Aws::Client::ClientConfiguration & client_config)
+{
+    if (!client_config.retryStrategy)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "The S3 client can only be used with Client::RetryStrategy, define it in the client configuration");
+
+    assert_cast<const Client::RetryStrategy &>(*client_config.retryStrategy);
+}
+
+}
+
+std::unique_ptr<Client> Client::create(
+    size_t max_redirects_,
+    const std::shared_ptr<Aws::Auth::AWSCredentialsProvider> & credentials_provider,
+    const Aws::Client::ClientConfiguration & client_configuration,
+    Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads,
+    bool use_virtual_addressing)
+{
+    verifyClientConfiguration(client_configuration);
+    return std::unique_ptr<Client>(
+        new Client(max_redirects_, credentials_provider, client_configuration, sign_payloads, use_virtual_addressing));
+}
+
+std::unique_ptr<Client> Client::create(const Client & other)
+{
+    return std::unique_ptr<Client>(new Client(other));
+}
+
+Client::Client(
+    size_t max_redirects_,
+    const std::shared_ptr<Aws::Auth::AWSCredentialsProvider> & credentials_provider,
+    const Aws::Client::ClientConfiguration & client_configuration,
+    Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads,
+    bool use_virtual_addressing)
+    : Aws::S3::S3Client(credentials_provider, client_configuration, std::move(sign_payloads), use_virtual_addressing)
+    , max_redirects(max_redirects_)
+    , log(&Poco::Logger::get("S3Client"))
+{
+    auto * endpoint_provider = dynamic_cast<Aws::S3::Endpoint::S3DefaultEpProviderBase *>(accessEndpointProvider().get());
+    endpoint_provider->GetBuiltInParameters().GetParameter("Region").GetString(explicit_region);
+    std::string endpoint;
+    endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(endpoint);
+    detect_region = explicit_region == Aws::Region::AWS_GLOBAL && endpoint.find(".amazonaws.com") != std::string::npos;
+
+    cache = std::make_shared<ClientCache>();
+    ClientCacheRegistry::instance().registerClient(cache);
+}
+
+Client::Client(const Client & other)
+    : Aws::S3::S3Client(other)
+    , explicit_region(other.explicit_region)
+    , detect_region(other.detect_region)
+    , max_redirects(other.max_redirects)
+    , log(&Poco::Logger::get("S3Client"))
+{
+    cache = std::make_shared<ClientCache>(*other.cache);
+    ClientCacheRegistry::instance().registerClient(cache);
+}
+
 bool Client::checkIfWrongRegionDefined(const std::string & bucket, const Aws::S3::S3Error & error, std::string & region) const
 {
     if (detect_region)

@@ -135,7 +197,7 @@ Model::HeadObjectOutcome Client::HeadObject(const HeadObjectRequest & request) c
         if (checkIfWrongRegionDefined(bucket, error, new_region))
         {
             request.overrideRegion(new_region);
-            return HeadObject(request);
+            return Aws::S3::S3Client::HeadObject(request);
         }
 
         if (error.GetResponseCode() != Aws::Http::HttpResponseCode::MOVED_PERMANENTLY)

@@ -248,6 +310,83 @@ Model::DeleteObjectsOutcome Client::DeleteObjects(const DeleteObjectsRequest & r
     return doRequest(request, [this](const Model::DeleteObjectsRequest & req) { return Aws::S3::S3Client::DeleteObjects(req); });
 }
 
+template <typename RequestType, typename RequestFn>
+std::invoke_result_t<RequestFn, RequestType>
+Client::doRequest(const RequestType & request, RequestFn request_fn) const
+{
+    const auto & bucket = request.GetBucket();
+
+    if (auto region = getRegionForBucket(bucket); !region.empty())
+    {
+        if (!detect_region)
+            LOG_INFO(log, "Using region override {} for bucket {}", region, bucket);
+
+        request.overrideRegion(std::move(region));
+    }
+
+    if (auto uri = getURIForBucket(bucket); uri.has_value())
+        request.overrideURI(std::move(*uri));
+
+
+    bool found_new_endpoint = false;
+    // if we found correct endpoint after 301 responses, update the cache for future requests
+    SCOPE_EXIT(
+        if (found_new_endpoint)
+        {
+            auto uri_override = request.getURIOverride();
+            assert(uri_override.has_value());
+            updateURIForBucket(bucket, std::move(*uri_override));
+        }
+    );
+
+    for (size_t attempt = 0; attempt <= max_redirects; ++attempt)
+    {
+        auto result = request_fn(request);
+        if (result.IsSuccess())
+            return result;
+
+        const auto & error = result.GetError();
+
+        std::string new_region;
+        if (checkIfWrongRegionDefined(bucket, error, new_region))
+        {
+            request.overrideRegion(new_region);
+            continue;
+        }
+
+        if (error.GetResponseCode() != Aws::Http::HttpResponseCode::MOVED_PERMANENTLY)
+            return result;
+
+        // maybe we detect a correct region
+        if (!detect_region)
+        {
+            if (auto region = GetErrorMarshaller()->ExtractRegion(error); !region.empty() && region != explicit_region)
+            {
+                request.overrideRegion(region);
+                insertRegionOverride(bucket, region);
+            }
+        }
+
+        // we possibly got new location, need to try with that one
+        auto new_uri = getURIFromError(error);
+        if (!new_uri)
+            return result;
+
+        const auto & current_uri_override = request.getURIOverride();
+        /// we already tried with this URI
+        if (current_uri_override && current_uri_override->uri == new_uri->uri)
+        {
+            LOG_INFO(log, "Getting redirected to the same invalid location {}", new_uri->uri.toString());
+            return result;
+        }
+
+        found_new_endpoint = true;
+        request.overrideURI(*new_uri);
+    }
+
+    throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects");
+}
+
 std::string Client::getRegionForBucket(const std::string & bucket, bool force_detect) const
 {
     std::lock_guard lock(cache->region_cache_mutex);

@@ -458,8 +597,8 @@ std::unique_ptr<S3::Client> ClientFactory::create( // NOLINT
     client_configuration.retryStrategy = std::make_shared<Client::RetryStrategy>(std::move(client_configuration.retryStrategy));
     return Client::create(
         client_configuration.s3_max_redirects,
-        std::move(credentials_provider),
-        std::move(client_configuration), // Client configuration.
+        credentials_provider,
+        client_configuration, // Client configuration.
         Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
         is_virtual_hosted_style || client_configuration.endpointOverride.empty() /// Use virtual addressing if endpoint is not specified.
     );

@ -19,16 +19,7 @@
|
|||||||
#include <aws/core/client/AWSErrorMarshaller.h>
|
#include <aws/core/client/AWSErrorMarshaller.h>
|
||||||
#include <aws/core/client/RetryStrategy.h>
|
#include <aws/core/client/RetryStrategy.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB::S3
|
||||||
{
|
|
||||||
|
|
||||||
namespace ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int LOGICAL_ERROR;
|
|
||||||
extern const int TOO_MANY_REDIRECTS;
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace S3
|
|
||||||
{
|
{
|
||||||
|
|
||||||
namespace Model = Aws::S3::Model;
|
namespace Model = Aws::S3::Model;
|
||||||
@ -80,15 +71,25 @@ private:
|
|||||||
/// - automatically detect endpoint and regions for each bucket and cache them
|
/// - automatically detect endpoint and regions for each bucket and cache them
|
||||||
///
|
///
|
||||||
/// For this client to work correctly both Client::RetryStrategy and Requests defined in <IO/S3/Requests.h> should be used.
|
/// For this client to work correctly both Client::RetryStrategy and Requests defined in <IO/S3/Requests.h> should be used.
|
||||||
class Client : public Aws::S3::S3Client
|
///
|
||||||
|
/// To add support for new type of request
|
||||||
|
/// - ExtendedRequest should be defined inside IO/S3/Requests.h
|
||||||
|
/// - new method accepting that request should be defined in this Client (check other requests for reference)
|
||||||
|
/// - method handling the request from Aws::S3::S3Client should be left to private so we don't use it by accident
|
||||||
|
class Client : private Aws::S3::S3Client
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
template <typename... Args>
|
/// we use a factory method to verify arguments before creating a client because
|
||||||
static std::unique_ptr<Client> create(Args &&... args)
|
/// there are certain requirements on arguments for it to work correctly
|
||||||
{
|
/// e.g. Client::RetryStrategy should be used
|
||||||
(verifyArgument(args), ...);
|
static std::unique_ptr<Client> create(
|
||||||
return std::unique_ptr<Client>(new Client(std::forward<Args>(args)...));
|
size_t max_redirects_,
|
||||||
}
|
const std::shared_ptr<Aws::Auth::AWSCredentialsProvider> & credentials_provider,
|
||||||
|
const Aws::Client::ClientConfiguration & client_configuration,
|
||||||
|
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads,
|
||||||
|
bool use_virtual_addressing);
|
||||||
|
|
||||||
|
static std::unique_ptr<Client> create(const Client & other);
|
||||||
|
|
||||||
Client & operator=(const Client &) = delete;
|
Client & operator=(const Client &) = delete;
|
||||||
|
|
||||||
@ -108,7 +109,12 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decorator for RetryStrategy needed for this client to work correctly
|
/// Decorator for RetryStrategy needed for this client to work correctly.
|
||||||
|
/// We want to manually handle permanent moves (status code 301) because:
|
||||||
|
/// - redirect location is written in XML format inside the response body something that doesn't exist for HEAD
|
||||||
|
/// requests so we need to manually find the correct location
|
||||||
|
/// - we want to cache the new location to decrease number of roundtrips for future requests
|
||||||
|
/// This decorator doesn't retry if 301 is detected and fallbacks to the inner retry strategy otherwise.
|
||||||
class RetryStrategy : public Aws::Client::RetryStrategy
|
class RetryStrategy : public Aws::Client::RetryStrategy
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -149,35 +155,19 @@ public:
|
|||||||
Model::DeleteObjectOutcome DeleteObject(const DeleteObjectRequest & request) const;
|
Model::DeleteObjectOutcome DeleteObject(const DeleteObjectRequest & request) const;
|
||||||
Model::DeleteObjectsOutcome DeleteObjects(const DeleteObjectsRequest & request) const;
|
Model::DeleteObjectsOutcome DeleteObjects(const DeleteObjectsRequest & request) const;
|
||||||
|
|
||||||
|
using Aws::S3::S3Client::EnableRequestProcessing;
|
||||||
|
using Aws::S3::S3Client::DisableRequestProcessing;
|
||||||
private:
|
private:
|
||||||
-    template <typename... Args>
-    explicit Client(size_t max_redirects_, Args &&... args)
-        : Aws::S3::S3Client(std::forward<Args>(args)...)
-        , max_redirects(max_redirects_)
-        , log(&Poco::Logger::get("S3Client"))
-    {
-        auto * endpoint_provider = dynamic_cast<Aws::S3::Endpoint::S3DefaultEpProviderBase *>(accessEndpointProvider().get());
-        endpoint_provider->GetBuiltInParameters().GetParameter("Region").GetString(explicit_region);
-        std::string endpoint;
-        endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(endpoint);
-        detect_region = explicit_region == Aws::Region::AWS_GLOBAL && endpoint.find(".amazonaws.com") != std::string::npos;
-
-        cache = std::make_shared<ClientCache>();
-        ClientCacheRegistry::instance().registerClient(cache);
-    }
-
-    Client(const Client & other)
-        : Aws::S3::S3Client(other)
-        , explicit_region(other.explicit_region)
-        , detect_region(other.detect_region)
-        , max_redirects(other.max_redirects)
-        , log(&Poco::Logger::get("S3Client"))
-    {
-        cache = std::make_shared<ClientCache>(*other.cache);
-        ClientCacheRegistry::instance().registerClient(cache);
-    }
-
-    /// Make regular functions private
+    Client(size_t max_redirects_,
+           const std::shared_ptr<Aws::Auth::AWSCredentialsProvider>& credentials_provider,
+           const Aws::Client::ClientConfiguration& client_configuration,
+           Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads,
+           bool use_virtual_addressing);
+
+    Client(const Client & other);
+
+    /// Leave regular functions private so we don't accidentally use them
+    /// otherwise region and endpoint redirection won't work
     using Aws::S3::S3Client::HeadObject;
     using Aws::S3::S3Client::ListObjectsV2;
     using Aws::S3::S3Client::ListObjects;
@ -196,80 +186,7 @@ private:
|
|||||||
|
|
||||||
template <typename RequestType, typename RequestFn>
|
template <typename RequestType, typename RequestFn>
|
||||||
std::invoke_result_t<RequestFn, RequestType>
|
std::invoke_result_t<RequestFn, RequestType>
|
||||||
doRequest(const RequestType & request, RequestFn request_fn) const
|
doRequest(const RequestType & request, RequestFn request_fn) const;
|
||||||
{
|
|
||||||
const auto & bucket = request.GetBucket();
|
|
||||||
|
|
||||||
if (auto region = getRegionForBucket(bucket); !region.empty())
|
|
||||||
{
|
|
||||||
if (!detect_region)
|
|
||||||
LOG_INFO(log, "Using region override {} for bucket {}", region, bucket);
|
|
||||||
|
|
||||||
request.overrideRegion(std::move(region));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (auto uri = getURIForBucket(bucket); uri.has_value())
|
|
||||||
request.overrideURI(std::move(*uri));
|
|
||||||
|
|
||||||
|
|
||||||
bool found_new_endpoint = false;
|
|
||||||
// if we found correct endpoint after 301 responses, update the cache for future requests
|
|
||||||
SCOPE_EXIT(
|
|
||||||
if (found_new_endpoint)
|
|
||||||
{
|
|
||||||
auto uri_override = request.getURIOverride();
|
|
||||||
assert(uri_override.has_value());
|
|
||||||
updateURIForBucket(bucket, std::move(*uri_override));
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
for (size_t attempt = 0; attempt <= max_redirects; ++attempt)
|
|
||||||
{
|
|
||||||
auto result = request_fn(request);
|
|
||||||
if (result.IsSuccess())
|
|
||||||
return result;
|
|
||||||
|
|
||||||
const auto & error = result.GetError();
|
|
||||||
|
|
||||||
std::string new_region;
|
|
||||||
if (checkIfWrongRegionDefined(bucket, error, new_region))
|
|
||||||
{
|
|
||||||
request.overrideRegion(new_region);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (error.GetResponseCode() != Aws::Http::HttpResponseCode::MOVED_PERMANENTLY)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
// maybe we detect a correct region
|
|
||||||
if (!detect_region)
|
|
||||||
{
|
|
||||||
if (auto region = GetErrorMarshaller()->ExtractRegion(error); !region.empty() && region != explicit_region)
|
|
||||||
{
|
|
||||||
request.overrideRegion(region);
|
|
||||||
insertRegionOverride(bucket, region);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// we possibly got new location, need to try with that one
|
|
||||||
auto new_uri = getURIFromError(error);
|
|
||||||
if (!new_uri)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
const auto & current_uri_override = request.getURIOverride();
|
|
||||||
/// we already tried with this URI
|
|
||||||
if (current_uri_override && current_uri_override->uri == new_uri->uri)
|
|
||||||
{
|
|
||||||
LOG_INFO(log, "Getting redirected to the same invalid location {}", new_uri->uri.toString());
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
found_new_endpoint = true;
|
|
||||||
request.overrideURI(*new_uri);
|
|
||||||
}
|
|
||||||
|
|
||||||
throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects");
|
|
||||||
}
|
|
||||||
|
|
||||||
void updateURIForBucket(const std::string & bucket, S3::URI new_uri) const;
|
void updateURIForBucket(const std::string & bucket, S3::URI new_uri) const;
|
||||||
std::optional<S3::URI> getURIFromError(const Aws::S3::S3Error & error) const;
|
std::optional<S3::URI> getURIFromError(const Aws::S3::S3Error & error) const;
|
||||||
@ -281,19 +198,6 @@ private:
|
|||||||
bool checkIfWrongRegionDefined(const std::string & bucket, const Aws::S3::S3Error & error, std::string & region) const;
|
bool checkIfWrongRegionDefined(const std::string & bucket, const Aws::S3::S3Error & error, std::string & region) const;
|
||||||
void insertRegionOverride(const std::string & bucket, const std::string & region) const;
|
void insertRegionOverride(const std::string & bucket, const std::string & region) const;
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
static void verifyArgument(const T & /*arg*/)
|
|
||||||
{}
|
|
||||||
|
|
||||||
template <std::derived_from<Aws::Client::ClientConfiguration> T>
|
|
||||||
static void verifyArgument(const T & client_config)
|
|
||||||
{
|
|
||||||
if (!client_config.retryStrategy)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The S3 client can only be used with Client::RetryStrategy, define it in the client configuration");
|
|
||||||
|
|
||||||
assert_cast<const RetryStrategy &>(*client_config.retryStrategy);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string explicit_region;
|
std::string explicit_region;
|
||||||
mutable bool detect_region = true;
|
mutable bool detect_region = true;
|
||||||
|
|
||||||
@ -339,6 +243,4 @@ private:
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -42,7 +42,6 @@ namespace
 }

 /// Performs a request to get the size and last modification time of an object.
-/// The function performs either HeadObject or GetObjectAttributes request depending on the endpoint.
 std::pair<std::optional<ObjectInfo>, Aws::S3::S3Error> tryGetObjectInfo(
     const S3::Client & client, const String & bucket, const String & key, const String & version_id,
     const S3Settings::RequestSettings & /*request_settings*/, bool with_metadata, bool for_disk_s3)
@@ -87,7 +86,7 @@ ObjectInfo getObjectInfo(
     else if (throw_on_error)
     {
         throw DB::Exception(ErrorCodes::S3_ERROR,
-            "Failed to get object attributes: {}. HTTP response code: {}",
+            "Failed to get object info: {}. HTTP response code: {}",
             error.GetMessage(), static_cast<size_t>(error.GetResponseCode()));
     }
     return {};
@@ -3795,6 +3795,12 @@ void Context::initializeBackgroundExecutorsIfNeeded()
     else if (config.has("profiles.default.background_merges_mutations_concurrency_ratio"))
         background_merges_mutations_concurrency_ratio = config.getUInt64("profiles.default.background_merges_mutations_concurrency_ratio");

+    String background_merges_mutations_scheduling_policy = "round_robin";
+    if (config.has("background_merges_mutations_scheduling_policy"))
+        background_merges_mutations_scheduling_policy = config.getString("background_merges_mutations_scheduling_policy");
+    else if (config.has("profiles.default.background_merges_mutations_scheduling_policy"))
+        background_merges_mutations_scheduling_policy = config.getString("profiles.default.background_merges_mutations_scheduling_policy");
+
     size_t background_move_pool_size = 8;
     if (config.has("background_move_pool_size"))
         background_move_pool_size = config.getUInt64("background_move_pool_size");
@@ -3819,10 +3825,11 @@ void Context::initializeBackgroundExecutorsIfNeeded()
         "MergeMutate",
         /*max_threads_count*/background_pool_size,
         /*max_tasks_count*/background_pool_size * background_merges_mutations_concurrency_ratio,
-        CurrentMetrics::BackgroundMergesAndMutationsPoolTask
+        CurrentMetrics::BackgroundMergesAndMutationsPoolTask,
+        background_merges_mutations_scheduling_policy
     );
-    LOG_INFO(shared->log, "Initialized background executor for merges and mutations with num_threads={}, num_tasks={}",
-        background_pool_size, background_pool_size * background_merges_mutations_concurrency_ratio);
+    LOG_INFO(shared->log, "Initialized background executor for merges and mutations with num_threads={}, num_tasks={}, scheduling_policy={}",
+        background_pool_size, background_pool_size * background_merges_mutations_concurrency_ratio, background_merges_mutations_scheduling_policy);

     shared->moves_executor = std::make_shared<OrdinaryBackgroundExecutor>
     (
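The three-way fallback above (explicit key, then profiles.default.<key>, then a hard-coded default) repeats for several settings in this function. A hypothetical helper, written generically so it does not depend on the exact configuration class, could express the pattern once; the function name is invented for this sketch.

#include <string>

/// Works with any config object exposing has(key) and getString(key),
/// such as the `config` used in Context::initializeBackgroundExecutorsIfNeeded().
template <typename Config>
std::string getStringWithProfileFallback(const Config & config, const std::string & key, std::string def)
{
    if (config.has(key))
        return config.getString(key);
    if (config.has("profiles.default." + key))
        return config.getString("profiles.default." + key);
    return def;
}

/// Hypothetical usage:
///   auto policy = getStringWithProfileFallback(config, "background_merges_mutations_scheduling_policy", "round_robin");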
@@ -131,11 +131,17 @@ class StoragePolicySelector;
 using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
 template <class Queue>
 class MergeTreeBackgroundExecutor;
-class MergeMutateRuntimeQueue;
-class OrdinaryRuntimeQueue;
-using MergeMutateBackgroundExecutor = MergeTreeBackgroundExecutor<MergeMutateRuntimeQueue>;
+/// Scheduling policy can be changed using `background_merges_mutations_scheduling_policy` config option.
+/// By default concurrent merges are scheduled using "round_robin" to ensure fair and starvation-free operation.
+/// Previously in heavily overloaded shards big merges could possibly be starved by smaller
+/// merges due to the use of strict priority scheduling "shortest_task_first".
+class DynamicRuntimeQueue;
+using MergeMutateBackgroundExecutor = MergeTreeBackgroundExecutor<DynamicRuntimeQueue>;
 using MergeMutateBackgroundExecutorPtr = std::shared_ptr<MergeMutateBackgroundExecutor>;
-using OrdinaryBackgroundExecutor = MergeTreeBackgroundExecutor<OrdinaryRuntimeQueue>;
+class RoundRobinRuntimeQueue;
+using OrdinaryBackgroundExecutor = MergeTreeBackgroundExecutor<RoundRobinRuntimeQueue>;
 using OrdinaryBackgroundExecutorPtr = std::shared_ptr<OrdinaryBackgroundExecutor>;
 struct PartUUIDs;
 using PartUUIDsPtr = std::shared_ptr<PartUUIDs>;
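To make the trade-off in the comment above concrete, here is a small self-contained toy simulation (not ClickHouse code; the Task type and step counts are invented) of the two policies on one big and two small tasks: round-robin interleaves single steps of every task, while shortest-task-first finishes small tasks first and, under constant incoming load, can keep postponing the big one.

#include <algorithm>
#include <cstdio>
#include <deque>
#include <string>
#include <vector>

// A toy "merge": a name plus how many execution steps are left.
struct Task { std::string name; int steps_left; };

// round_robin: execute one step, then re-enqueue at the tail (fair, starvation-free).
void runRoundRobin(std::deque<Task> pending)
{
    std::printf("round_robin         : ");
    while (!pending.empty())
    {
        Task t = pending.front(); pending.pop_front();
        std::printf("%s ", t.name.c_str());
        if (--t.steps_left > 0)
            pending.push_back(t);
    }
    std::printf("\n");
}

// shortest_task_first: always pick the task with the fewest remaining steps
// (small merges finish sooner, but a big one can be starved while small ones keep arriving).
void runShortestFirst(std::vector<Task> pending)
{
    std::printf("shortest_task_first : ");
    while (!pending.empty())
    {
        auto it = std::min_element(pending.begin(), pending.end(),
            [](const Task & a, const Task & b) { return a.steps_left < b.steps_left; });
        std::printf("%s ", it->name.c_str());
        if (--it->steps_left == 0)
            pending.erase(it);
    }
    std::printf("\n");
}

int main()
{
    // big merge B (4 steps), small merges s1 and s2 (1 step each)
    runRoundRobin({{"B", 4}, {"s1", 1}, {"s2", 1}});    // prints: B s1 s2 B B B
    runShortestFirst({{"B", 4}, {"s1", 1}, {"s2", 1}}); // prints: s1 s2 B B B B
}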
@@ -336,6 +336,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl
     {
         auto subquery_options = select_query_options.subquery();
         Planner subquery_planner(table_expression, subquery_options, planner_context->getGlobalPlannerContext());
+        /// Propagate storage limits to subquery
+        subquery_planner.addStorageLimits(*select_query_info.storage_limits);
         subquery_planner.buildQueryPlanIfNeeded();
         query_plan = std::move(subquery_planner).extractQueryPlan();
     }
@@ -86,7 +86,9 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node)

     /// update input order info in read_from_merge_tree step
     const int direction = 0; /// for DISTINCT direction doesn't matter, ReadFromMergeTree will choose proper one
-    read_from_merge_tree->requestReadingInOrder(number_of_sorted_distinct_columns, direction, pre_distinct->getLimitHint());
+    bool can_read = read_from_merge_tree->requestReadingInOrder(number_of_sorted_distinct_columns, direction, pre_distinct->getLimitHint());
+    if (!can_read)
+        return 0;

     /// update data stream's sorting properties for found transforms
     const DataStream * input_stream = &read_from_merge_tree->getOutputStream();
@ -913,7 +913,7 @@ AggregationInputOrder buildInputOrderInfo(
|
|||||||
}
|
}
|
||||||
|
|
||||||
InputOrderInfoPtr buildInputOrderInfo(
|
InputOrderInfoPtr buildInputOrderInfo(
|
||||||
ReadFromMergeTree * reading,
|
const ReadFromMergeTree * reading,
|
||||||
const FixedColumns & fixed_columns,
|
const FixedColumns & fixed_columns,
|
||||||
const ActionsDAGPtr & dag,
|
const ActionsDAGPtr & dag,
|
||||||
const SortDescription & description,
|
const SortDescription & description,
|
||||||
@ -1041,7 +1041,11 @@ InputOrderInfoPtr buildInputOrderInfo(SortingStep & sorting, QueryPlan::Node & n
|
|||||||
limit);
|
limit);
|
||||||
|
|
||||||
if (order_info)
|
if (order_info)
|
||||||
reading->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit);
|
{
|
||||||
|
bool can_read = reading->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit);
|
||||||
|
if (!can_read)
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
return order_info;
|
return order_info;
|
||||||
}
|
}
|
||||||
@ -1054,7 +1058,11 @@ InputOrderInfoPtr buildInputOrderInfo(SortingStep & sorting, QueryPlan::Node & n
|
|||||||
limit);
|
limit);
|
||||||
|
|
||||||
if (order_info)
|
if (order_info)
|
||||||
merge->requestReadingInOrder(order_info);
|
{
|
||||||
|
bool can_read = merge->requestReadingInOrder(order_info);
|
||||||
|
if (!can_read)
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
return order_info;
|
return order_info;
|
||||||
}
|
}
|
||||||
@ -1086,10 +1094,14 @@ AggregationInputOrder buildInputOrderInfo(AggregatingStep & aggregating, QueryPl
|
|||||||
dag, keys);
|
dag, keys);
|
||||||
|
|
||||||
if (order_info.input_order)
|
if (order_info.input_order)
|
||||||
reading->requestReadingInOrder(
|
{
|
||||||
|
bool can_read = reading->requestReadingInOrder(
|
||||||
order_info.input_order->used_prefix_of_sorting_key_size,
|
order_info.input_order->used_prefix_of_sorting_key_size,
|
||||||
order_info.input_order->direction,
|
order_info.input_order->direction,
|
||||||
order_info.input_order->limit);
|
order_info.input_order->limit);
|
||||||
|
if (!can_read)
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
return order_info;
|
return order_info;
|
||||||
}
|
}
|
||||||
@ -1101,7 +1113,11 @@ AggregationInputOrder buildInputOrderInfo(AggregatingStep & aggregating, QueryPl
|
|||||||
dag, keys);
|
dag, keys);
|
||||||
|
|
||||||
if (order_info.input_order)
|
if (order_info.input_order)
|
||||||
merge->requestReadingInOrder(order_info.input_order);
|
{
|
||||||
|
bool can_read = merge->requestReadingInOrder(order_info.input_order);
|
||||||
|
if (!can_read)
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
return order_info;
|
return order_info;
|
||||||
}
|
}
|
||||||
@ -1296,7 +1312,9 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
|
|||||||
|
|
||||||
if (order_info)
|
if (order_info)
|
||||||
{
|
{
|
||||||
read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit);
|
bool can_read = read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit);
|
||||||
|
if (!can_read)
|
||||||
|
return 0;
|
||||||
sorting->convertToFinishSorting(order_info->sort_description_for_merging);
|
sorting->convertToFinishSorting(order_info->sort_description_for_merging);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -283,7 +283,6 @@ Pipe ReadFromMergeTree::readFromPool(
|
|||||||
total_rows = query_info.limit;
|
total_rows = query_info.limit;
|
||||||
|
|
||||||
const auto & settings = context->getSettingsRef();
|
const auto & settings = context->getSettingsRef();
|
||||||
MergeTreeReadPool::BackoffSettings backoff_settings(settings);
|
|
||||||
|
|
||||||
/// round min_marks_to_read up to nearest multiple of block_size expressed in marks
|
/// round min_marks_to_read up to nearest multiple of block_size expressed in marks
|
||||||
/// If granularity is adaptive it doesn't make sense
|
/// If granularity is adaptive it doesn't make sense
|
||||||
@ -295,18 +294,54 @@ Pipe ReadFromMergeTree::readFromPool(
|
|||||||
/ max_block_size * max_block_size / fixed_index_granularity;
|
/ max_block_size * max_block_size / fixed_index_granularity;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto pool = std::make_shared<MergeTreeReadPool>(
|
bool all_parts_are_remote = true;
|
||||||
max_streams,
|
bool all_parts_are_local = true;
|
||||||
sum_marks,
|
for (const auto & part : parts_with_range)
|
||||||
min_marks_for_concurrent_read,
|
{
|
||||||
std::move(parts_with_range),
|
const bool is_remote = part.data_part->isStoredOnRemoteDisk();
|
||||||
storage_snapshot,
|
all_parts_are_local &= !is_remote;
|
||||||
prewhere_info,
|
all_parts_are_remote &= is_remote;
|
||||||
required_columns,
|
}
|
||||||
virt_column_names,
|
|
||||||
backoff_settings,
|
MergeTreeReadPoolPtr pool;
|
||||||
settings.preferred_block_size_bytes,
|
|
||||||
false);
|
if ((all_parts_are_remote
|
||||||
|
&& settings.allow_prefetched_read_pool_for_remote_filesystem
|
||||||
|
&& MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.remote_fs_method))
|
||||||
|
|| (!all_parts_are_local
|
||||||
|
&& settings.allow_prefetched_read_pool_for_local_filesystem
|
||||||
|
&& MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.remote_fs_method)))
|
||||||
|
{
|
||||||
|
pool = std::make_shared<MergeTreePrefetchedReadPool>(
|
||||||
|
max_streams,
|
||||||
|
sum_marks,
|
||||||
|
min_marks_for_concurrent_read,
|
||||||
|
std::move(parts_with_range),
|
||||||
|
storage_snapshot,
|
||||||
|
prewhere_info,
|
||||||
|
required_columns,
|
||||||
|
virt_column_names,
|
||||||
|
settings.preferred_block_size_bytes,
|
||||||
|
reader_settings,
|
||||||
|
context,
|
||||||
|
use_uncompressed_cache,
|
||||||
|
all_parts_are_remote,
|
||||||
|
*data.getSettings());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pool = std::make_shared<MergeTreeReadPool>(
|
||||||
|
max_streams,
|
||||||
|
sum_marks,
|
||||||
|
min_marks_for_concurrent_read,
|
||||||
|
std::move(parts_with_range),
|
||||||
|
storage_snapshot,
|
||||||
|
prewhere_info,
|
||||||
|
required_columns,
|
||||||
|
virt_column_names,
|
||||||
|
context,
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
|
||||||
auto * logger = &Poco::Logger::get(data.getLogName() + " (SelectExecutor)");
|
auto * logger = &Poco::Logger::get(data.getLogName() + " (SelectExecutor)");
|
||||||
LOG_DEBUG(logger, "Reading approx. {} rows with {} streams", total_rows, max_streams);
|
LOG_DEBUG(logger, "Reading approx. {} rows with {} streams", total_rows, max_streams);
|
||||||
@ -1176,8 +1211,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
|
|||||||
if (key_condition->alwaysFalse())
|
if (key_condition->alwaysFalse())
|
||||||
return std::make_shared<MergeTreeDataSelectAnalysisResult>(MergeTreeDataSelectAnalysisResult{.result = std::move(result)});
|
return std::make_shared<MergeTreeDataSelectAnalysisResult>(MergeTreeDataSelectAnalysisResult{.result = std::move(result)});
|
||||||
|
|
||||||
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
|
||||||
|
|
||||||
size_t total_marks_pk = 0;
|
size_t total_marks_pk = 0;
|
||||||
size_t parts_before_pk = 0;
|
size_t parts_before_pk = 0;
|
||||||
try
|
try
|
||||||
@ -1214,11 +1247,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
|
|||||||
auto reader_settings = getMergeTreeReaderSettings(context, query_info);
|
auto reader_settings = getMergeTreeReaderSettings(context, query_info);
|
||||||
|
|
||||||
bool use_skip_indexes = settings.use_skip_indexes;
|
bool use_skip_indexes = settings.use_skip_indexes;
|
||||||
bool final = false;
|
bool final = isFinal(query_info);
|
||||||
if (query_info.table_expression_modifiers)
|
|
||||||
final = query_info.table_expression_modifiers->hasFinal();
|
|
||||||
else
|
|
||||||
final = select.final();
|
|
||||||
|
|
||||||
if (final && !settings.use_skip_indexes_if_final)
|
if (final && !settings.use_skip_indexes_if_final)
|
||||||
use_skip_indexes = false;
|
use_skip_indexes = false;
|
||||||
@ -1273,12 +1302,17 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
|
|||||||
return std::make_shared<MergeTreeDataSelectAnalysisResult>(MergeTreeDataSelectAnalysisResult{.result = std::move(result)});
|
return std::make_shared<MergeTreeDataSelectAnalysisResult>(MergeTreeDataSelectAnalysisResult{.result = std::move(result)});
|
||||||
}
|
}
|
||||||
|
|
||||||
void ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t limit)
|
bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t limit)
|
||||||
{
|
{
|
||||||
/// if dirction is not set, use current one
|
/// if dirction is not set, use current one
|
||||||
if (!direction)
|
if (!direction)
|
||||||
direction = getSortDirection();
|
direction = getSortDirection();
|
||||||
|
|
||||||
|
/// Disable read-in-order optimization for reverse order with final.
|
||||||
|
/// Otherwise, it can lead to incorrect final behavior because the implementation may rely on the reading in direct order).
|
||||||
|
if (direction != 1 && isFinal(query_info))
|
||||||
|
return false;
|
||||||
|
|
||||||
auto order_info = std::make_shared<InputOrderInfo>(SortDescription{}, prefix_size, direction, limit);
|
auto order_info = std::make_shared<InputOrderInfo>(SortDescription{}, prefix_size, direction, limit);
|
||||||
if (query_info.projection)
|
if (query_info.projection)
|
||||||
query_info.projection->input_order_info = order_info;
|
query_info.projection->input_order_info = order_info;
|
||||||
@ -1312,6 +1346,8 @@ void ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction,
|
|||||||
output_stream->sort_description = std::move(sort_description);
|
output_stream->sort_description = std::move(sort_description);
|
||||||
output_stream->sort_scope = DataStream::SortScope::Stream;
|
output_stream->sort_scope = DataStream::SortScope::Stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const
|
ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const
|
||||||
@ -1358,12 +1394,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
|
|||||||
ActionsDAGPtr result_projection;
|
ActionsDAGPtr result_projection;
|
||||||
|
|
||||||
Names column_names_to_read = std::move(result.column_names_to_read);
|
Names column_names_to_read = std::move(result.column_names_to_read);
|
||||||
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
bool final = isFinal(query_info);
|
||||||
bool final = false;
|
|
||||||
if (query_info.table_expression_modifiers)
|
|
||||||
final = query_info.table_expression_modifiers->hasFinal();
|
|
||||||
else
|
|
||||||
final = select.final();
|
|
||||||
|
|
||||||
if (!final && result.sampling.use_sampling)
|
if (!final && result.sampling.use_sampling)
|
||||||
{
|
{
|
||||||
@ -1672,6 +1703,15 @@ void ReadFromMergeTree::describeIndexes(JSONBuilder::JSONMap & map) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ReadFromMergeTree::isFinal(const SelectQueryInfo & query_info)
|
||||||
|
{
|
||||||
|
if (query_info.table_expression_modifiers)
|
||||||
|
return query_info.table_expression_modifiers->hasFinal();
|
||||||
|
|
||||||
|
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
||||||
|
return select.final();
|
||||||
|
}
|
||||||
|
|
||||||
bool MergeTreeDataSelectAnalysisResult::error() const
|
bool MergeTreeDataSelectAnalysisResult::error() const
|
||||||
{
|
{
|
||||||
return std::holds_alternative<std::exception_ptr>(result);
|
return std::holds_alternative<std::exception_ptr>(result);
|
||||||
|
@@ -158,7 +158,10 @@ public:
     StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; }
     const PrewhereInfo * getPrewhereInfo() const { return prewhere_info.get(); }

-    void requestReadingInOrder(size_t prefix_size, int direction, size_t limit);
+    /// Returns `false` if requested reading cannot be performed.
+    bool requestReadingInOrder(size_t prefix_size, int direction, size_t limit);
+
+    static bool isFinal(const SelectQueryInfo & query_info);

 private:
     static MergeTreeDataSelectAnalysisResultPtr selectRangesToReadImpl(
@@ -555,7 +555,7 @@ void AggregatingTransform::initGenerate()
     double elapsed_seconds = watch.elapsedSeconds();
     size_t rows = variants.sizeWithoutOverflowRow();

-    LOG_DEBUG(log, "Aggregated. {} to {} rows (from {}) in {} sec. ({:.3f} rows/sec., {}/sec.)",
+    LOG_TRACE(log, "Aggregated. {} to {} rows (from {}) in {} sec. ({:.3f} rows/sec., {}/sec.)",
         src_rows, rows, ReadableSize(src_bytes),
         elapsed_seconds, src_rows / elapsed_seconds,
         ReadableSize(src_bytes / elapsed_seconds));
@ -1415,7 +1415,6 @@ void WindowTransform::work()
|
|||||||
assert(prev_frame_start <= frame_start);
|
assert(prev_frame_start <= frame_start);
|
||||||
const auto first_used_block = std::min(next_output_block_number,
|
const auto first_used_block = std::min(next_output_block_number,
|
||||||
std::min(prev_frame_start.block, current_row.block));
|
std::min(prev_frame_start.block, current_row.block));
|
||||||
|
|
||||||
if (first_block_number < first_used_block)
|
if (first_block_number < first_used_block)
|
||||||
{
|
{
|
||||||
// fmt::print(stderr, "will drop blocks from {} to {}\n", first_block_number,
|
// fmt::print(stderr, "will drop blocks from {} to {}\n", first_block_number,
|
||||||
@ -1970,6 +1969,147 @@ struct WindowFunctionRowNumber final : public WindowFunction
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Usage: ntile(n). n is the number of buckets.
|
||||||
|
struct WindowFunctionNtile final : public WindowFunction
|
||||||
|
{
|
||||||
|
WindowFunctionNtile(const std::string & name_,
|
||||||
|
const DataTypes & argument_types_, const Array & parameters_)
|
||||||
|
: WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
|
||||||
|
{
|
||||||
|
if (argument_types.size() != 1)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_);
|
||||||
|
}
|
||||||
|
auto type_id = argument_types[0]->getTypeId();
|
||||||
|
if (type_id != TypeIndex::UInt8 && type_id != TypeIndex::UInt16 && type_id != TypeIndex::UInt32 && type_id != TypeIndex::UInt32 && type_id != TypeIndex::UInt64)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ntile's argument type must be an unsigned integer (not larger then 64-bit), but got {}", argument_types[0]->getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool allocatesMemoryInArena() const override { return false; }
|
||||||
|
|
||||||
|
void windowInsertResultInto(const WindowTransform * transform,
|
||||||
|
size_t function_index) override
|
||||||
|
{
|
||||||
|
if (!buckets) [[unlikely]]
|
||||||
|
{
|
||||||
|
checkWindowFrameType(transform);
|
||||||
|
const auto & current_block = transform->blockAt(transform->current_row);
|
||||||
|
const auto & workspace = transform->workspaces[function_index];
|
||||||
|
const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]];
|
||||||
|
if (!isColumnConst(arg_col))
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ntile's argument must be a constant");
|
||||||
|
auto type_id = argument_types[0]->getTypeId();
|
||||||
|
if (type_id == TypeIndex::UInt8)
|
||||||
|
buckets = arg_col[transform->current_row.row].get<UInt8>();
|
||||||
|
else if (type_id == TypeIndex::UInt16)
|
||||||
|
buckets = arg_col[transform->current_row.row].get<UInt16>();
|
||||||
|
else if (type_id == TypeIndex::UInt32)
|
||||||
|
buckets = arg_col[transform->current_row.row].get<UInt32>();
|
||||||
|
else if (type_id == TypeIndex::UInt64)
|
||||||
|
buckets = arg_col[transform->current_row.row].get<UInt64>();
|
||||||
|
|
||||||
|
if (!buckets)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ntile's argument must > 0");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// new partition
|
||||||
|
if (transform->current_row_number == 1) [[unlikely]]
|
||||||
|
{
|
||||||
|
current_partition_rows = 0;
|
||||||
|
current_partition_inserted_row = 0;
|
||||||
|
start_row = transform->current_row;
|
||||||
|
}
|
||||||
|
current_partition_rows++;
|
||||||
|
|
||||||
|
// Only do the action when we meet the last row in this partition.
|
||||||
|
if (!transform->partition_ended)
|
||||||
|
return;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto current_row = transform->current_row;
|
||||||
|
current_row.row++;
|
||||||
|
const auto & end_row = transform->partition_end;
|
||||||
|
if (current_row != end_row)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (current_row.row < transform->blockRowsNumber(current_row))
|
||||||
|
return;
|
||||||
|
if (end_row.block != current_row.block + 1 || end_row.row)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// else, current_row is the last input row.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto bucket_capacity = current_partition_rows / buckets;
|
||||||
|
auto capacity_diff = current_partition_rows - bucket_capacity * buckets;
|
||||||
|
|
||||||
|
// bucket number starts from 1.
|
||||||
|
UInt64 bucket_num = 1;
|
||||||
|
while (current_partition_inserted_row < current_partition_rows)
|
||||||
|
{
|
||||||
|
auto current_bucket_capacity = bucket_capacity;
|
||||||
|
if (capacity_diff > 0)
|
||||||
|
{
|
||||||
|
current_bucket_capacity += 1;
|
||||||
|
capacity_diff--;
|
||||||
|
}
|
||||||
|
auto left_rows = current_bucket_capacity;
|
||||||
|
while (left_rows)
|
||||||
|
{
|
||||||
|
auto available_block_rows = transform->blockRowsNumber(start_row) - start_row.row;
|
||||||
|
IColumn & to = *transform->blockAt(start_row).output_columns[function_index];
|
||||||
|
auto & pod_array = assert_cast<ColumnUInt64 &>(to).getData();
|
||||||
|
if (left_rows < available_block_rows)
|
||||||
|
{
|
||||||
|
pod_array.resize_fill(pod_array.size() + left_rows, bucket_num);
|
||||||
|
start_row.row += left_rows;
|
||||||
|
left_rows = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pod_array.resize_fill(pod_array.size() + available_block_rows, bucket_num);
|
||||||
|
left_rows -= available_block_rows;
|
||||||
|
start_row.block++;
|
||||||
|
start_row.row = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
current_partition_inserted_row += current_bucket_capacity;
|
||||||
|
bucket_num += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
UInt64 buckets = 0;
|
||||||
|
RowNumber start_row;
|
||||||
|
UInt64 current_partition_rows = 0;
|
||||||
|
UInt64 current_partition_inserted_row = 0;
|
||||||
|
|
||||||
|
static void checkWindowFrameType(const WindowTransform * transform)
|
||||||
|
{
|
||||||
|
if (transform->order_by_indices.empty())
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ntile's window frame must have order by clause");
|
||||||
|
if (transform->window_description.frame.type != WindowFrame::FrameType::ROWS)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ntile's frame type must be ROWS");
|
||||||
|
}
|
||||||
|
if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ntile's frame start type must be UNBOUNDED PRECEDING");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded)
|
||||||
|
{
|
||||||
|
// We must wait all for the partition end and get the total rows number in this
|
||||||
|
// partition. So before the end of this partition, there is no any block could be
|
||||||
|
// dropped out.
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ntile's frame end type must be UNBOUNDED FOLLOWING");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// ClickHouse-specific variant of lag/lead that respects the window frame.
|
// ClickHouse-specific variant of lag/lead that respects the window frame.
|
||||||
template <bool is_lead>
|
template <bool is_lead>
|
||||||
struct WindowFunctionLagLeadInFrame final : public WindowFunction
|
struct WindowFunctionLagLeadInFrame final : public WindowFunction
|
||||||
@ -2338,6 +2478,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
|
|||||||
parameters);
|
parameters);
|
||||||
}, properties}, AggregateFunctionFactory::CaseInsensitive);
|
}, properties}, AggregateFunctionFactory::CaseInsensitive);
|
||||||
|
|
||||||
|
factory.registerFunction("ntile", {[](const std::string & name,
|
||||||
|
const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||||
|
{
|
||||||
|
return std::make_shared<WindowFunctionNtile>(name, argument_types,
|
||||||
|
parameters);
|
||||||
|
}, properties}, AggregateFunctionFactory::CaseInsensitive);
|
||||||
|
|
||||||
factory.registerFunction("nth_value", {[](const std::string & name,
|
factory.registerFunction("nth_value", {[](const std::string & name,
|
||||||
const DataTypes & argument_types, const Array & parameters, const Settings *)
|
const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||||
{
|
{
|
||||||
|
 29  src/Storages/MergeTree/IMergeTreeReadPool.h  Normal file
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <boost/noncopyable.hpp>
+#include <Core/Block.h>
+#include <IO/ReadBufferFromFileBase.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+
+
+namespace DB
+{
+struct MergeTreeReadTask;
+using MergeTreeReadTaskPtr = std::unique_ptr<MergeTreeReadTask>;
+
+
+class IMergeTreeReadPool : private boost::noncopyable
+{
+public:
+    virtual ~IMergeTreeReadPool() = default;
+
+    virtual Block getHeader() const = 0;
+
+    virtual MergeTreeReadTaskPtr getTask(size_t thread) = 0;
+
+    virtual void profileFeedback(ReadBufferFromFileBase::ProfileInfo info) = 0;
+};
+
+using MergeTreeReadPoolPtr = std::shared_ptr<IMergeTreeReadPool>;
+
+}
@@ -268,7 +268,8 @@ void MergeTreeBackgroundExecutor<Queue>::threadFunction()
 }


-template class MergeTreeBackgroundExecutor<MergeMutateRuntimeQueue>;
-template class MergeTreeBackgroundExecutor<OrdinaryRuntimeQueue>;
+template class MergeTreeBackgroundExecutor<RoundRobinRuntimeQueue>;
+template class MergeTreeBackgroundExecutor<PriorityRuntimeQueue>;
+template class MergeTreeBackgroundExecutor<DynamicRuntimeQueue>;

 }
@@ -6,7 +6,9 @@
 #include <future>
 #include <condition_variable>
 #include <set>
-#include <iostream>
+#include <variant>
+#include <utility>

 #include <boost/circular_buffer.hpp>
 #include <boost/noncopyable.hpp>
@@ -17,6 +19,7 @@
 #include <base/defines.h>
 #include <Storages/MergeTree/IExecutableTask.h>


 namespace DB
 {
 namespace ErrorCodes
@@ -67,8 +70,8 @@ struct TaskRuntimeData
     }
 };

-class OrdinaryRuntimeQueue
+/// Simplest First-in-First-out queue, ignores priority.
+class RoundRobinRuntimeQueue
 {
 public:
     TaskRuntimeDataPtr pop()
@@ -78,24 +81,29 @@ public:
         return result;
     }

-    void push(TaskRuntimeDataPtr item) { queue.push_back(std::move(item));}
+    void push(TaskRuntimeDataPtr item)
+    {
+        queue.push_back(std::move(item));
+    }

     void remove(StorageID id)
     {
         auto it = std::remove_if(queue.begin(), queue.end(),
-            [&] (auto item) -> bool { return item->task->getStorageID() == id; });
+            [&] (auto && item) -> bool { return item->task->getStorageID() == id; });
         queue.erase(it, queue.end());
     }

     void setCapacity(size_t count) { queue.set_capacity(count); }
     bool empty() { return queue.empty(); }

+    static constexpr std::string_view name = "round_robin";
+
 private:
     boost::circular_buffer<TaskRuntimeDataPtr> queue{0};
 };

-/// Uses a heap to pop a task with minimal priority
-class MergeMutateRuntimeQueue
+/// Uses a heap to pop a task with minimal priority.
+class PriorityRuntimeQueue
 {
 public:
     TaskRuntimeDataPtr pop()
@@ -115,20 +123,89 @@ public:

     void remove(StorageID id)
     {
-        auto it = std::remove_if(buffer.begin(), buffer.end(),
-            [&] (auto item) -> bool { return item->task->getStorageID() == id; });
-        buffer.erase(it, buffer.end());
+        std::erase_if(buffer, [&] (auto && item) -> bool { return item->task->getStorageID() == id; });

         std::make_heap(buffer.begin(), buffer.end(), TaskRuntimeData::comparePtrByPriority);
     }

     void setCapacity(size_t count) { buffer.reserve(count); }
     bool empty() { return buffer.empty(); }

+    static constexpr std::string_view name = "shortest_task_first";
+
 private:
-    std::vector<TaskRuntimeDataPtr> buffer{};
+    std::vector<TaskRuntimeDataPtr> buffer;
 };

+/// Queue that can dynamically change scheduling policy
+template <class ... Policies>
+class DynamicRuntimeQueueImpl
+{
+public:
+    TaskRuntimeDataPtr pop()
+    {
+        return std::visit<TaskRuntimeDataPtr>([&] (auto && queue) { return queue.pop(); }, impl);
+    }
+
+    void push(TaskRuntimeDataPtr item)
+    {
+        std::visit([&] (auto && queue) { queue.push(std::move(item)); }, impl);
+    }
+
+    void remove(StorageID id)
+    {
+        std::visit([&] (auto && queue) { queue.remove(id); }, impl);
+    }
+
+    void setCapacity(size_t count)
+    {
+        capacity = count;
+        std::visit([&] (auto && queue) { queue.setCapacity(count); }, impl);
+    }
+
+    bool empty()
+    {
+        return std::visit<bool>([&] (auto && queue) { return queue.empty(); }, impl);
+    }
+
+    // Change policy. It does nothing if new policy is unknown or equals current policy.
+    void updatePolicy(std::string_view name)
+    {
+        // We use this double lambda trick to generate code for all possible pairs of types of old and new queue.
+        // If types are different it moves tasks from old queue to new one using corresponding pop() and push()
+        resolve<Policies...>(name, [&] <class NewQueue> (std::in_place_type_t<NewQueue>)
+        {
+            std::visit([&] (auto && queue)
+            {
+                if constexpr (std::is_same_v<std::decay_t<decltype(queue)>, NewQueue>)
+                    return; // The same policy
+                NewQueue new_queue;
+                new_queue.setCapacity(capacity);
+                while (!queue.empty())
+                    new_queue.push(queue.pop());
+                impl = std::move(new_queue);
+            }, impl);
+        });
+    }
+
+private:
+    // Find policy with specified `name` and call `func()` if found.
+    // Tag `std::in_place_type_t<T>` used to help templated lambda to deduce type T w/o creating its instance
+    template <class T, class ... Ts, class Func>
+    void resolve(std::string_view name, Func && func)
+    {
+        if (T::name == name)
+            return func(std::in_place_type<T>);
+        if constexpr (sizeof...(Ts))
+            return resolve<Ts...>(name, std::forward<Func>(func));
+    }
+
+    std::variant<Policies...> impl;
+    size_t capacity;
+};
+
+// Avoid typedef and alias to facilitate forward declaration
+class DynamicRuntimeQueue : public DynamicRuntimeQueueImpl<RoundRobinRuntimeQueue, PriorityRuntimeQueue> {};
+
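The updatePolicy mechanism above relies on two constructs that are easy to miss on first read: a C++20 templated lambda and the std::in_place_type_t tag used to pass a type without creating an instance. Below is a stripped-down, self-contained sketch of the same idiom with plain int items; FifoQueue, MinFirstQueue and SwitchableQueue are invented for this illustration and are not ClickHouse types.

#include <cstdio>
#include <deque>
#include <functional>
#include <queue>
#include <string_view>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>

// Toy FIFO policy.
struct FifoQueue
{
    static constexpr std::string_view name = "fifo";
    void push(int v) { q.push_back(v); }
    int pop() { int v = q.front(); q.pop_front(); return v; }
    bool empty() const { return q.empty(); }
    std::deque<int> q;
};

// Toy "smallest value first" policy.
struct MinFirstQueue
{
    static constexpr std::string_view name = "min_first";
    void push(int v) { q.push(v); }
    int pop() { int v = q.top(); q.pop(); return v; }
    bool empty() const { return q.empty(); }
    std::priority_queue<int, std::vector<int>, std::greater<int>> q;
};

template <class... Policies>
class SwitchableQueue
{
public:
    void push(int v) { std::visit([&](auto & q) { q.push(v); }, impl); }
    int pop() { return std::visit([](auto & q) { return q.pop(); }, impl); }
    bool empty() const { return std::visit([](const auto & q) { return q.empty(); }, impl); }

    // Same idea as DynamicRuntimeQueueImpl::updatePolicy: pick the policy type by name,
    // then drain the old queue into a fresh instance of the new one.
    void updatePolicy(std::string_view name)
    {
        resolve<Policies...>(name, [&]<class New>(std::in_place_type_t<New>)
        {
            std::visit([&](auto & old_q)
            {
                if constexpr (std::is_same_v<std::decay_t<decltype(old_q)>, New>)
                    return;                 // already using this policy
                New fresh;
                while (!old_q.empty())
                    fresh.push(old_q.pop());
                impl = std::move(fresh);
            }, impl);
        });
    }

private:
    template <class T, class... Ts, class Func>
    void resolve(std::string_view name, Func && func)
    {
        if (T::name == name)
            return func(std::in_place_type<T>);
        if constexpr (sizeof...(Ts))
            resolve<Ts...>(name, std::forward<Func>(func));
    }

    std::variant<Policies...> impl;  // defaults to the first policy
};

int main()
{
    SwitchableQueue<FifoQueue, MinFirstQueue> q;
    for (int v : {3, 1, 2})
        q.push(v);
    q.updatePolicy("min_first");      // existing items survive the switch
    while (!q.empty())
        std::printf("%d ", q.pop());  // prints: 1 2 3
    std::printf("\n");
}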
 /**
  * Executor for a background MergeTree related operations such as merges, mutations, fetches and so on.
  * It can execute only successors of ExecutableTask interface.
@@ -149,13 +226,18 @@ private:
  * |s|
  *
  * Each task is simply a sequence of steps. Heavier tasks have longer sequences.
- * When a step of a task is executed, we move tasks to pending queue. And take another from the queue's head.
- * With these architecture all small merges / mutations will be executed faster, than bigger ones.
+ * When a step of a task is executed, we move tasks to pending queue. And take the next task from pending queue.
+ * Next task is chosen from pending tasks using one of the scheduling policies (class Queue):
+ * 1) RoundRobinRuntimeQueue. Uses boost::circular_buffer as FIFO-queue. Next task is taken from queue's head and after one step
+ *    enqueued into queue's tail. With this architecture all merges / mutations are fairly scheduled and never starved.
+ *    All decisions regarding priorities are left to components creating tasks (e.g. SimpleMergeSelector).
+ * 2) PriorityRuntimeQueue. Uses heap to select task with smallest priority value.
+ *    With this architecture all small merges / mutations will be executed faster, than bigger ones.
+ *    WARNING: Starvation is possible in case of overload.
  *
- * We use boost::circular_buffer as a container for queues not to do any allocations.
- *
- * Another nuisance that we faces with is than background operations always interact with an associated Storage.
- * So, when a Storage want to shutdown, it must wait until all its background operations are finished.
+ * We use boost::circular_buffer as a container for active queue to avoid allocations.
+ * Another nuisance that we face is that background operations always interact with an associated Storage.
+ * So, when a Storage wants to shutdown, it must wait until all its background operations are finished.
  */
 template <class Queue>
 class MergeTreeBackgroundExecutor final : boost::noncopyable
@@ -185,6 +267,18 @@ public:
         pool.scheduleOrThrowOnError([this] { threadFunction(); });
     }

+    MergeTreeBackgroundExecutor(
+        String name_,
+        size_t threads_count_,
+        size_t max_tasks_count_,
+        CurrentMetrics::Metric metric_,
+        std::string_view policy)
+        requires requires(Queue queue) { queue.updatePolicy(policy); } // Because we use explicit template instantiation
+        : MergeTreeBackgroundExecutor(name_, threads_count_, max_tasks_count_, metric_)
+    {
+        pending.updatePolicy(policy);
+    }
+
     ~MergeTreeBackgroundExecutor()
     {
         wait();
@@ -204,6 +298,14 @@ public:
     void removeTasksCorrespondingToStorage(StorageID id);
     void wait();

+    /// Update scheduling policy for pending tasks. It does nothing if `new_policy` is the same or unknown.
+    void updateSchedulingPolicy(std::string_view new_policy)
+        requires requires(Queue queue) { queue.updatePolicy(new_policy); } // Because we use explicit template instantiation
+    {
+        std::lock_guard lock(mutex);
+        pending.updatePolicy(new_policy);
+    }
+
 private:
     String name;
     size_t threads_count TSA_GUARDED_BY(mutex) = 0;
@@ -225,10 +327,8 @@ private:
     Poco::Logger * log = &Poco::Logger::get("MergeTreeBackgroundExecutor");
 };

-extern template class MergeTreeBackgroundExecutor<MergeMutateRuntimeQueue>;
-extern template class MergeTreeBackgroundExecutor<OrdinaryRuntimeQueue>;
-using MergeMutateBackgroundExecutor = MergeTreeBackgroundExecutor<MergeMutateRuntimeQueue>;
-using OrdinaryBackgroundExecutor = MergeTreeBackgroundExecutor<OrdinaryRuntimeQueue>;
+extern template class MergeTreeBackgroundExecutor<RoundRobinRuntimeQueue>;
+extern template class MergeTreeBackgroundExecutor<PriorityRuntimeQueue>;
+extern template class MergeTreeBackgroundExecutor<DynamicRuntimeQueue>;

 }
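The remark "Because we use explicit template instantiation" next to the requires clauses refers to the pattern visible here: the executor template is instantiated only for the queue types listed in the .cpp file, and other translation units see just the extern template declarations above. A minimal self-contained illustration of that split follows; the Executor, RoundRobin and Priority names are invented for the example, and the file-boundary comments only mark where each part would live in a real project.

#include <iostream>

// Executor.h (conceptually): the template definition.
template <class Queue>
class Executor
{
public:
    void run() { std::cout << Queue::name << "\n"; }
};

struct RoundRobin { static constexpr const char * name = "round_robin"; };
struct Priority   { static constexpr const char * name = "shortest_task_first"; };

// Still in the header: tell users not to instantiate the template themselves.
extern template class Executor<RoundRobin>;
extern template class Executor<Priority>;

// Executor.cpp (conceptually): the only place the template is actually instantiated.
template class Executor<RoundRobin>;
template class Executor<Priority>;

int main()
{
    Executor<RoundRobin>{}.run();  // uses the explicit instantiation
    Executor<Priority>{}.run();
    // In the real code the method bodies live in the .cpp, so only explicitly
    // instantiated Queue types are usable from other translation units.
}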
@ -39,16 +39,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
|
|||||||
bool use_uncompressed_cache_,
|
bool use_uncompressed_cache_,
|
||||||
bool is_remote_read_,
|
bool is_remote_read_,
|
||||||
const MergeTreeSettings & storage_settings_)
|
const MergeTreeSettings & storage_settings_)
|
||||||
: IMergeTreeReadPool(
|
: WithContext(context_)
|
||||||
storage_snapshot_,
|
|
||||||
column_names_,
|
|
||||||
virtual_column_names_,
|
|
||||||
min_marks_for_concurrent_read_,
|
|
||||||
prewhere_info_,
|
|
||||||
parts_,
|
|
||||||
(preferred_block_size_bytes_ > 0),
|
|
||||||
/*do_not_steal_tasks_*/false)
|
|
||||||
, WithContext(context_)
|
|
||||||
, log(&Poco::Logger::get("MergeTreePrefetchedReadPool(" + (parts_.empty() ? "" : parts_.front().data_part->storage.getStorageID().getNameForLogs()) + ")"))
|
, log(&Poco::Logger::get("MergeTreePrefetchedReadPool(" + (parts_.empty() ? "" : parts_.front().data_part->storage.getStorageID().getNameForLogs()) + ")"))
|
||||||
, header(storage_snapshot_->getSampleBlockForColumns(column_names_))
|
, header(storage_snapshot_->getSampleBlockForColumns(column_names_))
|
||||||
, mark_cache(context_->getGlobalContext()->getMarkCache().get())
|
, mark_cache(context_->getGlobalContext()->getMarkCache().get())
|
||||||
@ -57,6 +48,10 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
|
|||||||
, profile_callback([this](ReadBufferFromFileBase::ProfileInfo info_) { profileFeedback(info_); })
|
, profile_callback([this](ReadBufferFromFileBase::ProfileInfo info_) { profileFeedback(info_); })
|
||||||
, index_granularity_bytes(storage_settings_.index_granularity_bytes)
|
, index_granularity_bytes(storage_settings_.index_granularity_bytes)
|
||||||
, fixed_index_granularity(storage_settings_.index_granularity)
|
, fixed_index_granularity(storage_settings_.index_granularity)
|
||||||
|
, storage_snapshot(storage_snapshot_)
|
||||||
|
, column_names(column_names_)
|
||||||
|
, virtual_column_names(virtual_column_names_)
|
||||||
|
, prewhere_info(prewhere_info_)
|
||||||
, is_remote_read(is_remote_read_)
|
, is_remote_read(is_remote_read_)
|
||||||
, prefetch_threadpool(getContext()->getPrefetchThreadpool())
|
, prefetch_threadpool(getContext()->getPrefetchThreadpool())
|
||||||
{
|
{
|
||||||
|
@ -84,12 +84,20 @@ private:
|
|||||||
ReadBufferFromFileBase::ProfileCallback profile_callback;
|
ReadBufferFromFileBase::ProfileCallback profile_callback;
|
||||||
size_t index_granularity_bytes;
|
size_t index_granularity_bytes;
|
||||||
size_t fixed_index_granularity;
|
size_t fixed_index_granularity;
|
||||||
|
|
||||||
|
StorageSnapshotPtr storage_snapshot;
|
||||||
|
const Names column_names;
|
||||||
|
const Names virtual_column_names;
|
||||||
|
PrewhereInfoPtr prewhere_info;
|
||||||
|
RangesInDataParts parts_ranges;
|
||||||
|
|
||||||
[[ maybe_unused ]] const bool is_remote_read;
|
[[ maybe_unused ]] const bool is_remote_read;
|
||||||
ThreadPool & prefetch_threadpool;
|
ThreadPool & prefetch_threadpool;
|
||||||
|
|
||||||
PartsInfos parts_infos;
|
PartsInfos parts_infos;
|
||||||
|
|
||||||
ThreadsTasks threads_tasks;
|
ThreadsTasks threads_tasks;
|
||||||
|
std::mutex mutex;
|
||||||
|
|
||||||
struct TaskHolder
|
struct TaskHolder
|
||||||
{
|
{
|
||||||
|
@ -20,7 +20,47 @@ namespace ErrorCodes
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
std::vector<size_t> IMergeTreeReadPool::fillPerPartInfo(const RangesInDataParts & parts)
|
MergeTreeReadPool::MergeTreeReadPool(
|
||||||
|
size_t threads_,
|
||||||
|
size_t sum_marks_,
|
||||||
|
size_t min_marks_for_concurrent_read_,
|
||||||
|
RangesInDataParts && parts_,
|
||||||
|
const StorageSnapshotPtr & storage_snapshot_,
|
||||||
|
const PrewhereInfoPtr & prewhere_info_,
|
||||||
|
const Names & column_names_,
|
||||||
|
const Names & virtual_column_names_,
|
||||||
|
ContextPtr context_,
|
||||||
|
bool do_not_steal_tasks_)
|
||||||
|
: storage_snapshot(storage_snapshot_)
|
||||||
|
, column_names(column_names_)
|
||||||
|
, virtual_column_names(virtual_column_names_)
|
||||||
|
, min_marks_for_concurrent_read(min_marks_for_concurrent_read_)
|
||||||
|
, prewhere_info(prewhere_info_)
|
||||||
|
, parts_ranges(std::move(parts_))
|
||||||
|
, predict_block_size_bytes(context_->getSettingsRef().preferred_block_size_bytes > 0)
|
||||||
|
, do_not_steal_tasks(do_not_steal_tasks_)
|
||||||
|
, backoff_settings{context_->getSettingsRef()}
|
||||||
|
, backoff_state{threads_}
|
||||||
|
{
|
||||||
|
/// parts don't contain duplicate MergeTreeDataPart's.
|
||||||
|
const auto per_part_sum_marks = fillPerPartInfo(
|
||||||
|
parts_ranges, storage_snapshot, is_part_on_remote_disk,
|
||||||
|
do_not_steal_tasks, predict_block_size_bytes,
|
||||||
|
column_names, virtual_column_names, prewhere_info, per_part_params);
|
||||||
|
|
||||||
|
fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
|
||||||
|
const RangesInDataParts & parts,
|
||||||
|
const StorageSnapshotPtr & storage_snapshot,
|
||||||
|
std::vector<bool> & is_part_on_remote_disk,
|
||||||
|
bool & do_not_steal_tasks,
|
||||||
|
bool & predict_block_size_bytes,
|
||||||
|
const Names & column_names,
|
||||||
|
const Names & virtual_column_names,
|
||||||
|
const PrewhereInfoPtr & prewhere_info,
|
||||||
|
std::vector<MergeTreeReadPool::PerPartParams> & per_part_params)
|
||||||
{
|
{
|
||||||
std::vector<size_t> per_part_sum_marks;
|
std::vector<size_t> per_part_sum_marks;
|
||||||
Block sample_block = storage_snapshot->metadata->getSampleBlock();
|
Block sample_block = storage_snapshot->metadata->getSampleBlock();
|
||||||
@ -65,35 +105,6 @@ std::vector<size_t> IMergeTreeReadPool::fillPerPartInfo(const RangesInDataParts
|
|||||||
return per_part_sum_marks;
|
return per_part_sum_marks;
|
||||||
}
|
}
|
||||||
|
|
||||||
MergeTreeReadPool::MergeTreeReadPool(
|
|
||||||
size_t threads_,
|
|
||||||
size_t sum_marks_,
|
|
||||||
size_t min_marks_for_concurrent_read_,
|
|
||||||
RangesInDataParts && parts_,
|
|
||||||
const StorageSnapshotPtr & storage_snapshot_,
|
|
||||||
const PrewhereInfoPtr & prewhere_info_,
|
|
||||||
const Names & column_names_,
|
|
||||||
const Names & virtual_column_names_,
|
|
||||||
const BackoffSettings & backoff_settings_,
|
|
||||||
size_t preferred_block_size_bytes_,
|
|
||||||
bool do_not_steal_tasks_)
|
|
||||||
: IMergeTreeReadPool(
|
|
||||||
storage_snapshot_,
|
|
||||||
column_names_,
|
|
||||||
virtual_column_names_,
|
|
||||||
min_marks_for_concurrent_read_,
|
|
||||||
prewhere_info_,
|
|
||||||
std::move(parts_),
|
|
||||||
(preferred_block_size_bytes_ > 0),
|
|
||||||
do_not_steal_tasks_)
|
|
||||||
, backoff_settings{backoff_settings_}
|
|
||||||
, backoff_state{threads_}
|
|
||||||
{
|
|
||||||
/// parts don't contain duplicate MergeTreeDataPart's.
|
|
||||||
const auto per_part_sum_marks = fillPerPartInfo(parts_ranges);
|
|
||||||
fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread)
|
MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread)
|
||||||
{
|
{
|
@@ -6,6 +6,7 @@
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Storages/MergeTree/RequestResponse.h>
+#include <Storages/MergeTree/IMergeTreeReadPool.h>
#include <Storages/SelectQueryInfo.h>

#include <mutex>
@@ -14,77 +15,42 @@
namespace DB
{

-using MergeTreeReadTaskPtr = std::unique_ptr<MergeTreeReadTask>;
-
-class IMergeTreeReadPool
-{
-public:
-    IMergeTreeReadPool(
-        StorageSnapshotPtr storage_snapshot_,
-        Names column_names_,
-        Names virtual_column_names_,
-        size_t min_marks_for_concurrent_read_,
-        PrewhereInfoPtr prewhere_info_,
-        RangesInDataParts parts_ranges_,
-        bool predict_block_size_bytes_,
-        bool do_not_steal_tasks_)
-        : storage_snapshot(storage_snapshot_)
-        , column_names(column_names_)
-        , virtual_column_names(virtual_column_names_)
-        , min_marks_for_concurrent_read(min_marks_for_concurrent_read_)
-        , prewhere_info(prewhere_info_)
-        , parts_ranges(parts_ranges_)
-        , predict_block_size_bytes(predict_block_size_bytes_)
-        , do_not_steal_tasks(do_not_steal_tasks_)
-    {}
-
-    virtual MergeTreeReadTaskPtr getTask(size_t thread) = 0;
-    virtual Block getHeader() const = 0;
-    virtual void profileFeedback(ReadBufferFromFileBase::ProfileInfo info) = 0;
-    virtual ~IMergeTreeReadPool() = default;
-
-protected:
-
-    std::vector<size_t> fillPerPartInfo(const RangesInDataParts & parts);
-
-    /// Initialized in constructor
-    StorageSnapshotPtr storage_snapshot;
-    const Names column_names;
-    const Names virtual_column_names;
-    size_t min_marks_for_concurrent_read{0};
-    PrewhereInfoPtr prewhere_info;
-    RangesInDataParts parts_ranges;
-    bool predict_block_size_bytes;
-    bool do_not_steal_tasks;
-
-    struct PerPartParams
-    {
-        MergeTreeReadTaskColumns task_columns;
-        NameSet column_name_set;
-        MergeTreeBlockSizePredictorPtr size_predictor;
-        RangesInDataPart data_part;
-    };
-
-    std::vector<PerPartParams> per_part_params;
-    std::vector<bool> is_part_on_remote_disk;
-
-    mutable std::mutex mutex;
-};
-
-using IMergeTreeReadPoolPtr = std::shared_ptr<IMergeTreeReadPool>;
-
-/** Provides read tasks for MergeTreeThreadSelectProcessor`s in fine-grained batches, allowing for more
- * uniform distribution of work amongst multiple threads. All parts and their ranges are divided into `threads`
- * workloads with at most `sum_marks / threads` marks. Then, threads are performing reads from these workloads
- * in "sequential" manner, requesting work in small batches. As soon as some thread has exhausted
- * it's workload, it either is signaled that no more work is available (`do_not_steal_tasks == false`) or
- * continues taking small batches from other threads' workloads (`do_not_steal_tasks == true`).
+/** Provides read tasks for MergeTreeThreadSelectProcessor`s in fine-grained batches, allowing for more
+ * uniform distribution of work amongst multiple threads. All parts and their ranges are divided into `threads`
+ * workloads with at most `sum_marks / threads` marks. Then, threads are performing reads from these workloads
+ * in "sequential" manner, requesting work in small batches. As soon as some thread has exhausted
+ * it's workload, it either is signaled that no more work is available (`do_not_steal_tasks == false`) or
+ * continues taking small batches from other threads' workloads (`do_not_steal_tasks == true`).
 */
-class MergeTreeReadPool final: public IMergeTreeReadPool, private boost::noncopyable
+class MergeTreeReadPool : public IMergeTreeReadPool
{
public:
+    struct BackoffSettings;
+
+    MergeTreeReadPool(
+        size_t threads_,
+        size_t sum_marks_,
+        size_t min_marks_for_concurrent_read_,
+        RangesInDataParts && parts_,
+        const StorageSnapshotPtr & storage_snapshot_,
+        const PrewhereInfoPtr & prewhere_info_,
+        const Names & column_names_,
+        const Names & virtual_column_names_,
+        ContextPtr context_,
+        bool do_not_steal_tasks_ = false);
+
+    ~MergeTreeReadPool() override = default;
+
+    MergeTreeReadTaskPtr getTask(size_t thread) override;
+
+    /** Each worker could call this method and pass information about read performance.
+     * If read performance is too low, pool could decide to lower number of threads: do not assign more tasks to several threads.
+     * This allows to overcome excessive load to disk subsystem, when reads are not from page cache.
+     */
+    void profileFeedback(ReadBufferFromFileBase::ProfileInfo info) override;
+
+    Block getHeader() const override;
+
    /** Pull could dynamically lower (backoff) number of threads, if read operation are too slow.
      * Settings for that backoff.
      */
@@ -107,46 +73,51 @@ public:
        max_throughput(settings.read_backoff_max_throughput),
        min_interval_between_events_ms(settings.read_backoff_min_interval_between_events_ms.totalMilliseconds()),
        min_events(settings.read_backoff_min_events),
-        min_concurrency(settings.read_backoff_min_concurrency)
-        {
-        }
+        min_concurrency(settings.read_backoff_min_concurrency) {}

        BackoffSettings() : min_read_latency_ms(0) {}
    };

-    BackoffSettings backoff_settings;
-
-    MergeTreeReadPool(
-        size_t threads_,
-        size_t sum_marks_,
-        size_t min_marks_for_concurrent_read_,
-        RangesInDataParts && parts_,
-        const StorageSnapshotPtr & storage_snapshot_,
-        const PrewhereInfoPtr & prewhere_info_,
-        const Names & column_names_,
-        const Names & virtual_column_names_,
-        const BackoffSettings & backoff_settings_,
-        size_t preferred_block_size_bytes_,
-        bool do_not_steal_tasks_ = false);
-
-    ~MergeTreeReadPool() override = default;
-
-    MergeTreeReadTaskPtr getTask(size_t thread) override;
-
-    /** Each worker could call this method and pass information about read performance.
-     * If read performance is too low, pool could decide to lower number of threads: do not assign more tasks to several threads.
-     * This allows to overcome excessive load to disk subsystem, when reads are not from page cache.
-     */
-    void profileFeedback(ReadBufferFromFileBase::ProfileInfo info) override;
-
-    Block getHeader() const override;
+    struct PerPartParams
+    {
+        MergeTreeReadTaskColumns task_columns;
+        NameSet column_name_set;
+        MergeTreeBlockSizePredictorPtr size_predictor;
+        RangesInDataPart data_part;
+    };
+
+    static std::vector<size_t> fillPerPartInfo(
+        const RangesInDataParts & parts,
+        const StorageSnapshotPtr & storage_snapshot,
+        std::vector<bool> & is_part_on_remote_disk,
+        bool & do_not_steal_tasks,
+        bool & predict_block_size_bytes,
+        const Names & column_names,
+        const Names & virtual_column_names,
+        const PrewhereInfoPtr & prewhere_info,
+        std::vector<MergeTreeReadPool::PerPartParams> & per_part_params);

private:

    void fillPerThreadInfo(
        size_t threads, size_t sum_marks, std::vector<size_t> per_part_sum_marks,
        const RangesInDataParts & parts);

+    /// Initialized in constructor
+    StorageSnapshotPtr storage_snapshot;
+    const Names column_names;
+    const Names virtual_column_names;
+    size_t min_marks_for_concurrent_read{0};
+    PrewhereInfoPtr prewhere_info;
+    RangesInDataParts parts_ranges;
+    bool predict_block_size_bytes;
+    bool do_not_steal_tasks;
+
+    std::vector<PerPartParams> per_part_params;
+    std::vector<bool> is_part_on_remote_disk;
+
+    BackoffSettings backoff_settings;
+
+    mutable std::mutex mutex;
    /// State to track numbers of slow reads.
    struct BackoffState
    {
@@ -156,7 +127,6 @@ private:

        explicit BackoffState(size_t threads) : current_threads(threads) {}
    };

    BackoffState backoff_state;

    struct Part
@@ -185,9 +155,7 @@ private:

};

-using MergeTreeReadPoolPtr = std::shared_ptr<MergeTreeReadPool>;
-
-class MergeTreeReadPoolParallelReplicas : public IMergeTreeReadPool, private boost::noncopyable
+class MergeTreeReadPoolParallelReplicas : public IMergeTreeReadPool
{
public:

@@ -199,21 +167,19 @@ public:
        const PrewhereInfoPtr & prewhere_info_,
        const Names & column_names_,
        const Names & virtual_column_names_,
-        size_t min_marks_for_concurrent_read_
-    )
-    : IMergeTreeReadPool(
-        storage_snapshot_,
-        column_names_,
-        virtual_column_names_,
-        min_marks_for_concurrent_read_,
-        prewhere_info_,
-        parts_,
-        /*predict_block_size*/false,
-        /*do_not_steal_tasks*/false)
-    , extension(extension_)
-    , threads(threads_)
+        size_t min_marks_for_concurrent_read_)
+    : extension(extension_)
+    , threads(threads_)
+    , prewhere_info(prewhere_info_)
+    , storage_snapshot(storage_snapshot_)
+    , min_marks_for_concurrent_read(min_marks_for_concurrent_read_)
+    , column_names(column_names_)
+    , virtual_column_names(virtual_column_names_)
+    , parts_ranges(std::move(parts_))
    {
-        fillPerPartInfo(parts_ranges);
+        MergeTreeReadPool::fillPerPartInfo(
+            parts_ranges, storage_snapshot, is_part_on_remote_disk, do_not_steal_tasks,
+            predict_block_size_bytes, column_names, virtual_column_names, prewhere_info, per_part_params);

        extension.all_callback({
            .description = parts_ranges.getDescriptions(),
@@ -223,8 +189,10 @@ public:

    ~MergeTreeReadPoolParallelReplicas() override;

-    MergeTreeReadTaskPtr getTask(size_t thread) override;
    Block getHeader() const override;

+    MergeTreeReadTaskPtr getTask(size_t thread) override;
+
    void profileFeedback(ReadBufferFromFileBase::ProfileInfo) override {}

private:
@@ -234,6 +202,20 @@ private:
    size_t threads;
    bool no_more_tasks_available{false};
    Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPoolParallelReplicas");

+    std::mutex mutex;
+
+    PrewhereInfoPtr prewhere_info;
+    StorageSnapshotPtr storage_snapshot;
+    size_t min_marks_for_concurrent_read;
+    const Names column_names;
+    const Names virtual_column_names;
+    RangesInDataParts parts_ranges;
+
+    bool do_not_steal_tasks = false;
+    bool predict_block_size_bytes = false;
+    std::vector<bool> is_part_on_remote_disk;
+    std::vector<MergeTreeReadPool::PerPartParams> per_part_params;
};

using MergeTreeReadPoolParallelReplicasPtr = std::shared_ptr<MergeTreeReadPoolParallelReplicas>;
@@ -247,10 +229,10 @@ public:
        ParallelReadingExtension extension_,
        CoordinationMode mode_,
        size_t min_marks_for_concurrent_read_)
    : parts_ranges(parts_)
    , extension(extension_)
    , mode(mode_)
    , min_marks_for_concurrent_read(min_marks_for_concurrent_read_)
    {
        for (const auto & part : parts_ranges)
            request.push_back({part.data_part->info, MarkRanges{}});
@@ -266,6 +248,7 @@ public:

    MarkRanges getNewTask(RangesInDataPartDescription description);

    RangesInDataParts parts_ranges;
    ParallelReadingExtension extension;
    CoordinationMode mode;
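The comment on MergeTreeReadPool above describes the distribution policy: parts and their mark ranges are split into `threads` workloads of roughly `sum_marks / threads` marks each, and an idle thread either stops or steals small batches from other workloads depending on `do_not_steal_tasks`. The following is a minimal, self-contained sketch of that idea only; the names (Workload, distribute, getTask) and the stealing-from-the-tail detail are illustrative assumptions, not the pool's actual implementation.

// Sketch of "divide marks into per-thread workloads, optionally steal from others".
#include <cstddef>
#include <deque>
#include <iostream>
#include <numeric>
#include <optional>
#include <vector>

struct Workload { std::deque<size_t> batches; };

std::vector<Workload> distribute(const std::vector<size_t> & part_marks, size_t threads)
{
    const size_t sum_marks = std::accumulate(part_marks.begin(), part_marks.end(), size_t{0});
    const size_t per_thread = (sum_marks + threads - 1) / threads;   /// at most sum_marks / threads marks (rounded up)

    std::vector<Workload> workloads(threads);
    size_t thread = 0, filled = 0;
    for (size_t marks : part_marks)
    {
        while (marks > 0)
        {
            size_t take = std::min(marks, per_thread - filled);
            if (take == 0)
                take = marks;                      /// the last thread absorbs any remainder
            workloads[thread].batches.push_back(take);
            marks -= take;
            filled += take;
            if (filled >= per_thread && thread + 1 < threads) { ++thread; filled = 0; }
        }
    }
    return workloads;
}

/// An exhausted thread either gets nothing (no stealing) or takes a batch from another workload.
std::optional<size_t> getTask(std::vector<Workload> & workloads, size_t thread, bool steal)
{
    if (!workloads[thread].batches.empty())
    {
        size_t task = workloads[thread].batches.front();
        workloads[thread].batches.pop_front();
        return task;
    }
    if (!steal)
        return std::nullopt;
    for (auto & other : workloads)
        if (!other.batches.empty())
        {
            size_t task = other.batches.back();    /// steal a small batch from the tail
            other.batches.pop_back();
            return task;
        }
    return std::nullopt;
}

int main()
{
    auto workloads = distribute({120, 40, 200, 10}, /*threads=*/3);
    while (auto task = getTask(workloads, /*thread=*/0, /*steal=*/true))
        std::cout << "thread 0 reads a batch of " << *task << " marks\n";
}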
@@ -45,6 +45,12 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
        settings.read_settings,
        load_marks_threadpool_,
        data_part_info_for_read_->getColumns().size())
+    , profile_callback(profile_callback_)
+    , clock_type(clock_type_)
+{
+}
+
+void MergeTreeReaderCompact::initialize()
{
    try
    {
@@ -75,8 +81,8 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
            uncompressed_cache,
            /* allow_different_codecs = */ true);

-        if (profile_callback_)
-            buffer->setProfileCallback(profile_callback_, clock_type_);
+        if (profile_callback)
+            buffer->setProfileCallback(profile_callback, clock_type);

        if (!settings.checksum_on_read)
            buffer->disableChecksumming();
@@ -95,8 +101,8 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
            std::nullopt, std::nullopt),
            /* allow_different_codecs = */ true);

-        if (profile_callback_)
-            buffer->setProfileCallback(profile_callback_, clock_type_);
+        if (profile_callback)
+            buffer->setProfileCallback(profile_callback, clock_type);

        if (!settings.checksum_on_read)
            buffer->disableChecksumming();
@@ -157,6 +163,12 @@ void MergeTreeReaderCompact::fillColumnPositions()
size_t MergeTreeReaderCompact::readRows(
    size_t from_mark, size_t current_task_last_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)
{
+    if (!initialized)
+    {
+        initialize();
+        initialized = true;
+    }
+
    if (continue_reading)
        from_mark = next_mark;

@@ -302,6 +314,18 @@ void MergeTreeReaderCompact::readData(
    last_read_granule.emplace(from_mark, column_position);
}

+void MergeTreeReaderCompact::prefetchBeginOfRange(int64_t priority)
+{
+    if (!initialized)
+    {
+        initialize();
+        initialized = true;
+    }
+
+    adjustUpperBound(all_mark_ranges.back().end);
+    seekToMark(all_mark_ranges.front().begin, 0);
+    data_buffer->prefetch(priority);
+}
+
void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index)
{
@@ -38,9 +38,12 @@ public:

    bool canReadIncompleteGranules() const override { return false; }

+    void prefetchBeginOfRange(int64_t priority) override;
+
private:
    bool isContinuousReading(size_t mark, size_t column_position);
    void fillColumnPositions();
+    void initialize();

    ReadBuffer * data_buffer;
    CompressedReadBufferBase * compressed_data_buffer;
@@ -78,6 +81,11 @@ private:

    /// For asynchronous reading from remote fs.
    void adjustUpperBound(size_t last_mark);

+    ReadBufferFromFileBase::ProfileCallback profile_callback;
+    clockid_t clock_type;
+
+    bool initialized = false;
};

}
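The hunks above move the expensive buffer setup out of the MergeTreeReaderCompact constructor into initialize(), which is invoked lazily from both readRows() and the new prefetchBeginOfRange(). A minimal sketch of that lazy one-time-initialization pattern follows; LazyReader and its members are hypothetical stand-ins, not the real classes.

// Lazy one-time initialization triggered by whichever entry point is called first.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>

class LazyReader
{
public:
    explicit LazyReader(std::function<void()> profile_callback_) : profile_callback(std::move(profile_callback_)) {}

    size_t readRows(size_t max_rows)
    {
        ensureInitialized();
        std::cout << "reading up to " << max_rows << " rows\n";
        return max_rows;
    }

    void prefetchBeginOfRange(int64_t priority)
    {
        ensureInitialized();     /// prefetch may be the first call, so it must also initialize
        std::cout << "prefetch with priority " << priority << "\n";
    }

private:
    void ensureInitialized()
    {
        if (initialized)
            return;
        /// The expensive part: open files, create compressed buffers, attach the stored callback.
        if (profile_callback)
            profile_callback();
        initialized = true;
    }

    std::function<void()> profile_callback;   /// remembered at construction, used only on first read
    bool initialized = false;
};

int main()
{
    LazyReader reader([] { std::cout << "buffers created once\n"; });
    reader.prefetchBeginOfRange(1);
    reader.readRows(8192);   /// does not re-initialize
}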
@@ -21,8 +21,7 @@ MergeTreeThreadSelectAlgorithm::MergeTreeThreadSelectAlgorithm(
    ExpressionActionsSettings actions_settings,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & virt_column_names_)
-    :
-    IMergeTreeSelectAlgorithm{
+    : IMergeTreeSelectAlgorithm{
        pool_->getHeader(), storage_, storage_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_,
        preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
        reader_settings_, use_uncompressed_cache_, virt_column_names_},
@@ -59,8 +58,6 @@ void MergeTreeThreadSelectAlgorithm::finalizeNewTask()
    const bool init_new_readers = !reader || task->reader.valid() || part_name != last_read_part_name;
    if (init_new_readers)
    {
-        initializeMergeTreeReadersForPart(
-            task->data_part, task->task_columns, metadata_snapshot, task->mark_ranges, value_size_map, profile_callback);
        initializeMergeTreeReadersForCurrentTask(metadata_snapshot, value_size_map, profile_callback);
    }

@@ -42,10 +42,6 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
    , log{log_}
    , column_sizes{std::move(column_sizes_)}
{
-    const auto & primary_key = metadata_snapshot->getPrimaryKey();
-    if (!primary_key.column_names.empty())
-        first_primary_key_column = primary_key.column_names[0];
-
    for (const auto & name : queried_columns)
    {
        auto it = column_sizes.find(name);
@@ -193,8 +189,9 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node,
        /// Condition depend on some column. Constant expressions are not moved.
        !cond.identifiers.empty()
        && !cannotBeMoved(node, is_final)
-        /// Do not take into consideration the conditions consisting only of the first primary key column
-        && !hasPrimaryKeyAtoms(node)
+        /// When use final, do not take into consideration the conditions with non-sorting keys. Because final select
+        /// need to use all sorting keys, it will cause correctness issues if we filter other columns before final merge.
+        && (!is_final || isExpressionOverSortingKey(node))
        /// Only table columns are considered. Not array joined columns. NOTE We're assuming that aliases was expanded.
        && isSubsetOfTableColumns(cond.identifiers)
        /// Do not move conditions involving all queried columns.
@@ -320,48 +317,22 @@ UInt64 MergeTreeWhereOptimizer::getIdentifiersColumnSize(const NameSet & identif
    return size;
}

-bool MergeTreeWhereOptimizer::hasPrimaryKeyAtoms(const ASTPtr & ast) const
+bool MergeTreeWhereOptimizer::isExpressionOverSortingKey(const ASTPtr & ast) const
{
    if (const auto * func = ast->as<ASTFunction>())
    {
        const auto & args = func->arguments->children;
-        if ((func->name == "not" && 1 == args.size()) || func->name == "and" || func->name == "or")
+        for (const auto & arg : args)
        {
-            for (const auto & arg : args)
-                if (hasPrimaryKeyAtoms(arg))
-                    return true;
-            return false;
+            if (isConstant(ast) || sorting_key_names.contains(arg->getColumnName()))
+                continue;
+            if (!isExpressionOverSortingKey(arg))
+                return false;
        }
+        return true;
    }

-    return isPrimaryKeyAtom(ast);
-}
-
-
-bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const ASTPtr & ast) const
-{
-    if (const auto * func = ast->as<ASTFunction>())
-    {
-        if (!KeyCondition::atom_map.contains(func->name))
-            return false;
-
-        const auto & args = func->arguments->children;
-        if (args.size() != 2)
-            return false;
-
-        const auto & first_arg_name = args.front()->getColumnName();
-        const auto & second_arg_name = args.back()->getColumnName();
-
-        if ((first_primary_key_column == first_arg_name && isConstant(args[1]))
-            || (first_primary_key_column == second_arg_name && isConstant(args[0]))
-            || (first_primary_key_column == first_arg_name && functionIsInOrGlobalInOperator(func->name)))
-            return true;
-    }
-
-    return false;
+    return isConstant(ast) || sorting_key_names.contains(ast->getColumnName());
}

@@ -83,9 +83,7 @@ private:

    UInt64 getIdentifiersColumnSize(const NameSet & identifiers) const;

-    bool hasPrimaryKeyAtoms(const ASTPtr & ast) const;
-
-    bool isPrimaryKeyAtom(const ASTPtr & ast) const;
+    bool isExpressionOverSortingKey(const ASTPtr & ast) const;

    bool isSortingKey(const String & column_name) const;

@@ -105,7 +103,6 @@ private:

    using StringSet = std::unordered_set<std::string>;

-    String first_primary_key_column;
    const StringSet table_columns;
    const Names queried_columns;
    const NameSet sorting_key_names;
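The new isExpressionOverSortingKey above only allows a condition to stay movable under FINAL when it is built entirely from constants and sorting-key columns. Below is a slightly simplified, self-contained sketch of that recursive check; the Node type and helper lambdas are hypothetical, standing in for the real AST classes, and the leaf-only check is a simplification of the per-argument logic in the diff.

// Recursive "expression is over the sorting key" check on a toy expression tree.
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>

struct Node
{
    std::string name;                         /// column name, literal text or function name
    bool is_constant = false;
    std::vector<std::shared_ptr<Node>> args;  /// empty for leaves
};

bool isExpressionOverSortingKey(const Node & node, const std::set<std::string> & sorting_key_names)
{
    if (node.args.empty())
        return node.is_constant || sorting_key_names.count(node.name) > 0;
    for (const auto & arg : node.args)
        if (!isExpressionOverSortingKey(*arg, sorting_key_names))
            return false;
    return true;
}

int main()
{
    auto col = [](std::string n) { return std::make_shared<Node>(Node{std::move(n), false, {}}); };
    auto lit = [](std::string n) { return std::make_shared<Node>(Node{std::move(n), true, {}}); };

    Node over_key{"equals", false, {col("key_column"), lit("42")}};
    Node over_other{"equals", false, {col("value_column"), lit("42")}};

    std::set<std::string> sorting_key{"key_column"};
    std::cout << isExpressionOverSortingKey(over_key, sorting_key) << "\n";    // 1: may be moved under FINAL
    std::cout << isExpressionOverSortingKey(over_other, sorting_key) << "\n";  // 0: filtered out under FINAL
}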
@@ -4,10 +4,12 @@
#include <barrier>
#include <memory>
#include <random>
+#include <functional>

#include <Storages/MergeTree/IExecutableTask.h>
#include <Storages/MergeTree/MergeTreeBackgroundExecutor.h>


using namespace DB;

namespace CurrentMetrics
@@ -55,16 +57,89 @@ private:
    std::uniform_int_distribution<> distribution;

    String name;
-    std::function<void()> on_completed;
};

+using StepFunc = std::function<void(const String & name, size_t steps_left)>;
+
+class LambdaExecutableTask : public IExecutableTask
+{
+public:
+    explicit LambdaExecutableTask(const String & name_, size_t step_count_, StepFunc step_func_ = {}, UInt64 priority_ = 0)
+        : name(name_)
+        , step_count(step_count_)
+        , step_func(step_func_)
+        , priority(priority_)
+    {}
+
+    bool executeStep() override
+    {
+        if (step_func)
+            step_func(name, step_count);
+        return --step_count;
+    }
+
+    StorageID getStorageID() override
+    {
+        return {"test", name};
+    }
+
+    void onCompleted() override {}
+
+    UInt64 getPriority() override { return priority; }
+
+private:
+    String name;
+    size_t step_count;
+    StepFunc step_func;
+    UInt64 priority;
+};
+
+
+TEST(Executor, Simple)
+{
+    auto executor = std::make_shared<DB::MergeTreeBackgroundExecutor<RoundRobinRuntimeQueue>>
+    (
+        "GTest",
+        1, // threads
+        100, // max_tasks
+        CurrentMetrics::BackgroundMergesAndMutationsPoolTask
+    );
+
+    String schedule; // mutex is not required because we have a single worker
+    String expected_schedule = "ABCDEABCDABCDBCDCDD";
+    std::barrier barrier(2);
+    auto task = [&] (const String & name, size_t)
+    {
+        schedule += name;
+        if (schedule.size() == expected_schedule.size())
+            barrier.arrive_and_wait();
+    };
+
+    // Schedule tasks from this `init_task` to guarantee atomicity.
+    // Worker will see pending queue when we push all tasks.
+    // This is required to check scheduling properties of round-robin in deterministic way.
+    auto init_task = [&] (const String &, size_t)
+    {
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("A", 3, task));
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("B", 4, task));
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("C", 5, task));
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("D", 6, task));
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("E", 1, task));
+    };
+
+    executor->trySchedule(std::make_shared<LambdaExecutableTask>("init_task", 1, init_task));
+    barrier.arrive_and_wait(); // Do not finish until tasks are done
+    executor->wait();
+    ASSERT_EQ(schedule, expected_schedule);
+}
+
+
TEST(Executor, RemoveTasks)
{
    const size_t tasks_kinds = 25;
    const size_t batch = 100;

-    auto executor = std::make_shared<DB::OrdinaryBackgroundExecutor>
+    auto executor = std::make_shared<DB::MergeTreeBackgroundExecutor<RoundRobinRuntimeQueue>>
    (
        "GTest",
        tasks_kinds,
@@ -105,7 +180,7 @@ TEST(Executor, RemoveTasksStress)
    const size_t schedulers_count = 5;
    const size_t removers_count = 5;

-    auto executor = std::make_shared<DB::OrdinaryBackgroundExecutor>
+    auto executor = std::make_shared<DB::MergeTreeBackgroundExecutor<RoundRobinRuntimeQueue>>
    (
        "GTest",
        tasks_kinds,
@@ -151,3 +226,46 @@ TEST(Executor, RemoveTasksStress)

    ASSERT_EQ(CurrentMetrics::values[CurrentMetrics::BackgroundMergesAndMutationsPoolTask], 0);
}
+
+
+TEST(Executor, UpdatePolicy)
+{
+    auto executor = std::make_shared<DB::MergeTreeBackgroundExecutor<DynamicRuntimeQueue>>
+    (
+        "GTest",
+        1, // threads
+        100, // max_tasks
+        CurrentMetrics::BackgroundMergesAndMutationsPoolTask
+    );
+
+    String schedule; // mutex is not required because we have a single worker
+    String expected_schedule = "ABCDEDDDDDCCBACBACB";
+    std::barrier barrier(2);
+    auto task = [&] (const String & name, size_t)
+    {
+        schedule += name;
+        if (schedule.size() == 5)
+            executor->updateSchedulingPolicy(PriorityRuntimeQueue::name);
+        if (schedule.size() == 12)
+            executor->updateSchedulingPolicy(RoundRobinRuntimeQueue::name);
+        if (schedule.size() == expected_schedule.size())
+            barrier.arrive_and_wait();
+    };
+
+    // Schedule tasks from this `init_task` to guarantee atomicity.
+    // Worker will see pending queue when we push all tasks.
+    // This is required to check scheduling properties in a deterministic way.
+    auto init_task = [&] (const String &, size_t)
+    {
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("A", 3, task, 5));
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("B", 4, task, 4));
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("C", 5, task, 3));
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("D", 6, task, 2));
+        executor->trySchedule(std::make_shared<LambdaExecutableTask>("E", 1, task, 1));
+    };
+
+    executor->trySchedule(std::make_shared<LambdaExecutableTask>("init_task", 1, init_task));
+    barrier.arrive_and_wait(); // Do not finish until tasks are done
+    executor->wait();
+    ASSERT_EQ(schedule, expected_schedule);
+}
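To make the two expected schedules above easier to follow, here is a plain C++ sketch (not the MergeTreeBackgroundExecutor API) of the two queue disciplines the tests exercise: a round-robin queue interleaves tasks one step at a time, while a priority queue keeps running the task with the smallest priority value until it is finished. The Task struct and both run functions are illustrative assumptions.

// Round-robin vs priority scheduling over the same task set as the tests.
#include <cstddef>
#include <cstdint>
#include <deque>
#include <iostream>
#include <string>
#include <vector>

struct Task { std::string name; size_t steps; uint64_t priority; };

std::string runRoundRobin(std::deque<Task> queue)
{
    std::string schedule;
    while (!queue.empty())
    {
        Task task = queue.front();
        queue.pop_front();
        schedule += task.name;
        if (--task.steps > 0)
            queue.push_back(task);   /// re-enqueued at the back: other tasks get a turn
    }
    return schedule;
}

std::string runByPriority(std::vector<Task> tasks)
{
    std::string schedule;
    while (!tasks.empty())
    {
        size_t best = 0;
        for (size_t i = 1; i < tasks.size(); ++i)
            if (tasks[i].priority < tasks[best].priority)
                best = i;
        schedule += tasks[best].name;          /// the same task keeps running until done
        if (--tasks[best].steps == 0)
            tasks.erase(tasks.begin() + best);
    }
    return schedule;
}

int main()
{
    std::vector<Task> tasks{{"A", 3, 5}, {"B", 4, 4}, {"C", 5, 3}, {"D", 6, 2}, {"E", 1, 1}};
    // Matches the interleaving pattern expected by TEST(Executor, Simple).
    std::cout << runRoundRobin(std::deque<Task>(tasks.begin(), tasks.end())) << "\n";
    // Lowest priority value first: E, then all of D, then C, and so on.
    std::cout << runByPriority(tasks) << "\n";
}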
@@ -4,7 +4,7 @@
#include <Common/setThreadName.h>
#include <Parsers/ASTTableOverrides.h>
-#include <Processors/Transforms/PostgreSQLSource.h>
+#include <Processors/Sources/PostgreSQLSource.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <QueryPipeline/QueryPipeline.h>
#include <QueryPipeline/Pipe.h>
@@ -46,11 +46,12 @@ void RabbitMQHandler::startLoop()
    loop_running.store(false);
}

-void RabbitMQHandler::iterateLoop()
+int RabbitMQHandler::iterateLoop()
{
    std::unique_lock lock(startup_mutex, std::defer_lock);
    if (lock.try_lock())
-        uv_run(loop, UV_RUN_NOWAIT);
+        return uv_run(loop, UV_RUN_NOWAIT);
+    return 1; /// We cannot know how actual value.
}

/// Do not need synchronization as in iterateLoop(), because this method is used only for
@@ -34,7 +34,7 @@ public:

    /// Loop to wait for small tasks in a non-blocking mode.
    /// Adds synchronization with main background loop.
-    void iterateLoop();
+    int iterateLoop();

    /// Loop to wait for small tasks in a blocking mode.
    /// No synchronization is done with the main loop thread.
@@ -15,6 +15,7 @@ namespace DB

static const auto BATCH = 1000;
static const auto RETURNED_LIMIT = 50000;
+static const auto FINISH_PRODUCER_NUM_TRIES = 50;

namespace ErrorCodes
{
@@ -254,13 +255,20 @@ void RabbitMQProducer::startProducingTaskLoop()
        }
    }

+    int res = 0;
+    size_t try_num = 0;
+    while (++try_num <= FINISH_PRODUCER_NUM_TRIES && (res = iterateEventLoop()))
+    {
+        LOG_TEST(log, "Waiting for pending callbacks to finish (count: {}, try: {})", res, try_num);
+    }
+
    LOG_DEBUG(log, "Producer on channel {} completed", channel_id);
}


-void RabbitMQProducer::iterateEventLoop()
+int RabbitMQProducer::iterateEventLoop()
{
-    connection.getHandler().iterateLoop();
+    return connection.getHandler().iterateLoop();
}

}
@@ -43,7 +43,7 @@ private:
    void stopProducingTask() override;
    void finishImpl() override;

-    void iterateEventLoop();
+    int iterateEventLoop();
    void startProducingTaskLoop() override;
    void setupChannel();
    void removeRecord(UInt64 received_delivery_tag, bool multiple, bool republish);
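The RabbitMQ hunks above change iterateLoop()/iterateEventLoop() to return the uv_run() result, so the producer can pump the event loop a bounded number of times while callbacks are still pending before logging completion. The snippet below is a hedged sketch of that bounded drain loop only; pump() is a stand-in for connection.getHandler().iterateLoop(), and the counts are made up for the demo.

// Bounded "keep pumping the event loop while it reports pending work" pattern.
#include <cstddef>
#include <iostream>

static int pending = 3;

int pump()                       /// pretend event loop: returns the number of still-active handles
{
    return pending > 0 ? pending-- : 0;
}

int main()
{
    constexpr size_t max_tries = 50;   /// mirrors the FINISH_PRODUCER_NUM_TRIES idea above
    int res = 0;
    size_t try_num = 0;
    while (++try_num <= max_tries && (res = pump()) != 0)
        std::cout << "waiting for pending callbacks (count: " << res << ", try: " << try_num << ")\n";
    std::cout << "producer completed after " << try_num << " tries\n";
}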
@@ -230,7 +230,7 @@ StorageDeltaLake::StorageDeltaLake(
    if (columns_.empty())
    {
        columns_ = StorageS3::getTableStructureFromData(
-            new_configuration, /*distributed processing*/ false, format_settings_, context_, nullptr);
+            new_configuration, format_settings_, context_, nullptr);
        storage_metadata.setColumns(columns_);
    }
    else
@@ -272,7 +272,7 @@ ColumnsDescription StorageDeltaLake::getTableStructureFromData(
{
    StorageS3::updateS3Configuration(ctx, configuration);
    auto new_configuration = getAdjustedS3Configuration(ctx, configuration, &Poco::Logger::get("StorageDeltaLake"));
-    return StorageS3::getTableStructureFromData(new_configuration, /*distributed processing*/ false, format_settings, ctx, /*object_infos*/ nullptr);
+    return StorageS3::getTableStructureFromData(new_configuration, format_settings, ctx, /*object_infos*/ nullptr);
}

void registerStorageDeltaLake(StorageFactory & factory)
@@ -163,7 +163,7 @@ StorageHudi::StorageHudi(
    if (columns_.empty())
    {
        columns_ = StorageS3::getTableStructureFromData(
-            new_configuration, /*distributed processing*/ false, format_settings_, context_, nullptr);
+            new_configuration, format_settings_, context_, nullptr);
        storage_metadata.setColumns(columns_);
    }
    else
@@ -203,7 +203,7 @@ ColumnsDescription StorageHudi::getTableStructureFromData(
{
    StorageS3::updateS3Configuration(ctx, configuration);
    auto new_configuration = getAdjustedS3Configuration(configuration, &Poco::Logger::get("StorageDeltaLake"));
-    return StorageS3::getTableStructureFromData(new_configuration, /*distributed processing*/ false, format_settings, ctx, /*object_infos*/ nullptr);
+    return StorageS3::getTableStructureFromData(new_configuration, format_settings, ctx, /*object_infos*/ nullptr);
}

void registerStorageHudi(StorageFactory & factory)
@@ -566,11 +566,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
            SelectQueryOptions(processed_stage).analyze()).buildQueryPipeline());
    }

-    bool final = false;
-    if (modified_query_info.table_expression_modifiers)
-        final = modified_query_info.table_expression_modifiers->hasFinal();
-    else
-        final = modified_select.final();
+    bool final = isFinal(modified_query_info);

    if (!final && storage->needRewriteQueryWithFinal(real_column_names))
    {
@@ -912,6 +908,25 @@ void ReadFromMerge::convertingSourceStream(
    }
}

+bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_)
+{
+    /// Disable read-in-order optimization for reverse order with final.
+    /// Otherwise, it can lead to incorrect final behavior because the implementation may rely on the reading in direct order).
+    if (order_info_->direction != 1 && isFinal(query_info))
+        return false;
+
+    order_info = order_info_;
+    return true;
+}
+
+bool ReadFromMerge::isFinal(const SelectQueryInfo & query_info)
+{
+    if (query_info.table_expression_modifiers)
+        return query_info.table_expression_modifiers->hasFinal();
+    const auto & select_query = query_info.query->as<ASTSelectQuery &>();
+    return select_query.final();
+}
+
IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const
{
    ColumnSizeByName column_sizes;
@@ -993,4 +1008,5 @@ NamesAndTypesList StorageMerge::getVirtuals() const

    return virtuals;
}

}
@@ -148,7 +148,9 @@ public:

    const StorageListWithLocks & getSelectedTables() const { return selected_tables; }

-    void requestReadingInOrder(InputOrderInfoPtr order_info_) { order_info = order_info_; }
+    /// Returns `false` if requested reading cannot be performed.
+    bool requestReadingInOrder(InputOrderInfoPtr order_info_);
+    static bool isFinal(const SelectQueryInfo & query_info);

private:
    const size_t required_max_block_size;
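The ReadFromMerge changes above centralize the FINAL check (either from table expression modifiers or from the SELECT itself) and refuse the read-in-order optimization for reverse order when FINAL is involved. The following is a minimal sketch of that gate with hypothetical stand-in structs; QueryInfo, OrderInfo and select_has_final are assumptions, not the real SelectQueryInfo/InputOrderInfo types.

// Reject reverse read-in-order when the query uses FINAL.
#include <iostream>
#include <optional>

struct TableExpressionModifiers { bool has_final = false; };
struct QueryInfo
{
    std::optional<TableExpressionModifiers> table_expression_modifiers;
    bool select_has_final = false;   /// stands in for query->as<ASTSelectQuery &>().final()
};
struct OrderInfo { int direction = 1; };   /// 1 = direct order, -1 = reverse order

bool isFinal(const QueryInfo & query_info)
{
    if (query_info.table_expression_modifiers)
        return query_info.table_expression_modifiers->has_final;
    return query_info.select_has_final;
}

bool requestReadingInOrder(const OrderInfo & order_info, const QueryInfo & query_info)
{
    /// Reverse read-in-order together with FINAL could break the merge of row versions.
    if (order_info.direction != 1 && isFinal(query_info))
        return false;
    return true;   /// the real method also remembers order_info here
}

int main()
{
    QueryInfo with_final{std::nullopt, true};
    std::cout << requestReadingInOrder(OrderInfo{-1}, with_final) << "\n";  // 0: rejected
    std::cout << requestReadingInOrder(OrderInfo{1}, with_final) << "\n";   // 1: accepted
}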
@@ -16,7 +16,7 @@
#include <IO/Operators.h>
#include <Parsers/ASTLiteral.h>
#include <QueryPipeline/Pipe.h>
-#include <Processors/Transforms/MongoDBSource.h>
+#include <Processors/Sources/MongoDBSource.h>
#include <Processors/Sinks/SinkToStorage.h>

namespace DB
@@ -1,7 +1,7 @@
#include "StoragePostgreSQL.h"

#if USE_LIBPQXX
-#include <Processors/Transforms/PostgreSQLSource.h>
+#include <Processors/Sources/PostgreSQLSource.h>

#include <Common/parseAddress.h>
#include <Common/assert_cast.h>
@@ -961,7 +961,6 @@ StorageS3::StorageS3(
        format_name,
        s3_configuration,
        compression_method,
-        distributed_processing_,
        is_key_with_globs,
        format_settings,
        context_,
@@ -1369,14 +1368,13 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context

ColumnsDescription StorageS3::getTableStructureFromData(
    StorageS3::Configuration & configuration,
-    bool distributed_processing,
    const std::optional<FormatSettings> & format_settings,
    ContextPtr ctx,
    ObjectInfos * object_infos)
{
    updateS3Configuration(ctx, configuration);
    return getTableStructureFromDataImpl(
-        configuration.format, configuration, configuration.compression_method, distributed_processing,
+        configuration.format, configuration, configuration.compression_method,
        configuration.url.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, object_infos);
}

@@ -1384,7 +1382,6 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
    const String & format,
    const Configuration & s3_configuration,
    const String & compression_method,
-    bool distributed_processing,
    bool is_key_with_globs,
    const std::optional<FormatSettings> & format_settings,
    ContextPtr ctx,
@@ -1396,7 +1393,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
        s3_configuration,
        {s3_configuration.url.key},
        is_key_with_globs,
-        distributed_processing,
+        false,
        ctx, nullptr,
        {}, object_infos, &read_keys);

@@ -291,7 +291,6 @@ public:

    static ColumnsDescription getTableStructureFromData(
        StorageS3::Configuration & configuration,
-        bool distributed_processing,
        const std::optional<FormatSettings> & format_settings,
        ContextPtr ctx,
        ObjectInfos * object_infos = nullptr);
@@ -338,7 +337,6 @@ private:
        const String & format,
        const Configuration & s3_configuration,
        const String & compression_method,
-        bool distributed_processing,
        bool is_key_with_globs,
        const std::optional<FormatSettings> & format_settings,
        ContextPtr ctx,
@@ -66,8 +66,8 @@ StorageS3Cluster::StorageS3Cluster(
        /// `distributed_processing` is set to false, because this code is executed on the initiator, so there is no callback set
        /// for asking for the next tasks.
        /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function
-        auto columns = StorageS3::getTableStructureFromDataImpl(format_name, s3_configuration, compression_method,
-            /*distributed_processing_*/false, is_key_with_globs, /*format_settings=*/std::nullopt, context_);
+        auto columns = StorageS3::getTableStructureFromDataImpl(
+            format_name, s3_configuration, compression_method, is_key_with_globs, /*format_settings=*/std::nullopt, context_);
        storage_metadata.setColumns(columns);
    }
    else
@@ -166,7 +166,6 @@ S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection)
    max_single_read_retries = collection.getOrDefault<UInt64>("max_single_read_retries", max_single_read_retries);
    max_connections = collection.getOrDefault<UInt64>("max_connections", max_connections);
    list_object_keys_size = collection.getOrDefault<UInt64>("list_object_keys_size", list_object_keys_size);
-    allow_head_object_request = collection.getOrDefault<bool>("allow_head_object_request", allow_head_object_request);
    throw_on_zero_files_match = collection.getOrDefault<bool>("throw_on_zero_files_match", throw_on_zero_files_match);
}

@@ -182,7 +181,6 @@ S3Settings::RequestSettings::RequestSettings(
    max_connections = config.getUInt64(key + "max_connections", settings.s3_max_connections);
    check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload);
    list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size);
-    allow_head_object_request = config.getBool(key + "allow_head_object_request", allow_head_object_request);
    throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match);

    /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload,
@@ -67,16 +67,6 @@ struct S3Settings
    ThrottlerPtr get_request_throttler;
    ThrottlerPtr put_request_throttler;

-    /// If this is set to false then `S3::getObjectSize()` will use `GetObjectAttributes` request instead of `HeadObject`.
-    /// Details: `HeadObject` request never returns a response body (even if there is an error) however
-    /// if the request was sent without specifying a region in the endpoint (i.e. for example "https://test.s3.amazonaws.com/mydata.csv"
-    /// instead of "https://test.s3-us-west-2.amazonaws.com/mydata.csv") then that response body is one of the main ways to determine
-    /// the correct region and try to repeat the request again with the correct region.
-    /// For any other request type (`GetObject`, `ListObjects`, etc.) AWS SDK does that because they have response bodies, but for `HeadObject`
-    /// there is no response body so this way doesn't work. That's why it's better to use `GetObjectAttributes` requests sometimes.
-    /// See https://github.com/aws/aws-sdk-cpp/issues/1558 and also the function S3ErrorMarshaller::ExtractRegion() for more information.
-    bool allow_head_object_request = true;
-
    bool throw_on_zero_files_match = false;

    const PartUploadSettings & getUploadSettings() const { return upload_settings; }
@@ -139,7 +139,7 @@ ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context)
    if (configuration.structure == "auto")
    {
        context->checkAccess(getSourceAccessType());
-        return StorageS3::getTableStructureFromData(configuration, false, std::nullopt, context);
+        return StorageS3::getTableStructureFromData(configuration, std::nullopt, context);
    }

    return parseColumnsListFromString(configuration.structure, context);
@@ -82,7 +82,7 @@ ColumnsDescription TableFunctionS3Cluster::getActualTableStructure(ContextPtr co
    context->checkAccess(getSourceAccessType());

    if (configuration.structure == "auto")
-        return StorageS3::getTableStructureFromData(configuration, false, std::nullopt, context);
+        return StorageS3::getTableStructureFromData(configuration, std::nullopt, context);

    return parseColumnsListFromString(configuration.structure, context);
}
@@ -94,6 +94,9 @@
if (ENABLE_NLP)
    set(USE_NLP 1)
endif()
+if (TARGET ch_contrib::ulid)
+    set(USE_ULID 1)
+endif()
if (TARGET ch_contrib::llvm)
    set(USE_EMBEDDED_COMPILER 1)
endif()
@@ -6,11 +6,11 @@ import os

def compress_file_fast(path, archive_path):
    if archive_path.endswith(".zst"):
-        subprocess.check_call("zstd < {} > {}".format(path, archive_path), shell=True)
+        subprocess.check_call(f"zstd < {path} > {archive_path}", shell=True)
    elif os.path.exists("/usr/bin/pigz"):
-        subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True)
+        subprocess.check_call(f"pigz < {path} > {archive_path}", shell=True)
    else:
-        subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True)
+        subprocess.check_call(f"gzip < {path} > {archive_path}", shell=True)


def compress_fast(path, archive_path, exclude=None):
@@ -28,9 +28,9 @@ def compress_fast(path, archive_path, exclude=None):
    if exclude is None:
        exclude_part = ""
    elif isinstance(exclude, list):
-        exclude_part = " ".join(["--exclude {}".format(x) for x in exclude])
+        exclude_part = " ".join([f"--exclude {x}" for x in exclude])
    else:
-        exclude_part = "--exclude {}".format(str(exclude))
+        exclude_part = f"--exclude {exclude}"

    fname = os.path.basename(path)
    if os.path.isfile(path):
@@ -38,9 +38,7 @@ def compress_fast(path, archive_path, exclude=None):
    else:
        path += "/.."

-    cmd = "tar {} {} -cf {} -C {} {}".format(
-        program_part, exclude_part, archive_path, path, fname
-    )
+    cmd = f"tar {program_part} {exclude_part} -cf {archive_path} -C {path} {fname}"
    logging.debug("compress_fast cmd: %s", cmd)
    subprocess.check_call(cmd, shell=True)

@@ -70,11 +68,9 @@ def decompress_fast(archive_path, result_path=None):
    )

    if result_path is None:
-        subprocess.check_call(
-            "tar {} -xf {}".format(program_part, archive_path), shell=True
-        )
+        subprocess.check_call(f"tar {program_part} -xf {archive_path}", shell=True)
    else:
        subprocess.check_call(
-            "tar {} -xf {} -C {}".format(program_part, archive_path, result_path),
+            f"tar {program_part} -xf {archive_path} -C {result_path}",
            shell=True,
        )
||||||
|
@@ -7,7 +7,7 @@ import logging
 import sys
 import subprocess
 from pathlib import Path
+from shutil import copy2
 from typing import Dict

 from github import Github
@@ -19,6 +19,7 @@ from clickhouse_helper import (
     prepare_tests_results_for_clickhouse,
 )
 from commit_status_helper import post_commit_status, update_mergeable_check
+from compress_files import compress_fast
 from docker_pull_helper import get_image_with_version, DockerImage
 from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH
 from get_robot_token import get_best_robot_token
@@ -34,15 +35,22 @@ from upload_result_helper import upload_results
 RPM_IMAGE = "clickhouse/install-rpm-test"
 DEB_IMAGE = "clickhouse/install-deb-test"
 TEMP_PATH = Path(TEMP)
+LOGS_PATH = TEMP_PATH / "tests_logs"
 SUCCESS = "success"
 FAILURE = "failure"
+OK = "OK"
+FAIL = "FAIL"


 def prepare_test_scripts():
     server_test = r"""#!/bin/bash
+set -e
+trap "bash -ex /packages/preserve_logs.sh" ERR
 systemctl start clickhouse-server
 clickhouse-client -q 'SELECT version()'"""
     keeper_test = r"""#!/bin/bash
+set -e
+trap "bash -ex /packages/preserve_logs.sh" ERR
 systemctl start clickhouse-keeper
 for i in {1..20}; do
     echo wait for clickhouse-keeper to being up
@@ -50,15 +58,18 @@ for i in {1..20}; do
 done
 for i in {1..5}; do
     echo wait for clickhouse-keeper to answer on mntr request
-    exec 13<>/dev/tcp/127.0.0.1/9181
-    echo mntr >&13
-    cat <&13 | grep zk_version && break || sleep 1
+    {
+        exec 13<>/dev/tcp/127.0.0.1/9181
+        echo mntr >&13
+        cat <&13 | grep zk_version
+    } && break || sleep 1
     exec 13>&-
 done
 exec 13>&-"""
     binary_test = r"""#!/bin/bash
-chmod +x /packages/clickhouse
-/packages/clickhouse install
+set -e
+trap "bash -ex /packages/preserve_logs.sh" ERR
+/packages/clickhouse.copy install
 clickhouse-server start --daemon
 for i in {1..5}; do
     clickhouse-client -q 'SELECT version()' && break || sleep 1
@@ -70,15 +81,26 @@ for i in {1..20}; do
 done
 for i in {1..5}; do
     echo wait for clickhouse-keeper to answer on mntr request
-    exec 13<>/dev/tcp/127.0.0.1/9181
-    echo mntr >&13
-    cat <&13 | grep zk_version && break || sleep 1
+    {
+        exec 13<>/dev/tcp/127.0.0.1/9181
+        echo mntr >&13
+        cat <&13 | grep zk_version
+    } && break || sleep 1
     exec 13>&-
 done
 exec 13>&-"""
+    preserve_logs = r"""#!/bin/bash
+journalctl -u clickhouse-server > /tests_logs/clickhouse-server.service || :
+journalctl -u clickhouse-keeper > /tests_logs/clickhouse-keeper.service || :
+cp /var/log/clickhouse-server/clickhouse-server.* /tests_logs/ || :
+cp /var/log/clickhouse-keeper/clickhouse-keeper.* /tests_logs/ || :
+chmod a+rw -R /tests_logs
+exit 1
+"""
     (TEMP_PATH / "server_test.sh").write_text(server_test, encoding="utf-8")
     (TEMP_PATH / "keeper_test.sh").write_text(keeper_test, encoding="utf-8")
     (TEMP_PATH / "binary_test.sh").write_text(binary_test, encoding="utf-8")
+    (TEMP_PATH / "preserve_logs.sh").write_text(preserve_logs, encoding="utf-8")


 def test_install_deb(image: DockerImage) -> TestResults:
@@ -143,27 +165,41 @@ def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults:
         stopwatch = Stopwatch()
         container_name = name.lower().replace(" ", "_").replace("/", "_")
         log_file = TEMP_PATH / f"{container_name}.log"
+        logs = [log_file]
         run_command = (
             f"docker run --rm --privileged --detach --cap-add=SYS_PTRACE "
-            f"--volume={TEMP_PATH}:/packages {image}"
+            f"--volume={LOGS_PATH}:/tests_logs --volume={TEMP_PATH}:/packages {image}"
         )
-        logging.info("Running docker container: `%s`", run_command)
-        container_id = subprocess.check_output(
-            run_command, shell=True, encoding="utf-8"
-        ).strip()
-        (TEMP_PATH / "install.sh").write_text(command)
-        install_command = f"docker exec {container_id} bash -ex /packages/install.sh"
-        with TeePopen(install_command, log_file) as process:
-            retcode = process.wait()
-            if retcode == 0:
-                status = SUCCESS
-            else:
-                status = FAILURE
+
+        for retry in range(1, 4):
+            for file in LOGS_PATH.glob("*"):
+                file.unlink()
+
+            logging.info("Running docker container: `%s`", run_command)
+            container_id = subprocess.check_output(
+                run_command, shell=True, encoding="utf-8"
+            ).strip()
+            (TEMP_PATH / "install.sh").write_text(command)
+            install_command = (
+                f"docker exec {container_id} bash -ex /packages/install.sh"
+            )
+            with TeePopen(install_command, log_file) as process:
+                retcode = process.wait()
+                if retcode == 0:
+                    status = OK
+                    break

+                status = FAIL
+            copy2(log_file, LOGS_PATH)
+            archive_path = TEMP_PATH / f"{container_name}-{retry}.tar.gz"
+            compress_fast(
+                LOGS_PATH.as_posix(),
+                archive_path.as_posix(),
+            )
+            logs.append(archive_path)
+
         subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True)
-        test_results.append(
-            TestResult(name, status, stopwatch.duration_seconds, [log_file])
-        )
+        test_results.append(TestResult(name, status, stopwatch.duration_seconds, logs))

     return test_results

@@ -222,6 +258,7 @@ def main():
     args = parse_args()

     TEMP_PATH.mkdir(parents=True, exist_ok=True)
+    LOGS_PATH.mkdir(parents=True, exist_ok=True)

     pr_info = PRInfo()

@@ -245,18 +282,30 @@ def main():
     if args.download:

         def filter_artifacts(path: str) -> bool:
-            return (
-                path.endswith(".deb")
-                or path.endswith(".rpm")
-                or path.endswith(".tgz")
-                or path.endswith("/clickhouse")
-            )
+            is_match = False
+            if args.deb or args.rpm:
+                is_match = is_match or path.endswith("/clickhouse")
+            if args.deb:
+                is_match = is_match or path.endswith(".deb")
+            if args.rpm:
+                is_match = is_match or path.endswith(".rpm")
+            if args.tgz:
+                is_match = is_match or path.endswith(".tgz")
+            return is_match

         download_builds_filter(
             args.check_name, REPORTS_PATH, TEMP_PATH, filter_artifacts
         )

     test_results = []  # type: TestResults
+    ch_binary = Path(TEMP_PATH) / "clickhouse"
+    if ch_binary.exists():
+        # make a copy of clickhouse to avoid redownload of exctracted binary
+        ch_binary.chmod(0o755)
+        ch_copy = ch_binary.parent / "clickhouse.copy"
+        copy2(ch_binary, ch_binary.parent / "clickhouse.copy")
+        subprocess.check_output(f"{ch_copy.absolute()} local -q 'SELECT 1'", shell=True)
+
     if args.deb:
         test_results.extend(test_install_deb(docker_images[DEB_IMAGE]))
     if args.rpm:
@@ -266,9 +315,11 @@ def main():
         test_results.extend(test_install_tgz(docker_images[RPM_IMAGE]))

     state = SUCCESS
+    test_status = OK
     description = "Packages installed successfully"
-    if FAILURE in (result.status for result in test_results):
+    if FAIL in (result.status for result in test_results):
         state = FAILURE
+        test_status = FAIL
         description = "Failed to install packages: " + ", ".join(
             result.name for result in test_results
         )
@@ -298,7 +349,7 @@ def main():
     prepared_events = prepare_tests_results_for_clickhouse(
         pr_info,
         test_results,
-        state,
+        test_status,
         stopwatch.duration_seconds,
         stopwatch.start_time_str,
         report_url,
@@ -632,7 +632,8 @@ def test_async_backups_to_same_destination(interface):
         f"BACKUP TABLE test.table TO {backup_name} ASYNC"
     )

-    # The second backup to the same destination is expected to fail. It can either fail immediately or after a while.
+    # One of those two backups to the same destination is expected to fail.
+    # If the second backup is going to fail it can fail either immediately or after a while.
     # If it fails immediately we won't even get its ID.
     id2 = None if err else res.split("\t")[0]

@@ -647,17 +648,25 @@ def test_async_backups_to_same_destination(interface):
         "",
     )

-    # The first backup should succeed.
-    assert instance.query(
-        f"SELECT status, error FROM system.backups WHERE id='{id1}'"
-    ) == TSV([["BACKUP_CREATED", ""]])
-
-    if id2:
-        # The second backup should fail.
-        assert (
-            instance.query(f"SELECT status FROM system.backups WHERE id='{id2}'")
-            == "BACKUP_FAILED\n"
-        )
+    ids_succeeded = (
+        instance.query(
+            f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'"
+        )
+        .rstrip("\n")
+        .split("\n")
+    )
+
+    ids_failed = (
+        instance.query(
+            f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'"
+        )
+        .rstrip("\n")
+        .split("\n")
+    )
+
+    assert len(ids_succeeded) == 1
+    assert len(ids_failed) <= 1
+    assert set(ids_succeeded + ids_failed) == set(ids)

     # Check that the first backup is all right.
     instance.query("DROP TABLE test.table")
@@ -326,3 +326,39 @@ def test_distributed_insert_select_with_replicated(started_cluster):
     first_replica_first_shard.query(
         """DROP TABLE IF EXISTS insert_select_replicated_local ON CLUSTER 'first_shard' SYNC;"""
     )
+
+
+def test_parallel_distributed_insert_select_with_schema_inference(started_cluster):
+    node = started_cluster.instances["s0_0_0"]
+
+    node.query(
+        """DROP TABLE IF EXISTS parallel_insert_select ON CLUSTER 'first_shard' SYNC;"""
+    )
+
+    node.query(
+        """
+        CREATE TABLE parallel_insert_select ON CLUSTER 'first_shard' (a String, b UInt64)
+        ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/insert_select_with_replicated', '{replica}')
+        ORDER BY (a, b);
+        """
+    )
+
+    node.query(
+        """
+        INSERT INTO parallel_insert_select SELECT * FROM s3Cluster(
+            'first_shard',
+            'http://minio1:9001/root/data/generated/*.csv', 'minio', 'minio123', 'CSV'
+        ) SETTINGS parallel_distributed_insert_select=1, use_structure_from_insertion_table_in_table_functions=0;
+        """
+    )
+
+    node.query("SYSTEM SYNC REPLICA parallel_insert_select")
+
+    actual_count = int(
+        node.query(
+            "SELECT count() FROM s3('http://minio1:9001/root/data/generated/*.csv', 'minio', 'minio123', 'CSV','a String, b UInt64')"
+        )
+    )
+
+    count = int(node.query("SELECT count() FROM parallel_insert_select"))
+    assert count == actual_count
@@ -1019,7 +1019,6 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster):
     ), "ClickHouse lost some messages: {}".format(result)


-@pytest.mark.skip(reason="Flaky")
 def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
     instance.query(
         """
@@ -179,7 +179,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 )
 WHERE id = 1
 2000-01-01 1 test string 1 1
@@ -203,7 +203,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 )
 WHERE id = 1
 )
@@ -229,7 +229,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 ) AS b
 WHERE id = 1
 )
@@ -248,7 +248,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 )
 WHERE id = 1
 2000-01-01 1 test string 1 1
@@ -272,7 +272,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 )
 WHERE id = 1
 )
@@ -291,7 +291,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 ) AS b
 WHERE id = 1
 2000-01-01 1 test string 1 1
@@ -315,7 +315,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 ) AS a
 WHERE id = 1
 ) AS b
@@ -332,7 +332,7 @@ FROM
 date,
 min(value) AS value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 GROUP BY
 id,
 date
@@ -352,7 +352,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 UNION ALL
 SELECT
 date,
@@ -360,7 +360,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 )
 WHERE id = 1
 2000-01-01 1 test string 1 1
@@ -381,7 +381,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 )
 ANY LEFT JOIN
 (
@@ -441,7 +441,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 )
 ANY LEFT JOIN
 (
@@ -532,7 +532,7 @@ FROM
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 ) AS a
 ANY LEFT JOIN
 (
@@ -579,7 +579,7 @@ SEMI LEFT JOIN
 name,
 value
 FROM test_00597
-WHERE id = 1
+PREWHERE id = 1
 )
 WHERE id = 1
 ) AS r USING (id)
@@ -24,6 +24,6 @@ FROM
 n,
 finalizeAggregation(s)
 FROM test_00808_push_down_with_finalizeAggregation
-WHERE (n <= 5) AND (n >= 2)
+PREWHERE (n <= 5) AND (n >= 2)
 )
 WHERE (n >= 2) AND (n <= 5)
@@ -293,8 +293,8 @@ select * from (select * from tab where (a + b) * c = 8 union all select * from t
 select * from (explain plan actions = 1 select * from (select * from tab where (a + b) * c = 8 union all select * from tab3 where (a + b) * c = 18) order by sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%';
 Prefix sort description: sin(divide(a, b)) ASC
 Result sort description: sin(divide(a, b)) ASC
 ReadType: InOrder
 ReadType: InOrder
 select * from (select * from tab where (a + b) * c = 8 union all select * from tab4) order by sin(a / b);
 2 2 2 2
 2 2 2 2
@@ -311,7 +311,7 @@ select * from (select * from tab where (a + b) * c = 8 union all select * from t
 select * from (explain plan actions = 1 select * from (select * from tab where (a + b) * c = 8 union all select * from tab4) order by sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%';
 Prefix sort description: sin(divide(a, b)) ASC
 Result sort description: sin(divide(a, b)) ASC
 ReadType: InOrder
 ReadType: InOrder
 select * from (select * from tab union all select * from tab5) order by (a + b) * c;
 0 0 0 0
@@ -403,3 +403,8 @@ select * from (explain plan actions = 1 select * from (select * from tab union a
 Sort description: multiply(plus(a, b), c) ASC, sin(divide(a, b)) ASC, d ASC
 Limit 3
 ReadType: Default
+drop table if exists tab;
+drop table if exists tab2;
+drop table if exists tab3;
+drop table if exists tab4;
+drop table if exists tab5;
@@ -1,5 +1,11 @@
 SET optimize_read_in_order = 1, query_plan_read_in_order=1;

+drop table if exists tab;
+drop table if exists tab2;
+drop table if exists tab3;
+drop table if exists tab4;
+drop table if exists tab5;
+
 create table tab (a UInt32, b UInt32, c UInt32, d UInt32) engine = MergeTree order by ((a + b) * c, sin(a / b));
 insert into tab select number, number, number, number from numbers(5);
 insert into tab select number, number, number, number from numbers(5);
@@ -142,3 +148,9 @@ select * from (explain plan actions = 1 select * from (select * from tab union a
 -- In case of tab4, we do full sorting by ((a + b) * c, sin(a / b), d) with LIMIT. We can replace it to sorting by ((a + b) * c, sin(a / b)) and LIMIT WITH TIES, when sorting alog support it.
 select * from (select * from tab union all select * from tab5 union all select * from tab4) order by (a + b) * c, sin(a / b), d limit 3;
 select * from (explain plan actions = 1 select * from (select * from tab union all select * from tab5 union all select * from tab4) order by (a + b) * c, sin(a / b), d limit 3) where explain ilike '%sort description%' or explain like '%ReadType%' or explain like '%Limit%';
+
+drop table if exists tab;
+drop table if exists tab2;
+drop table if exists tab3;
+drop table if exists tab4;
+drop table if exists tab5;
@@ -1,6 +1,7 @@
 -- Tags: no-random-merge-tree-settings

 SET use_uncompressed_cache = 0;
+SET allow_prefetched_read_pool_for_remote_filesystem=0;

 DROP TABLE IF EXISTS adaptive_table;

@@ -1,5 +1,20 @@
 optimize_move_to_prewhere_if_final = 1

+SELECT
+x,
+y,
+z
+FROM prewhere_move_select_final
+PREWHERE x > 100
+
+SELECT
+x,
+y,
+z
+FROM prewhere_move_select_final
+FINAL
+PREWHERE x > 100
+
 SELECT
 x,
 y,
@@ -15,6 +30,21 @@ FROM prewhere_move_select_final
 FINAL
 PREWHERE y > 100

+SELECT
+x,
+y,
+z
+FROM prewhere_move_select_final
+PREWHERE (x + y) > 100
+
+SELECT
+x,
+y,
+z
+FROM prewhere_move_select_final
+FINAL
+PREWHERE (x + y) > 100
+
 SELECT
 x,
 y,
@@ -32,6 +62,24 @@ FINAL
 PREWHERE y > 100
 WHERE (y > 100) AND (z > 400)

+SELECT
+x,
+y,
+z
+FROM prewhere_move_select_final
+FINAL
+PREWHERE x > 50
+WHERE (x > 50) AND (z > 400)
+
+SELECT
+x,
+y,
+z
+FROM prewhere_move_select_final
+FINAL
+PREWHERE (x + y) > 50
+WHERE ((x + y) > 50) AND (z > 400)
+
 optimize_move_to_prewhere_if_final = 0

 SELECT
@@ -11,17 +11,29 @@ SET optimize_move_to_prewhere_if_final = 1;

 -- order key can be pushed down with final
 select '';
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE x > 100;
+select '';
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x > 100;
+select '';
 EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE y > 100;
 select '';
 EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100;
+select '';
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE x + y > 100;
+select '';
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 100;

 -- can not be pushed down
 select '';
 EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400;

--- only y can be pushed down
+-- only condition with x/y can be pushed down
 select '';
 EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400;
+select '';
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x > 50 and z > 400;
+select '';
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 50 and z > 400;

 select '';
 select 'optimize_move_to_prewhere_if_final = 0';
@@ -97,12 +97,12 @@
 ReadType: InReverseOrder
 Parts: 1
 Granules: 3
 ReadFromMergeTree (default.idx)
 Indexes:
 PrimaryKey
 Keys:
 x
 plus(x, y)
 Condition: or((x in 2-element set), (plus(plus(x, y), 1) in (-Inf, 2]))
 Parts: 1/1
 Granules: 1/1
@@ -64,10 +64,8 @@ ExpressionTransform
 (Sorting)
 (Expression)
 ExpressionTransform
-(Filter)
-FilterTransform
 (ReadFromMergeTree)
 MergeTreeInOrder 0 → 1
 2020-10-11 0 0
 2020-10-11 0 10
 2020-10-11 0 20
@@ -82,15 +80,20 @@ ExpressionTransform
 PartialSortingTransform
 (Expression)
 ExpressionTransform
-(Filter)
-FilterTransform
 (ReadFromMergeTree)
 MergeTreeInOrder 0 → 1
 2020-10-12 0
 2020-10-12 1
 2020-10-12 2
 2020-10-12 3
 2020-10-12 4
+SELECT
+date,
+i
+FROM t_read_in_order
+PREWHERE date = \'2020-10-12\'
+ORDER BY i DESC
+LIMIT 5
 (Expression)
 ExpressionTransform
 (Limit)
@@ -98,11 +101,9 @@ ExpressionTransform
 (Sorting)
 (Expression)
 ExpressionTransform
-(Filter)
-FilterTransform
 (ReadFromMergeTree)
 ReverseTransform
 MergeTreeReverse 0 → 1
 2020-10-12 99999
 2020-10-12 99998
 2020-10-12 99997
@@ -30,6 +30,7 @@ INSERT INTO t_read_in_order SELECT '2020-10-12', number, number FROM numbers(100

 SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i LIMIT 5;

+EXPLAIN SYNTAX SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
 EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
 SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;

@@ -55,7 +55,6 @@ MergeTreeThread
 Sorting (Stream): a ASC, b ASC
 Sorting (Stream): a ASC, b ASC
 Sorting (Stream): a ASC, b ASC
-Sorting (Stream): a ASC, b ASC
 -- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query
 -- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns
 Sorting (Stream): a ASC
tests/queries/0_stateless/02515_generate_ulid.reference (Normal file, 1 line)
@@ -0,0 +1 @@
+1 FixedString(26)
tests/queries/0_stateless/02515_generate_ulid.sql (Normal file, 3 lines)
@@ -0,0 +1,3 @@
+-- Tags: no-fasttest
+
+SELECT generateULID(1) != generateULID(2), toTypeName(generateULID());
tests/queries/0_stateless/02661_window_ntile.reference (Normal file, 201 lines)
@@ -0,0 +1,201 @@
+-- { echo }
+
+-- Normal cases
+select a, b, ntile(3) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));
+0 0 1
+0 1 1
+0 2 1
+0 3 1
+0 4 2
+0 5 2
+0 6 2
+0 7 3
+0 8 3
+0 9 3
+1 0 1
+1 1 1
+1 2 1
+1 3 1
+1 4 2
+1 5 2
+1 6 2
+1 7 3
+1 8 3
+1 9 3
+select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));
+0 0 1
+0 1 1
+0 2 1
+0 3 1
+0 4 1
+0 5 2
+0 6 2
+0 7 2
+0 8 2
+0 9 2
+1 0 1
+1 1 1
+1 2 1
+1 3 1
+1 4 1
+1 5 2
+1 6 2
+1 7 2
+1 8 2
+1 9 2
+select a, b, ntile(1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));
+0 0 1
+0 1 1
+0 2 1
+0 3 1
+0 4 1
+0 5 1
+0 6 1
+0 7 1
+0 8 1
+0 9 1
+1 0 1
+1 1 1
+1 2 1
+1 3 1
+1 4 1
+1 5 1
+1 6 1
+1 7 1
+1 8 1
+1 9 1
+select a, b, ntile(100) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));
+0 0 1
+0 1 2
+0 2 3
+0 3 4
+0 4 5
+0 5 6
+0 6 7
+0 7 8
+0 8 9
+0 9 10
+1 0 1
+1 1 2
+1 2 3
+1 3 4
+1 4 5
+1 5 6
+1 6 7
+1 7 8
+1 8 9
+1 9 10
+select a, b, ntile(65535) over (partition by a order by b rows between unbounded preceding and unbounded following) from (select 1 as a, number as b from numbers(65535)) limit 100;
+1 0 1
+1 1 2
+1 2 3
+1 3 4
+1 4 5
+1 5 6
+1 6 7
+1 7 8
+1 8 9
+1 9 10
+1 10 11
+1 11 12
+1 12 13
+1 13 14
+1 14 15
+1 15 16
+1 16 17
+1 17 18
+1 18 19
+1 19 20
+1 20 21
+1 21 22
+1 22 23
+1 23 24
+1 24 25
+1 25 26
+1 26 27
+1 27 28
+1 28 29
+1 29 30
+1 30 31
+1 31 32
+1 32 33
+1 33 34
+1 34 35
+1 35 36
+1 36 37
+1 37 38
+1 38 39
+1 39 40
+1 40 41
+1 41 42
+1 42 43
+1 43 44
+1 44 45
+1 45 46
+1 46 47
+1 47 48
+1 48 49
+1 49 50
+1 50 51
+1 51 52
+1 52 53
+1 53 54
+1 54 55
+1 55 56
+1 56 57
+1 57 58
+1 58 59
+1 59 60
+1 60 61
+1 61 62
+1 62 63
+1 63 64
+1 64 65
+1 65 66
+1 66 67
+1 67 68
+1 68 69
+1 69 70
+1 70 71
+1 71 72
+1 72 73
+1 73 74
+1 74 75
+1 75 76
+1 76 77
+1 77 78
+1 78 79
+1 79 80
+1 80 81
+1 81 82
+1 82 83
+1 83 84
+1 84 85
+1 85 86
+1 86 87
+1 87 88
+1 88 89
+1 89 90
+1 90 91
+1 91 92
+1 92 93
+1 93 94
+1 94 95
+1 95 96
+1 96 97
+1 97 98
+1 98 99
+1 99 100
+-- Bad arguments
+select a, b, ntile(3.0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile('2') over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(-2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(b + 1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+-- Bad window type
+select a, b, ntile(2) over (partition by a) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b rows between 4 preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and 4 following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b rows between 4 preceding and 4 following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));; -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b rows between current row and 4 following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));; -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b range unbounded preceding) from(select intDiv(number,10) as a, number%10 as b from numbers(20));; -- { serverError 36 }
tests/queries/0_stateless/02661_window_ntile.sql (Normal file, 23 lines)
@@ -0,0 +1,23 @@
+-- { echo }
+
+-- Normal cases
+select a, b, ntile(3) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));
+select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));
+select a, b, ntile(1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));
+select a, b, ntile(100) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));
+select a, b, ntile(65535) over (partition by a order by b rows between unbounded preceding and unbounded following) from (select 1 as a, number as b from numbers(65535)) limit 100;
+
+-- Bad arguments
+select a, b, ntile(3.0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile('2') over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(-2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(b + 1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+
+-- Bad window type
+select a, b, ntile(2) over (partition by a) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b rows between 4 preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and 4 following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b rows between 4 preceding and 4 following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));; -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b rows between current row and 4 following) from(select intDiv(number,10) as a, number%10 as b from numbers(20));; -- { serverError 36 }
+select a, b, ntile(2) over (partition by a order by b range unbounded preceding) from(select intDiv(number,10) as a, number%10 as b from numbers(20));; -- { serverError 36 }
@@ -0,0 +1,7 @@
+1900000050000 1
+1900000040000 0.05
+1900000030000 0
+1900000020000 -0.0002
+1900000010000 -1
+Ok
+Ok
tests/queries/0_stateless/25336_read_in_order_final_desc.sql (Normal file, 40 lines)
@@ -0,0 +1,40 @@
+SET optimize_read_in_order = 1;
+DROP TABLE IF EXISTS mytable;
+
+CREATE TABLE mytable
+(
+timestamp UInt64,
+insert_timestamp UInt64,
+key UInt64,
+value Float64
+) ENGINE = ReplacingMergeTree(insert_timestamp)
+PRIMARY KEY (key, timestamp)
+ORDER BY (key, timestamp);
+
+INSERT INTO mytable (timestamp, insert_timestamp, key, value) VALUES (1900000010000, 1675159000000, 5, 555), (1900000010000, 1675159770000, 5, -1), (1900000020000, 1675159770000, 5, -0.0002), (1900000030000, 1675159770000, 5, 0), (1900000020000, 1675159700000, 5, 555), (1900000040000, 1675159770000, 5, 0.05), (1900000050000, 1675159770000, 5, 1);
+
+SELECT timestamp, value
+FROM mytable FINAL
+WHERE key = 5
+ORDER BY timestamp DESC;
+
+
+SELECT if(explain like '%ReadType: InOrder%', 'Ok', 'Error: ' || explain) FROM (
+EXPLAIN PLAN actions = 1
+SELECT timestamp, value
+FROM mytable FINAL
+WHERE key = 5
+ORDER BY timestamp
+) WHERE explain like '%ReadType%';
+
+
+SELECT if(explain like '%ReadType: Default%', 'Ok', 'Error: ' || explain) FROM (
+EXPLAIN PLAN actions = 1
+SELECT timestamp, value
+FROM mytable FINAL
+WHERE key = 5
+ORDER BY timestamp DESC
+) WHERE explain like '%ReadType%';
+
+
+DROP TABLE IF EXISTS mytable;
@@ -61,6 +61,7 @@ v22.4.5.9-stable 2022-05-06
 v22.4.4.7-stable 2022-04-29
 v22.4.3.3-stable 2022-04-26
 v22.4.2.1-stable 2022-04-22
+v22.3.18.37-lts 2023-02-15
 v22.3.17.13-lts 2023-01-12
 v22.3.16.1190-lts 2023-01-09
 v22.3.15.33-lts 2022-12-02