Merge branch 'master' into keeper-batch-flushes

Antonio Andelic 2023-08-28 13:14:06 +00:00
commit 4137cb916d
306 changed files with 7885 additions and 2111 deletions

contrib/arrow vendored

@ -1 +1 @@
Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f
Subproject commit 1d93838f69a802639ca144ea5704a98e2481810d

contrib/boost vendored

@ -1 +1 @@
Subproject commit 063a9372b4ae304e869a5c5724971d0501552731
Subproject commit a01ddc144c130777d7c6727a3fc5d5cdbae016d6


@ -20,6 +20,7 @@ echo '/boost/context/*' >> $FILES_TO_CHECKOUT
echo '/boost/convert/*' >> $FILES_TO_CHECKOUT
echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT
echo '/boost/core/*' >> $FILES_TO_CHECKOUT
echo '/boost/describe/*' >> $FILES_TO_CHECKOUT
echo '/boost/detail/*' >> $FILES_TO_CHECKOUT
echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT
echo '/boost/exception/*' >> $FILES_TO_CHECKOUT
@ -82,4 +83,4 @@ echo '/libs/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -63,7 +63,6 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@ -94,7 +93,6 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@ -131,6 +129,7 @@ sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
| sed "s|>1<|>0<|g" \
> /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp
sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
start 500
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \


@ -90,34 +90,117 @@ Process 1 stopped
## Visual Studio Code integration
- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging.
- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
### Example configurations
#### cmake-variants.yaml
```yaml
buildType:
  default: relwithdebinfo
  choices:
    debug:
      short: Debug
      long: Emit debug information
      buildType: Debug
    release:
      short: Release
      long: Optimize generated code
      buildType: Release
    relwithdebinfo:
      short: RelWithDebInfo
      long: Release with Debug Info
      buildType: RelWithDebInfo
    minsizerel:
      short: MinSizeRel
      long: Minimum Size Release
      buildType: MinSizeRel
toolchain:
  default: default
  description: Select toolchain
  choices:
    default:
      short: x86_64
      long: x86_64
    s390x:
      short: s390x
      long: s390x
      settings:
        CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake
```
#### launch.json
```json
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "(lldb) Launch s390x with qemu",
            "type": "lldb",
            "request": "custom",
            "targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"],
            "processCreateCommands": ["gdb-remote 2159"],
            "preLaunchTask": "Run ClickHouse"
        }
    ]
}
```
#### settings.json
This would also put different builds under different subfolders of the `build` folder.
```json
{
    "cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}",
    "lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"
}
```
#### run-debug.sh
```sh
#! /bin/sh
echo 'Starting debugger session'
cd "$1"
qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu "$2" "$3" "$4"
```
#### tasks.json
Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with the configuration from `programs/server/config.xml`.
```json
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "Run ClickHouse",
            "type": "shell",
            "isBackground": true,
            "command": "${workspaceFolder}/.vscode/run-debug.sh",
            "args": [
                "${command:cmake.launchTargetDirectory}/tmp",
                "${command:cmake.launchTargetPath}",
                "server",
                "--config-file=${workspaceFolder}/programs/server/config.xml"
            ],
            "problemMatcher": [
                {
                    "pattern": [
                        {
                            "regexp": ".",
                            "file": 1,
                            "location": 2,
                            "message": 3
                        }
                    ],
                    "background": {
                        "activeOnStart": true,
                        "beginsPattern": "^Starting debugger session",
                        "endsPattern": ".*"
                    }
                }
            ]
        }
    ]
}
```


@ -219,6 +219,10 @@ LIMIT N
SETTINGS annoy_index_search_k_nodes=100;
```
:::note
The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
:::
## USearch {#usearch}
This type of ANN index is based on [the USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
@ -274,4 +278,4 @@ USearch currently supports two distance functions:
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
distance function was specified during index creation, `L2Distance` is used as default.
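As a sketch of how the distance function is fixed at index creation time (the table and column names here are illustrative, not taken from the text above):
```sql
CREATE TABLE table_with_usearch_index
(
    id Int64,
    vectors Array(Float32),
    -- cosineDistance is chosen here for non-normalized data;
    -- omitting the argument would default to L2Distance
    INDEX ann_idx vectors TYPE usearch('cosineDistance')
)
ENGINE = MergeTree
ORDER BY id;
```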


@ -228,8 +228,8 @@ For most input formats schema inference reads some data to determine its structu
To prevent inferring the same schema every time ClickHouse reads data from the same file, the inferred schema is cached, and when the same file is accessed again, ClickHouse uses the schema from the cache.
There are special settings that control this cache:
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - enables or disables the use of the cache for schema inference. These settings can be used in queries.
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url/azure}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
- `schema_inference_use_cache_for_{file,s3,hdfs,url,azure}` - enables or disables the use of the cache for schema inference. These settings can be used in queries.
The schema of the file can be changed by modifying the data or by changing format settings.
For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.
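For example, to force re-inference of a file's schema by bypassing the cache for the `file` table function (`data.csv` is a placeholder path):
```sql
-- Infer the schema without consulting or populating the cache
DESCRIBE file('data.csv')
SETTINGS schema_inference_use_cache_for_file = 0;
```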


@ -217,6 +217,14 @@ Type: UInt32
Default: 1024
## index_mark_cache_policy
Index mark cache policy name.
Type: String
Default: SLRU
## index_mark_cache_size
Size of cache for index marks. Zero means disabled.
@ -229,6 +237,21 @@ Type: UInt64
Default: 0
## index_mark_cache_size_ratio
The size of the protected queue in the index mark cache relative to the cache's total size.
Type: Double
Default: 0.5
## index_uncompressed_cache_policy
Index uncompressed cache policy name.
Type: String
Default: SLRU
## index_uncompressed_cache_size
@ -242,6 +265,13 @@ Type: UInt64
Default: 0
## index_uncompressed_cache_size_ratio
The size of the protected queue in the index uncompressed cache relative to the cache's total size.
Type: Double
Default: 0.5
## io_thread_pool_queue_size
@ -271,6 +301,14 @@ Type: UInt64
Default: 5368709120
## mark_cache_size_ratio
The size of the protected queue in the mark cache relative to the cache's total size.
Type: Double
Default: 0.5
## max_backup_bandwidth_for_server
The maximum read speed in bytes per second for all backups on the server. Zero means unlimited.
@ -629,6 +667,14 @@ Type: UInt64
Default: 0
## uncompressed_cache_size_ratio
The size of the protected queue in the uncompressed cache relative to the cache's total size.
Type: Double
Default: 0.5
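To observe how full these caches are at runtime, one can query `system.asynchronous_metrics`; a sketch (the exact metric names may vary between versions):
```sql
-- Current byte sizes of the mark, uncompressed, and index caches
SELECT metric, value
FROM system.asynchronous_metrics
WHERE metric LIKE '%CacheBytes%';
```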
## builtin_dictionaries_reload_interval {#builtin-dictionaries-reload-interval}
The interval in seconds before reloading built-in dictionaries.


@ -623,6 +623,19 @@ Possible values:
Default value: false
## number_of_free_entries_in_pool_to_execute_optimize_entire_partition {#number_of_free_entries_in_pool_to_execute_optimize_entire_partition}
When there are fewer than the specified number of free entries in the pool, do not execute optimizing an entire partition in the background (this task is generated when `min_age_to_force_merge_seconds` is set and `min_age_to_force_merge_on_partition_only` is enabled). This leaves free threads for regular merges and avoids "Too many parts" errors.
Possible values:
- Positive integer.
Default value: 25
The value of the `number_of_free_entries_in_pool_to_execute_optimize_entire_partition` setting should be less than the value of [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
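Being a MergeTree-level setting, it can also be changed per table; a sketch (`my_table` is a placeholder name):
```sql
ALTER TABLE my_table
    MODIFY SETTING number_of_free_entries_in_pool_to_execute_optimize_entire_partition = 10;
```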
## allow_floating_point_partition_key {#allow_floating_point_partition_key}
Allows floating-point numbers to be used as a partition key.
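A sketch of a table that this setting permits (all names are illustrative):
```sql
CREATE TABLE sensor_readings
(
    sensor_id UInt32,
    reading Float64
)
ENGINE = MergeTree
-- a floating-point expression as the partition key,
-- rejected unless the setting below is enabled
PARTITION BY round(reading)
ORDER BY sensor_id
SETTINGS allow_floating_point_partition_key = 1;
```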


@ -2383,6 +2383,23 @@ See also:
- [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns)
## optimize_count_from_files {#optimize_count_from_files}
Enables or disables the optimization of counting the number of rows in files for different input formats. It applies to the table functions/engines `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
Possible values:
- 0 — Optimization disabled.
- 1 — Optimization enabled.
Default value: `1`.
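For example, with the optimization enabled, a plain count over a file can be answered from format metadata instead of reading all rows (`data.parquet` is a placeholder path):
```sql
SELECT count() FROM file('data.parquet')
SETTINGS optimize_count_from_files = 1;
```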
## use_cache_for_count_from_files {#use_cache_for_count_from_files}
Enables caching of the number of rows while counting rows in files read via the table functions `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
Enabled by default.
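To disable the row-count cache for an individual query (`data.parquet` is again a placeholder):
```sql
SELECT count() FROM file('data.parquet')
SETTINGS use_cache_for_count_from_files = 0;
```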
## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
- Type: seconds


@ -1794,6 +1794,330 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md).
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```
## arrayRotateLeft
Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
If the number of elements is negative, the array is rotated to the right.
**Syntax**
``` sql
arrayRotateLeft(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['d','e','a','b','c'] │
└───────────────────────┘
```
## arrayRotateRight
Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
If the number of elements is negative, the array is rotated to the left.
**Syntax**
``` sql
arrayRotateRight(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['c','d','e','a','b'] │
└───────────────────────┘
```
## arrayShiftLeft
Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the right.
**Syntax**
``` sql
arrayShiftLeft(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [3,4,5,6,42,42] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['d','e','f','foo','foo','foo'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [3,4,5,6,4242,4242] │
└─────────────────────┘
```
## arrayShiftRight
Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the left.
**Syntax**
``` sql
arrayShiftRight(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [42,42,1,2,3,4] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['foo','foo','foo','a','b','c'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [4242,4242,1,2,3,4] │
└─────────────────────┘
```
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).


@ -91,7 +91,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
## Import Sample Dataset {#import-sample-dataset}
Now it's time to fill our ClickHouse server with some sample data. In this tutorial, we'll use the anonymized data of Yandex.Metrica, the first service that ran ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we'll go with the most realistic one.
Now it's time to fill our ClickHouse server with some sample data. In this tutorial, we'll use some anonymized metric data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we'll go with the most realistic one.
### Download and Extract Table Data {#download-and-extract-table-data}
@ -116,7 +116,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r
2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md).
3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed.
Yandex.Metrica is a web analytics service, and the sample dataset doesn't cover its full functionality, so there are only two tables to create:
There are only two tables to create:
- `hits` is a table with each action done by all users on all websites covered by the service.
- `visits` is a table that contains pre-built sessions instead of individual actions.
@ -523,7 +523,7 @@ SELECT
sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
(100. * goal_visits) / visits AS goal_percent
FROM tutorial.visits_v1
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403)
```
## Cluster Deployment {#cluster-deployment}
@ -544,19 +544,19 @@ Example config for a cluster with three shards, one replica each:
<perftest_3shards_1replicas>
<shard>
<replica>
<host>example-perftest01j.yandex.ru</host>
<host>example-perftest01j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>example-perftest02j.yandex.ru</host>
<host>example-perftest02j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>example-perftest03j.yandex.ru</host>
<host>example-perftest03j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
@ -602,15 +602,15 @@ Example config for a cluster of one shard containing three replicas:
<perftest_1shards_3replicas>
<shard>
<replica>
<host>example-perftest01j.yandex.ru</host>
<host>example-perftest01j.clickhouse.com</host>
<port>9000</port>
</replica>
<replica>
<host>example-perftest02j.yandex.ru</host>
<host>example-perftest02j.clickhouse.com</host>
<port>9000</port>
</replica>
<replica>
<host>example-perftest03j.yandex.ru</host>
<host>example-perftest03j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
@ -628,15 +628,15 @@ ZooKeeper locations are specified in the configuration file:
``` xml
<zookeeper>
<node>
<host>zoo01.yandex.ru</host>
<host>zoo01.clickhouse.com</host>
<port>2181</port>
</node>
<node>
<host>zoo02.yandex.ru</host>
<host>zoo02.clickhouse.com</host>
<port>2181</port>
</node>
<node>
<host>zoo03.yandex.ru</host>
<host>zoo03.clickhouse.com</host>
<port>2181</port>
</node>
</zookeeper>


@ -1703,3 +1703,327 @@ SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as r
│ 6 │ Float64 │
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```
## arrayRotateLeft
Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
If the number of elements is negative, the array is rotated to the right.
**Syntax**
``` sql
arrayRotateLeft(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['d','e','a','b','c'] │
└───────────────────────┘
```
## arrayRotateRight
Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
If the number of elements is negative, the array is rotated to the left.
**Syntax**
``` sql
arrayRotateRight(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['c','d','e','a','b'] │
└───────────────────────┘
```
## arrayShiftLeft
Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the right.
**Syntax**
``` sql
arrayShiftLeft(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [3,4,5,6,42,42] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['d','e','f','foo','foo','foo'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [3,4,5,6,4242,4242] │
└─────────────────────┘
```
## arrayShiftRight
Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the left.
**Syntax**
``` sql
arrayShiftRight(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [42,42,1,2,3,4] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['foo','foo','foo','a','b','c'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [4242,4242,1,2,3,4] │
└─────────────────────┘
```


@ -1171,10 +1171,267 @@ The trace log in the ClickHouse server log file confirms that ClickHouse is running binary search over the index
The larger the difference between the cardinalities of the key columns, the more the order of those columns in the primary key matters. We will demonstrate that in the next section.
## Ordering key columns efficiently
In a compound primary key, the order of the key columns can significantly impact both:
- the efficiency of filtering on secondary key columns in queries, and
- the compression ratio of the table's data files.
To demonstrate that, we will use a version of our [web traffic sample data set](#数据集)
where each row contains three columns, indicating whether or not the access by an internet user (`UserID` column) to a URL (`URL` column) got marked as bot traffic (`IsRobot` column).
We will use a compound primary key containing all three of these columns, which can be used to speed up typical web analytics queries that calculate
- how much (what percentage of) traffic to a specific URL is from bots, or
- how confident we are that a specific user is (or is not) a bot (what percentage of the traffic from that user is (or is not) assumed to be bot traffic).
We use this query to calculate the cardinalities of the three columns that we want to use as key columns in the compound primary key (note that we use the [URL table function](/docs/en/sql-reference/table-functions/url.md) to query the TSV data ad hoc without having to create a local table). Run this query in `clickhouse client`:
```sql
SELECT
formatReadableQuantity(uniq(URL)) AS cardinality_URL,
formatReadableQuantity(uniq(UserID)) AS cardinality_UserID,
formatReadableQuantity(uniq(IsRobot)) AS cardinality_IsRobot
FROM
(
SELECT
c11::UInt64 AS UserID,
c15::String AS URL,
c20::UInt8 AS IsRobot
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
WHERE URL != ''
)
```
The response is:
```response
┌─cardinality_URL─┬─cardinality_UserID─┬─cardinality_IsRobot─┐
│ 2.39 million │ 119.08 thousand │ 4.00 │
└─────────────────┴────────────────────┴─────────────────────┘
1 row in set. Elapsed: 118.334 sec. Processed 8.87 million rows, 15.88 GB (74.99 thousand rows/s., 134.21 MB/s.)
```
We can see that there is a large difference between the cardinalities, especially between the `URL` and `IsRobot` columns. The order of these columns in the compound primary key therefore matters both for efficiently speeding up queries that filter on these columns and for achieving optimal compression ratios for the table's column data files.
To demonstrate that, we create two versions of the table for our bot traffic analysis data:
- a table `hits_URL_UserID_IsRobot` with the compound primary key `(URL, UserID, IsRobot)`, where the key columns are ordered by cardinality in descending order
- a table `hits_IsRobot_UserID_URL` with the compound primary key `(IsRobot, UserID, URL)`, where the key columns are ordered by cardinality in ascending order
Create the table `hits_URL_UserID_IsRobot` with the compound primary key `(URL, UserID, IsRobot)`:
```sql
CREATE TABLE hits_URL_UserID_IsRobot
(
`UserID` UInt32,
`URL` String,
`IsRobot` UInt8
)
ENGINE = MergeTree
// highlight-next-line
PRIMARY KEY (URL, UserID, IsRobot);
```
And populate it with 8.87 million rows:
```sql
INSERT INTO hits_URL_UserID_IsRobot SELECT
intHash32(c11::UInt64) AS UserID,
c15 AS URL,
c20 AS IsRobot
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
WHERE URL != '';
```
The response is:
```response
0 rows in set. Elapsed: 104.729 sec. Processed 8.87 million rows, 15.88 GB (84.73 thousand rows/s., 151.64 MB/s.)
```
Next, create the table `hits_IsRobot_UserID_URL` with the compound primary key `(IsRobot, UserID, URL)`:
```sql
CREATE TABLE hits_IsRobot_UserID_URL
(
`UserID` UInt32,
`URL` String,
`IsRobot` UInt8
)
ENGINE = MergeTree
// highlight-next-line
PRIMARY KEY (IsRobot, UserID, URL);
```
And populate it with the same 8.87 million rows as the previous table:
```sql
INSERT INTO hits_IsRobot_UserID_URL SELECT
intHash32(c11::UInt64) AS UserID,
c15 AS URL,
c20 AS IsRobot
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
WHERE URL != '';
```
The response is:
```response
0 rows in set. Elapsed: 95.959 sec. Processed 8.87 million rows, 15.88 GB (92.48 thousand rows/s., 165.50 MB/s.)
```
### Efficient filtering on secondary key columns
When a query filters on at least one column that is part of a compound key and is the first key column, [ClickHouse runs the binary search algorithm over the key column's index marks](#主索引被用来选择颗粒).
When a query filters (only) on a column that is part of a compound key but is not the first key column, [ClickHouse uses the generic exclusion search algorithm over the key column's index marks](#查询使用第二位主键的性能问题).
For the second case, the order of the key columns in the compound primary key matters for the effectiveness of the [generic exclusion search algorithm](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444).
This query filters on the `UserID` column of the table whose key columns `(URL, UserID, IsRobot)` we ordered by cardinality in descending order:
```sql
SELECT count(*)
FROM hits_URL_UserID_IsRobot
WHERE UserID = 112304
```
The response is:
```response
┌─count()─┐
│ 73 │
└─────────┘
1 row in set. Elapsed: 0.026 sec.
// highlight-next-line
Processed 7.92 million rows,
31.67 MB (306.90 million rows/s., 1.23 GB/s.)
```
This is the same query on the table whose key columns `(IsRobot, UserID, URL)` we ordered by cardinality in ascending order:
```sql
SELECT count(*)
FROM hits_IsRobot_UserID_URL
WHERE UserID = 112304
```
The response is:
```response
┌─count()─┐
│ 73 │
└─────────┘
1 row in set. Elapsed: 0.003 sec.
// highlight-next-line
Processed 20.32 thousand rows,
81.28 KB (6.61 million rows/s., 26.44 MB/s.)
```
We can see that query execution is significantly more effective and faster on the table where the key columns are ordered by cardinality in ascending order.
The reason is that the [generic exclusion search algorithm](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444) works best when [granules](#主索引被用来选择颗粒) are selected via a secondary key column whose preceding key column has a lower cardinality. We illustrated that in detail in a [previous section](#generic-exclusion-search-algorithm) of this guide.
### Optimal compression ratio of data files {#数据文件的最佳压缩率}
This query compares the compression ratio of the `UserID` column between the two tables created above:
```sql
SELECT
table AS Table,
name AS Column,
formatReadableSize(data_uncompressed_bytes) AS Uncompressed,
formatReadableSize(data_compressed_bytes) AS Compressed,
round(data_uncompressed_bytes / data_compressed_bytes, 0) AS Ratio
FROM system.columns
WHERE (table = 'hits_URL_UserID_IsRobot' OR table = 'hits_IsRobot_UserID_URL') AND (name = 'UserID')
ORDER BY Ratio ASC
```
This is the response:
```response
┌─Table───────────────────┬─Column─┬─Uncompressed─┬─Compressed─┬─Ratio─┐
│ hits_URL_UserID_IsRobot │ UserID │ 33.83 MiB │ 11.24 MiB │ 3 │
│ hits_IsRobot_UserID_URL │ UserID │ 33.83 MiB │ 877.47 KiB │ 39 │
└─────────────────────────┴────────┴──────────────┴────────────┴───────┘
2 rows in set. Elapsed: 0.006 sec.
```
We can see that the compression ratio of the `UserID` column is significantly higher in the table whose key columns `(IsRobot, UserID, URL)` are ordered by cardinality in ascending order.
Although both tables store exactly the same data (we inserted the same 8.87 million rows into each), the order of the key columns in the compound primary key has a significant influence on how much disk space the <a href="https://clickhouse.com/docs/en/introduction/distinctive-features/#data-compression" target="_blank">compressed</a> data in the table's [column data files](#数据按照主键排序存储在磁盘上) requires:
- in the table `hits_URL_UserID_IsRobot` with the compound primary key `(URL, UserID, IsRobot)`, where the key columns are ordered by cardinality in descending order, the `UserID.bin` data file takes **11.24 MiB** of disk space
- in the table `hits_IsRobot_UserID_URL` with the compound primary key `(IsRobot, UserID, URL)`, where the key columns are ordered by cardinality in ascending order, the `UserID.bin` data file takes only **877.47 KiB** of disk space
A good compression ratio for a table's column data on disk not only saves disk space, it also makes queries (especially analytical ones) that read from that column faster, because less I/O is needed to move the column's data from disk to main memory (the operating system's file cache).
Below we illustrate why it is beneficial for the compression ratio of a table's columns to order the primary key columns by cardinality in ascending order.
The diagram below sketches the on-disk order of rows for a primary key whose key columns are ordered by cardinality in ascending order:
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-14a.png').default} class="image"/>
We discussed that [a table's row data is stored on disk ordered by the primary key columns](#数据按照主键排序存储在磁盘上).
In the diagram above, the table's rows (their column values on disk) are first ordered by their `cl` value, and rows with the same `cl` value are ordered by their `ch` value. Because the first key column `cl` has low cardinality, it is likely that there are rows with the same `cl` value, and therefore it is also likely that the `ch` values are ordered (locally, for rows with the same `cl` value).
If similar data in a column is placed close together, for example via sorting, that data will compress better.
In general, a compression algorithm benefits from the run length of the data (the more data it sees, the better it compresses) and from locality (the more similar the data is, the better the compression ratio).
In contrast to the diagram above, the diagram below sketches the on-disk order of rows for a primary key whose key columns are ordered by cardinality in descending order:
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-14b.png').default} class="image"/>
Now the table's rows are first ordered by their `ch` value, and rows with the same `ch` value are ordered by their `cl` value.
But because the first key column `ch` has high cardinality, it is unlikely that there are rows with the same `ch` value, and therefore it is also unlikely that the `cl` values are ordered (locally, for rows with the same `ch` value).
The `cl` values are therefore most likely in random order, which means bad locality and a bad compression ratio.
### Summary
For both efficient filtering on secondary key columns in queries and a high compression ratio of a table's column data files, it is beneficial to order the columns of a primary key by cardinality in ascending order.
### Related content
- Blog: [Super charging your ClickHouse queries](https://clickhouse.com/blog/clickhouse-faster-queries-with-projections-and-primary-indexes)
## Identifying single rows efficiently
Although it is generally [not](/knowledgebase/key-value) the best use case for ClickHouse,
applications built on top of ClickHouse sometimes need to identify single rows of a ClickHouse table.
An intuitive solution might be to use a [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) column with a unique value per row, and to use that column as a primary key column for fast row retrieval.
For the fastest retrieval, the UUID column [would need to be the first key column](#主索引被用来选择颗粒).
We discussed that because [a ClickHouse table's row data is stored on disk ordered by the primary key column(s)](#数据按照主键排序存储在磁盘上), placing a column with a very high cardinality (like a UUID column) before columns with lower cardinality in a primary key or a compound primary key [is detrimental to the compression ratio of the other table columns](#数据文件的最佳压缩率).
A compromise between the fastest retrieval and optimal data compression is a compound primary key where the UUID is the last key column, after low(er)-cardinality key columns that ensure a good compression ratio for some of the table's columns.
### A concrete example
One concrete example is the plain-text paste service https://pastila.nl that Alexey Milovidov developed and [blogged about](https://clickhouse.com/blog/building-a-paste-service-with-clickhouse/).
On every change to the text area, the data is saved automatically into a ClickHouse table row (one row per change).
One way to identify and retrieve (a specific version of) the pasted content is to use a hash of the content as the UUID for the table row that contains the content.
The following diagram shows
- the insert order of the rows when the content changes (for example, due to keystrokes typing text into the text area), and
- the on-disk order of the data from the inserted rows when `PRIMARY KEY (hash)` is used:
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-15a.png').default} class="image"/>
Because the `hash` column is used as the primary key column,
- specific rows can be retrieved [very quickly](#主索引被用来选择颗粒), but
- the table's rows (their column data) are stored on disk ordered ascending by the (unique and random) hash values. Therefore the values of the content column are also stored in random order, with no data locality, which results in a **suboptimal compression ratio for the content column's data file**.
To significantly improve the compression ratio of the content column while still achieving fast retrieval of specific rows, pastila.nl uses two hashes (and a compound primary key) to identify a specific row:
- a hash of the content, as discussed above, that is distinct for distinct data, and
- a [locality-sensitive hash (fingerprint)](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) that does **not** change on small changes of the data.
The following diagram shows
- the insert order of the rows when the content changes (for example, due to keystrokes typing text into the text area), and
- the on-disk order of the data from the inserted rows when the compound `PRIMARY KEY (fingerprint, hash)` is used:
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-15b.png').default} class="image"/>
Now the rows on disk are first ordered by `fingerprint`, and for rows with the same fingerprint value, their `hash` value determines the final order.
Because data that differs only in small changes gets the same fingerprint value, similar data is now stored close together in the content column on disk. That is very good for the compression ratio of the content column, because a compression algorithm in general benefits from data locality (the more similar the data is, the better the compression ratio).
The compromise is that two fields (`fingerprint` and `hash`) are required to retrieve a specific row, in order to make optimal use of the primary index that results from the compound `PRIMARY KEY (fingerprint, hash)`.
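As a minimal sketch of this design (the table name, column types, and literal values are assumptions for illustration, not the actual pastila.nl schema):
```sql
CREATE TABLE paste
(
    fingerprint UInt32,  -- locality-sensitive hash of the content
    hash UInt64,         -- exact content hash
    content String
)
ENGINE = MergeTree
PRIMARY KEY (fingerprint, hash);

-- Retrieving a specific version needs both key fields so that the
-- primary index resulting from (fingerprint, hash) is fully used:
SELECT content
FROM paste
WHERE fingerprint = 123 AND hash = 4567890123;
```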


@ -5,6 +5,7 @@
#include <fstream>
#include <iomanip>
#include <random>
#include <string_view>
#include <pcg_random.hpp>
#include <Poco/Util/Application.h>
#include <Common/Stopwatch.h>
@ -48,6 +49,7 @@ namespace DB
{
using Ports = std::vector<UInt16>;
static constexpr std::string_view DEFAULT_CLIENT_NAME = "benchmark";
namespace ErrorCodes
{
@ -122,7 +124,7 @@ public:
default_database_, user_, password_, quota_key_,
/* cluster_= */ "",
/* cluster_secret_= */ "",
/* client_name_= */ "benchmark",
/* client_name_= */ std::string(DEFAULT_CLIENT_NAME),
Protocol::Compression::Enable,
secure));
@ -135,6 +137,8 @@ public:
global_context->makeGlobalContext();
global_context->setSettings(settings);
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
std::cerr << std::fixed << std::setprecision(3);


@ -1243,6 +1243,7 @@ void Client::processConfig()
global_context->getSettingsRef().max_insert_block_size);
}
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);


@ -133,8 +133,6 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp


@ -657,21 +657,23 @@ void LocalServer::processConfig()
/// There is no need for concurrent queries, override max_concurrent_queries.
global_context->getProcessList().setMaxSize(0);
const size_t memory_amount = getMemoryAmount();
const size_t physical_server_memory = getMemoryAmount();
const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
const size_t max_cache_size = static_cast<size_t>(memory_amount * cache_size_to_ram_max_ratio);
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
@ -679,23 +681,27 @@ void LocalServer::processConfig()
mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexMarkCache(index_mark_cache_size);
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
if (mmap_cache_size > max_cache_size)


@ -1302,18 +1302,14 @@ try
if (structure.empty())
{
ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &)
{
auto file = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
auto file = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
/// stdin must be seekable
auto res = lseek(file->getFD(), 0, SEEK_SET);
if (-1 == res)
throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
return file;
};
/// stdin must be seekable
auto res = lseek(file->getFD(), 0, SEEK_SET);
if (-1 == res)
throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
SingleReadBufferIterator read_buffer_iterator(std::move(file));
schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const);
}
else


@ -1111,37 +1111,43 @@ try
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
String mark_cache_policy = server_settings.mark_cache_policy;
size_t mark_cache_size = server_settings.mark_cache_size;
double mark_cache_size_ratio = server_settings.mark_cache_size_ratio;
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy;
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio;
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
String index_mark_cache_policy = server_settings.index_mark_cache_policy;
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio;
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexMarkCache(index_mark_cache_size);
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = server_settings.mmap_cache_size;
if (mmap_cache_size > max_cache_size)


@ -153,6 +153,7 @@ enum class AccessType
M(SYSTEM_DROP_QUERY_CACHE, "SYSTEM DROP QUERY, DROP QUERY CACHE, DROP QUERY", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_COMPILED_EXPRESSION_CACHE, "SYSTEM DROP COMPILED EXPRESSION, DROP COMPILED EXPRESSION CACHE, DROP COMPILED EXPRESSIONS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_SYNC_FILESYSTEM_CACHE, "SYSTEM REPAIR FILESYSTEM CACHE, REPAIR FILESYSTEM CACHE, SYNC FILESYSTEM CACHE", GLOBAL, SYSTEM) \
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
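For context, the access type added here corresponds to the `SYSTEM SYNC FILESYSTEM CACHE` statement and can be granted like any other `SYSTEM` privilege; a sketch (the user name is a placeholder):
```sql
GRANT SYSTEM SYNC FILESYSTEM CACHE ON *.* TO cache_admin;
SYSTEM SYNC FILESYSTEM CACHE;
```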


@ -1918,6 +1918,9 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
subquery_settings.max_result_rows = 1;
subquery_settings.extremes = false;
subquery_context->setSettings(subquery_settings);
/// When executing `INSERT INTO t WITH ... SELECT ...`, it may lead to an `Unknown columns`
/// exception with this setting enabled (https://github.com/ClickHouse/ClickHouse/issues/52494).
subquery_context->setSetting("use_structure_from_insertion_table_in_table_functions", false);
auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/);
auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(node->toAST(), subquery_context, options);


@ -50,7 +50,8 @@ namespace
context->getRemoteHostFilter(),
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
/* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler);
/* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler,
s3_uri.uri.getScheme());
client_configuration.endpointOverride = s3_uri.endpoint;
client_configuration.maxConnections = static_cast<unsigned>(context->getSettingsRef().s3_max_connections);


@ -169,6 +169,10 @@ if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif()
if (TARGET ch_contrib::azure_sdk)
target_link_libraries(clickhouse_storages_system PRIVATE ch_contrib::azure_sdk)
endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
add_subdirectory(Access/Common)


@ -1,5 +1,6 @@
#pragma once
#include <string_view>
#include "Common/NamePrompter.h"
#include <Parsers/ASTCreateQuery.h>
#include <Common/ProgressIndication.h>
@ -24,6 +25,7 @@ namespace po = boost::program_options;
namespace DB
{
static constexpr std::string_view DEFAULT_CLIENT_NAME = "client";
static const NameSet exit_strings
{


@ -12,6 +12,7 @@
#include <IO/TimeoutSetter.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <Client/ClientBase.h>
#include <Client/Connection.h>
#include <Client/ConnectionParameters.h>
#include <Common/ClickHouseRevision.h>
@ -1204,7 +1205,7 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa
parameters.quota_key,
"", /* cluster */
"", /* cluster_secret */
"client",
std::string(DEFAULT_CLIENT_NAME),
parameters.compression,
parameters.security);
}


@ -40,14 +40,17 @@ public:
using MappedPtr = typename CachePolicy::MappedPtr;
using KeyMapped = typename CachePolicy::KeyMapped;
/// Use this ctor if you don't care about the internal cache policy.
explicit CacheBase(size_t max_size_in_bytes, size_t max_count = 0, double size_ratio = 0.5)
static constexpr auto NO_MAX_COUNT = 0uz;
static constexpr auto DEFAULT_SIZE_RATIO = 0.5l;
/// Use this ctor if you only care about the cache size but not internals like the cache policy.
explicit CacheBase(size_t max_size_in_bytes, size_t max_count = NO_MAX_COUNT, double size_ratio = DEFAULT_SIZE_RATIO)
: CacheBase("SLRU", max_size_in_bytes, max_count, size_ratio)
{
}
/// Use this ctor if you want the user to configure the cache policy via some setting. Supports only general-purpose policies LRU and SLRU.
explicit CacheBase(std::string_view cache_policy_name, size_t max_size_in_bytes, size_t max_count = 0, double size_ratio = 0.5)
/// Use this ctor if the user should be able to configure the cache policy and cache sizes via settings. Supports only general-purpose policies LRU and SLRU.
explicit CacheBase(std::string_view cache_policy_name, size_t max_size_in_bytes, size_t max_count, double size_ratio)
{
auto on_weight_loss_function = [&](size_t weight_loss) { onRemoveOverflowWeightLoss(weight_loss); };
@ -79,7 +82,7 @@ public:
MappedPtr get(const Key & key)
{
std::lock_guard lock(mutex);
auto res = cache_policy->get(key, lock);
auto res = cache_policy->get(key);
if (res)
++hits;
else
@ -90,7 +93,7 @@ public:
std::optional<KeyMapped> getWithKey(const Key & key)
{
std::lock_guard lock(mutex);
auto res = cache_policy->getWithKey(key, lock);
auto res = cache_policy->getWithKey(key);
if (res.has_value())
++hits;
else
@ -101,7 +104,7 @@ public:
void set(const Key & key, const MappedPtr & mapped)
{
std::lock_guard lock(mutex);
cache_policy->set(key, mapped, lock);
cache_policy->set(key, mapped);
}
/// If the value for the key is in the cache, returns it. If it is not, calls load_func() to
@ -118,7 +121,7 @@ public:
InsertTokenHolder token_holder;
{
std::lock_guard cache_lock(mutex);
auto val = cache_policy->get(key, cache_lock);
auto val = cache_policy->get(key);
if (val)
{
++hits;
@ -156,7 +159,7 @@ public:
auto token_it = insert_tokens.find(key);
if (token_it != insert_tokens.end() && token_it->second.get() == token)
{
cache_policy->set(key, token->value, cache_lock);
cache_policy->set(key, token->value);
result = true;
}
@ -185,49 +188,49 @@ public:
insert_tokens.clear();
hits = 0;
misses = 0;
cache_policy->clear(lock);
cache_policy->clear();
}
void remove(const Key & key)
{
std::lock_guard lock(mutex);
cache_policy->remove(key, lock);
cache_policy->remove(key);
}
size_t weight() const
size_t sizeInBytes() const
{
std::lock_guard lock(mutex);
return cache_policy->weight(lock);
return cache_policy->sizeInBytes();
}
size_t count() const
{
std::lock_guard lock(mutex);
return cache_policy->count(lock);
return cache_policy->count();
}
size_t maxSize() const
size_t maxSizeInBytes() const
{
std::lock_guard lock(mutex);
return cache_policy->maxSize(lock);
return cache_policy->maxSizeInBytes();
}
void setMaxCount(size_t max_count)
{
std::lock_guard lock(mutex);
cache_policy->setMaxCount(max_count, lock);
cache_policy->setMaxCount(max_count);
}
void setMaxSize(size_t max_size_in_bytes)
void setMaxSizeInBytes(size_t max_size_in_bytes)
{
std::lock_guard lock(mutex);
cache_policy->setMaxSize(max_size_in_bytes, lock);
cache_policy->setMaxSizeInBytes(max_size_in_bytes);
}
void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries)
{
std::lock_guard lock(mutex);
cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries, lock);
cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries);
}
virtual ~CacheBase() = default;


@ -0,0 +1,74 @@
#include "EnvironmentProxyConfigurationResolver.h"
#include <Common/logger_useful.h>
#include <Poco/URI.h>
namespace DB
{
/*
* Usually environment variables are upper-case, but it seems like proxy-related variables are an exception.
* See https://unix.stackexchange.com/questions/212894/whats-the-right-format-for-the-http-proxy-environment-variable-caps-or-no-ca/212972#212972
* */
static constexpr auto PROXY_HTTP_ENVIRONMENT_VARIABLE = "http_proxy";
static constexpr auto PROXY_HTTPS_ENVIRONMENT_VARIABLE = "https_proxy";
EnvironmentProxyConfigurationResolver::EnvironmentProxyConfigurationResolver(Protocol protocol_)
: protocol(protocol_)
{}
namespace
{
const char * getProxyHost(DB::ProxyConfiguration::Protocol protocol)
{
/*
* getenv is safe to use here because ClickHouse code does not make any call to `setenv` or `putenv`
* aside from tests and a very early call during startup: https://github.com/ClickHouse/ClickHouse/blob/master/src/Daemon/BaseDaemon.cpp#L791
* */
if (protocol == DB::ProxyConfiguration::Protocol::HTTP)
{
return std::getenv(PROXY_HTTP_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
}
else if (protocol == DB::ProxyConfiguration::Protocol::HTTPS)
{
return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
}
else
{
if (const char * http_proxy_host = std::getenv(PROXY_HTTP_ENVIRONMENT_VARIABLE)) // NOLINT(concurrency-mt-unsafe)
{
return http_proxy_host;
}
else
{
return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
}
}
}
}
ProxyConfiguration EnvironmentProxyConfigurationResolver::resolve()
{
const auto * proxy_host = getProxyHost(protocol);
if (!proxy_host)
{
return {};
}
auto uri = Poco::URI(proxy_host);
auto host = uri.getHost();
auto scheme = uri.getScheme();
auto port = uri.getPort();
LOG_TRACE(&Poco::Logger::get("EnvironmentProxyConfigurationResolver"), "Use proxy from environment: {}://{}:{}", scheme, host, port);
return ProxyConfiguration {
host,
ProxyConfiguration::protocolFromString(scheme),
port
};
}
}


@ -0,0 +1,23 @@
#pragma once
#include <Common/ProxyConfigurationResolver.h>
namespace DB
{
/*
* Grabs proxy configuration from environment variables (http_proxy and https_proxy).
* */
class EnvironmentProxyConfigurationResolver : public ProxyConfigurationResolver
{
public:
explicit EnvironmentProxyConfigurationResolver(Protocol protocol_);
ProxyConfiguration resolve() override;
void errorReport(const ProxyConfiguration &) override {}
private:
Protocol protocol;
};
}


@ -584,6 +584,7 @@
M(699, INVALID_REDIS_TABLE_STRUCTURE) \
M(700, USER_SESSION_LIMIT_EXCEEDED) \
M(701, CLUSTER_DOESNT_EXIST) \
M(702, CLIENT_INFO_DOES_NOT_MATCH) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \


@ -37,25 +37,25 @@ public:
explicit ICachePolicy(CachePolicyUserQuotaPtr user_quotas_) : user_quotas(std::move(user_quotas_)) {}
virtual ~ICachePolicy() = default;
virtual size_t weight(std::lock_guard<std::mutex> & /*cache_lock*/) const = 0;
virtual size_t count(std::lock_guard<std::mutex> & /*cache_lock*/) const = 0;
virtual size_t maxSize(std::lock_guard<std::mutex>& /*cache_lock*/) const = 0;
virtual size_t sizeInBytes() const = 0;
virtual size_t count() const = 0;
virtual size_t maxSizeInBytes() const = 0;
virtual void setMaxCount(size_t /*max_count*/, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
virtual void setMaxSize(size_t /*max_size_in_bytes*/, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries, std::lock_guard<std::mutex> & /*cache_lock*/) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
virtual void setMaxCount(size_t /*max_count*/) = 0;
virtual void setMaxSizeInBytes(size_t /*max_size_in_bytes*/) = 0;
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
/// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
/// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key.
/// Then use getWithKey() to also return the found key including its non-hashed data.
virtual MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
virtual std::optional<KeyMapped> getWithKey(const Key &, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
virtual MappedPtr get(const Key & key) = 0;
virtual std::optional<KeyMapped> getWithKey(const Key &) = 0;
virtual void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
virtual void set(const Key & key, const MappedPtr & mapped) = 0;
virtual void remove(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
virtual void remove(const Key & key) = 0;
virtual void clear(std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
virtual void clear() = 0;
virtual std::vector<KeyMapped> dump() const = 0;
protected:


@ -34,41 +34,41 @@ public:
{
}
size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t sizeInBytes() const override
{
return current_size_in_bytes;
}
size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t count() const override
{
return cells.size();
}
size_t maxSize(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t maxSizeInBytes() const override
{
return max_size_in_bytes;
}
void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxCount(size_t max_count_) override
{
max_count = max_count_;
removeOverflow();
}
void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxSizeInBytes(size_t max_size_in_bytes_) override
{
max_size_in_bytes = max_size_in_bytes_;
removeOverflow();
}
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
void clear() override
{
queue.clear();
cells.clear();
current_size_in_bytes = 0;
}
void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
void remove(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -79,7 +79,7 @@ public:
cells.erase(it);
}
MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
MappedPtr get(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -93,7 +93,7 @@ public:
return cell.value;
}
std::optional<KeyMapped> getWithKey(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) override
std::optional<KeyMapped> getWithKey(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -107,7 +107,7 @@ public:
return std::make_optional<KeyMapped>({it->first, cell.value});
}
void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) override
void set(const Key & key, const MappedPtr & mapped) override
{
auto [it, inserted] = cells.emplace(std::piecewise_construct,
std::forward_as_tuple(key),

View File

@ -366,6 +366,8 @@ The server successfully detected this situation and will download merged part fr
M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
\
M(EngineFileLikeReadFiles, "Number of files read in table engines working with files (like File/S3/URL/HDFS).") \
\
M(ReadBufferFromS3Microseconds, "Time spent on reading from S3.") \
M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \
M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
@ -391,8 +393,11 @@ The server successfully detected this situation and will download merged part fr
M(CachedWriteBufferCacheWriteBytes, "Bytes written from source (remote fs, etc) to filesystem cache") \
M(CachedWriteBufferCacheWriteMicroseconds, "Time spent writing data into filesystem cache") \
\
M(FilesystemCacheLoadMetadataMicroseconds, "Time spent loading filesystem cache metadata") \
M(FilesystemCacheEvictedBytes, "Number of bytes evicted from filesystem cache") \
M(FilesystemCacheEvictedFileSegments, "Number of file segments evicted from filesystem cache") \
M(FilesystemCacheEvictionSkippedFileSegments, "Number of file segments skipped for eviction because of being unreleasable") \
M(FilesystemCacheEvictionTries, "Number of filesystem cache eviction attempts") \
M(FilesystemCacheLockKeyMicroseconds, "Lock cache key time") \
M(FilesystemCacheLockMetadataMicroseconds, "Lock filesystem cache metadata time") \
M(FilesystemCacheLockCacheMicroseconds, "Lock filesystem cache time") \
@ -485,8 +490,12 @@ The server successfully detected this situation and will download merged part fr
M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \
M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely") \
\
M(SchemaInferenceCacheHits, "Number of times a schema from cache was used for schema inference") \
M(SchemaInferenceCacheMisses, "Number of times a schema is not in cache while schema inference") \
M(SchemaInferenceCacheHits, "Number of times the requested source is found in schema cache") \
M(SchemaInferenceCacheSchemaHits, "Number of times the schema is found in schema cache during schema inference") \
M(SchemaInferenceCacheNumRowsHits, "Number of times the number of rows is found in schema cache during count from files") \
M(SchemaInferenceCacheMisses, "Number of times the requested source is not in schema cache") \
M(SchemaInferenceCacheSchemaMisses, "Number of times the requested source is in cache but the schema is not in cache during schema inference") \
M(SchemaInferenceCacheNumRowsMisses, "Number of times the requested source is in cache but the number of rows is not in cache during count from files") \
M(SchemaInferenceCacheEvictions, "Number of times a schema from cache was evicted due to overflow") \
M(SchemaInferenceCacheInvalidations, "Number of times a schema in cache became invalid due to changes in data") \
\

View File

@ -0,0 +1,51 @@
#pragma once
#include <string>
namespace DB
{
struct ProxyConfiguration
{
enum class Protocol
{
HTTP,
HTTPS,
ANY
};
static auto protocolFromString(const std::string & str)
{
if (str == "http")
{
return Protocol::HTTP;
}
else if (str == "https")
{
return Protocol::HTTPS;
}
else
{
return Protocol::ANY;
}
}
static auto protocolToString(Protocol protocol)
{
switch (protocol)
{
case Protocol::HTTP:
return "http";
case Protocol::HTTPS:
return "https";
case Protocol::ANY:
return "any";
}
}
std::string host;
Protocol protocol;
uint16_t port;
};
}
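For reference, the `protocolFromString` mapping above is total: any unrecognized scheme falls through to `ANY`. A small self-contained sketch (a simplified restatement, not the real header) making that behavior explicit:

```cpp
#include <cassert>
#include <string>

enum class Protocol { HTTP, HTTPS, ANY };

Protocol protocolFromString(const std::string & str)
{
    if (str == "http") return Protocol::HTTP;
    if (str == "https") return Protocol::HTTPS;
    return Protocol::ANY; // anything else silently falls back to ANY
}

int main()
{
    assert(protocolFromString("http") == Protocol::HTTP);
    assert(protocolFromString("https") == Protocol::HTTPS);
    // Unknown schemes (e.g. "socks5") are not rejected, they become ANY.
    assert(protocolFromString("socks5") == Protocol::ANY);
}
```

Note that `protocolToString` has no `default:` case, so a compiler with `-Wswitch` would flag any newly added enumerator that is left unhandled.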

View File

@ -0,0 +1,17 @@
#pragma once
#include <Common/ProxyConfiguration.h>
namespace DB
{
struct ProxyConfigurationResolver
{
using Protocol = ProxyConfiguration::Protocol;
virtual ~ProxyConfigurationResolver() = default;
virtual ProxyConfiguration resolve() = 0;
virtual void errorReport(const ProxyConfiguration & config) = 0;
};
}

View File

@ -0,0 +1,208 @@
#include <Common/ProxyConfigurationResolverProvider.h>
#include <Common/EnvironmentProxyConfigurationResolver.h>
#include <Common/Exception.h>
#include <Common/ProxyListConfigurationResolver.h>
#include <Common/RemoteProxyConfigurationResolver.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/logger_useful.h>
#include <Interpreters/Context.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace
{
std::shared_ptr<ProxyConfigurationResolver> getRemoteResolver(
const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
{
auto endpoint = Poco::URI(configuration.getString(config_prefix + ".endpoint"));
auto proxy_scheme = configuration.getString(config_prefix + ".proxy_scheme");
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy resolver config: {}", proxy_scheme);
auto proxy_port = configuration.getUInt(config_prefix + ".proxy_port");
auto cache_ttl = configuration.getUInt(config_prefix + ".proxy_cache_time", 10);
LOG_DEBUG(&Poco::Logger::get("ProxyConfigurationResolverProvider"), "Configured remote proxy resolver: {}, Scheme: {}, Port: {}",
endpoint.toString(), proxy_scheme, proxy_port);
return std::make_shared<RemoteProxyConfigurationResolver>(endpoint, proxy_scheme, proxy_port, cache_ttl);
}
std::shared_ptr<ProxyConfigurationResolver> getRemoteResolver(
ProxyConfiguration::Protocol protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
{
std::vector<String> keys;
configuration.keys(config_prefix, keys);
std::vector<Poco::URI> uris;
for (const auto & key : keys)
{
if (startsWith(key, "resolver"))
{
auto prefix_with_key = config_prefix + "." + key;
auto proxy_scheme_config_string = prefix_with_key + ".proxy_scheme";
auto config_protocol = configuration.getString(proxy_scheme_config_string);
if (ProxyConfiguration::Protocol::ANY == protocol || config_protocol == ProxyConfiguration::protocolToString(protocol))
{
return getRemoteResolver(prefix_with_key, configuration);
}
}
}
return nullptr;
}
auto extractURIList(const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
{
std::vector<String> keys;
configuration.keys(config_prefix, keys);
std::vector<Poco::URI> uris;
for (const auto & key : keys)
{
if (startsWith(key, "uri"))
{
Poco::URI proxy_uri(configuration.getString(config_prefix + "." + key));
if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy uri: {}", proxy_uri.toString());
if (proxy_uri.getHost().empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty host in proxy uri: {}", proxy_uri.toString());
uris.push_back(proxy_uri);
LOG_DEBUG(&Poco::Logger::get("ProxyConfigurationResolverProvider"), "Configured proxy: {}", proxy_uri.toString());
}
}
return uris;
}
std::shared_ptr<ProxyConfigurationResolver> getListResolverNewSyntax(
ProxyConfiguration::Protocol protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
{
std::vector<Poco::URI> uris;
bool include_http_uris = ProxyConfiguration::Protocol::ANY == protocol || ProxyConfiguration::Protocol::HTTP == protocol;
if (include_http_uris && configuration.has(config_prefix + ".http"))
{
auto http_uris = extractURIList(config_prefix + ".http", configuration);
uris.insert(uris.end(), http_uris.begin(), http_uris.end());
}
bool include_https_uris = ProxyConfiguration::Protocol::ANY == protocol || ProxyConfiguration::Protocol::HTTPS == protocol;
if (include_https_uris && configuration.has(config_prefix + ".https"))
{
auto https_uris = extractURIList(config_prefix + ".https", configuration);
uris.insert(uris.end(), https_uris.begin(), https_uris.end());
}
return uris.empty() ? nullptr : std::make_shared<ProxyListConfigurationResolver>(uris);
}
std::shared_ptr<ProxyConfigurationResolver> getListResolverOldSyntax(
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
{
auto uris = extractURIList(config_prefix, configuration);
return uris.empty() ? nullptr : std::make_shared<ProxyListConfigurationResolver>(uris);
}
std::shared_ptr<ProxyConfigurationResolver> getListResolver(
ProxyConfiguration::Protocol protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration
)
{
std::vector<String> keys;
configuration.keys(config_prefix, keys);
bool new_setting_syntax = std::find_if(
keys.begin(),
keys.end(),
[](const String & key)
{
return startsWith(key, "http") || startsWith(key, "https");
}) != keys.end();
return new_setting_syntax ? getListResolverNewSyntax(protocol, config_prefix, configuration)
: getListResolverOldSyntax(config_prefix, configuration);
}
}
std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::get(Protocol protocol)
{
auto context = Context::getGlobalContextInstance();
chassert(context);
if (auto resolver = getFromSettings(protocol, "", context->getConfigRef()))
{
return resolver;
}
return std::make_shared<EnvironmentProxyConfigurationResolver>(protocol);
}
std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::getFromSettings(
Protocol protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
{
auto proxy_prefix = config_prefix.empty() ? "proxy" : config_prefix + ".proxy";
if (configuration.has(proxy_prefix))
{
std::vector<String> config_keys;
configuration.keys(proxy_prefix, config_keys);
if (auto remote_resolver = getRemoteResolver(protocol, proxy_prefix, configuration))
{
return remote_resolver;
}
if (auto list_resolver = getListResolver(protocol, proxy_prefix, configuration))
{
return list_resolver;
}
}
return nullptr;
}
std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
{
/*
* First, try to get a resolver from the settings using only the combination of config_prefix and configuration.
* This logic exists for backward compatibility with old S3 storage specific proxy configuration.
* */
if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(Protocol::ANY, config_prefix, configuration))
{
return resolver;
}
/*
* In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings.
* Falls back to Environment resolver if no configuration is found.
* */
return ProxyConfigurationResolverProvider::get(Protocol::ANY);
}
}
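The provider tries resolvers in a fixed order: a remote resolver from the config, then a list resolver, then the environment fallback. A hedged sketch of how a caller could assemble an in-memory configuration for the new list syntax, mirroring the unit tests later in this diff (host names are illustrative):

```cpp
#include <Poco/AutoPtr.h>
#include <Poco/Util/MapConfiguration.h>

// Builds a configuration equivalent to:
//   <proxy>
//     <http><uri>http://http_list_proxy:3128</uri></http>
//     <https><uri>http://https_list_proxy:3128</uri></https>
//   </proxy>
Poco::AutoPtr<Poco::Util::MapConfiguration> makeProxyConfig()
{
    Poco::AutoPtr<Poco::Util::MapConfiguration> config(new Poco::Util::MapConfiguration());
    config->setString("proxy", "");
    config->setString("proxy.http", "");
    config->setString("proxy.http.uri", "http://http_list_proxy:3128");
    config->setString("proxy.https", "");
    config->setString("proxy.https.uri", "http://https_list_proxy:3128");
    return config;
}
```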

View File

@ -0,0 +1,40 @@
#pragma once
#include <base/types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/ProxyConfigurationResolver.h>
namespace DB
{
class ProxyConfigurationResolverProvider
{
public:
using Protocol = ProxyConfiguration::Protocol;
/*
* Returns the appropriate ProxyConfigurationResolver based on the current CH settings (remote resolver or list resolver).
* If no configuration is found, returns Environment Resolver.
* */
static std::shared_ptr<ProxyConfigurationResolver> get(Protocol protocol);
/*
* This API exists exclusively for backward compatibility with old S3 storage specific proxy configuration.
* If no configuration is found, returns nullptr.
* */
static std::shared_ptr<ProxyConfigurationResolver> getFromOldSettingsFormat(
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
);
private:
static std::shared_ptr<ProxyConfigurationResolver> getFromSettings(
Protocol protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
);
};
}

View File

@ -0,0 +1,31 @@
#include <Common/ProxyListConfigurationResolver.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/logger_useful.h>
#include <Poco/URI.h>
namespace DB
{
ProxyListConfigurationResolver::ProxyListConfigurationResolver(std::vector<Poco::URI> proxies_)
: proxies(std::move(proxies_))
{
}
ProxyConfiguration ProxyListConfigurationResolver::resolve()
{
if (proxies.empty())
{
return {};
}
/// Avoid atomic increment if number of proxies is 1.
size_t index = proxies.size() > 1 ? (access_counter++) % proxies.size() : 0;
auto & proxy = proxies[index];
LOG_DEBUG(&Poco::Logger::get("ProxyListConfigurationResolver"), "Use proxy: {}", proxies[index].toString());
return ProxyConfiguration {proxy.getHost(), ProxyConfiguration::protocolFromString(proxy.getScheme()), proxy.getPort()};
}
}
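The index arithmetic above is plain modulo round-robin; a standalone sketch demonstrating the cycling (names illustrative):

```cpp
#include <atomic>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    // Same round-robin index arithmetic as resolve() above.
    std::vector<std::string> proxies{"proxy1", "proxy2", "proxy3"};
    std::atomic<size_t> access_counter{0};

    for (int i = 0; i < 6; ++i)
    {
        // Avoid the atomic increment entirely when there is only one proxy.
        size_t index = proxies.size() > 1 ? (access_counter++) % proxies.size() : 0;
        std::cout << proxies[index] << '\n'; // proxy1 proxy2 proxy3 proxy1 ...
    }
}
```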

View File

@ -0,0 +1,31 @@
#pragma once
#include <base/types.h>
#include <Common/ProxyConfigurationResolver.h>
#include <Poco/URI.h>
namespace DB
{
/*
* Round-robin proxy list resolver.
* */
class ProxyListConfigurationResolver : public ProxyConfigurationResolver
{
public:
explicit ProxyListConfigurationResolver(std::vector<Poco::URI> proxies_);
ProxyConfiguration resolve() override;
void errorReport(const ProxyConfiguration &) override {}
private:
std::vector<Poco::URI> proxies;
/// Access counter to get proxy using round-robin strategy.
std::atomic<size_t> access_counter;
};
}

View File

@ -1,32 +1,36 @@
#include "ProxyResolverConfiguration.h"
#if USE_AWS_S3
#include <Common/RemoteProxyConfigurationResolver.h>
#include <utility>
#include <IO/HTTPCommon.h>
#include "Poco/StreamCopier.h"
#include <Poco/StreamCopier.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Common/logger_useful.h>
#include <Common/DNSResolver.h>
namespace DB::ErrorCodes
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace DB::S3
{
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_
, unsigned proxy_port_, unsigned cache_ttl_)
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver(
const Poco::URI & endpoint_,
String proxy_protocol_,
unsigned proxy_port_,
unsigned cache_ttl_
)
: endpoint(endpoint_), proxy_protocol(std::move(proxy_protocol_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
{
}
ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const Aws::Http::HttpRequest &)
ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
{
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Obtain proxy using resolver: {}", endpoint.toString());
auto * logger = &Poco::Logger::get("RemoteProxyConfigurationResolver");
LOG_DEBUG(logger, "Obtain proxy using resolver: {}", endpoint.toString());
std::lock_guard lock(cache_mutex);
@ -34,7 +38,12 @@ ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const
if (cache_ttl.count() && cache_valid && now <= cache_timestamp + cache_ttl && now >= cache_timestamp)
{
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use cached proxy: {}://{}:{}", Aws::Http::SchemeMapper::ToString(cached_config.proxy_scheme), cached_config.proxy_host, cached_config.proxy_port);
LOG_DEBUG(logger,
"Use cached proxy: {}://{}:{}",
cached_config.protocol,
cached_config.host,
cached_config.port
);
return cached_config;
}
@ -84,11 +93,11 @@ ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const
/// Read proxy host as string from response body.
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}://{}:{}", proxy_scheme, proxy_host, proxy_port);
LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol, proxy_host, proxy_port);
cached_config.proxy_scheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
cached_config.proxy_host = proxy_host;
cached_config.proxy_port = proxy_port;
cached_config.protocol = ProxyConfiguration::protocolFromString(proxy_protocol);
cached_config.host = proxy_host;
cached_config.port = proxy_port;
cache_timestamp = std::chrono::system_clock::now();
cache_valid = true;
@ -96,16 +105,14 @@ ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const
}
catch (...)
{
tryLogCurrentException("AWSClient", "Failed to obtain proxy");
/// Don't use proxy if it can't be obtained.
ClientConfigurationPerRequest cfg;
return cfg;
tryLogCurrentException("RemoteProxyConfigurationResolver", "Failed to obtain proxy");
return {};
}
}
void ProxyResolverConfiguration::errorReport(const ClientConfigurationPerRequest & config)
void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & config)
{
if (config.proxy_host.empty())
if (config.host.empty())
return;
std::lock_guard lock(cache_mutex);
@ -113,8 +120,8 @@ void ProxyResolverConfiguration::errorReport(const ClientConfigurationPerRequest
if (!cache_ttl.count() || !cache_valid)
return;
if (std::tie(cached_config.proxy_scheme, cached_config.proxy_host, cached_config.proxy_port)
!= std::tie(config.proxy_scheme, config.proxy_host, config.proxy_port))
if (std::tie(cached_config.protocol, cached_config.host, cached_config.port)
!= std::tie(config.protocol, config.host, config.port))
return;
/// Invalidate the cached proxy when an error occurred with it
@ -122,5 +129,3 @@ void ProxyResolverConfiguration::errorReport(const ClientConfigurationPerRequest
}
}
#endif

View File

@ -0,0 +1,46 @@
#pragma once
#include <base/types.h>
#include <mutex>
#include <Common/ProxyConfigurationResolver.h>
#include <Poco/URI.h>
namespace DB
{
/*
* Makes an HTTP GET request to the specified endpoint to obtain a proxy host.
* */
class RemoteProxyConfigurationResolver : public ProxyConfigurationResolver
{
public:
RemoteProxyConfigurationResolver(
const Poco::URI & endpoint_,
String proxy_protocol_,
unsigned proxy_port_,
unsigned cache_ttl_
);
ProxyConfiguration resolve() override;
void errorReport(const ProxyConfiguration & config) override;
private:
/// Endpoint to obtain a proxy host.
const Poco::URI endpoint;
/// Scheme for obtained proxy.
const String proxy_protocol;
/// Port for obtained proxy.
const unsigned proxy_port;
std::mutex cache_mutex;
bool cache_valid = false;
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
const std::chrono::seconds cache_ttl{0};
ProxyConfiguration cached_config;
};
}
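The cache fields above combine into a TTL check inside resolve(); restated as a free function for clarity (a sketch, not the actual member code):

```cpp
#include <chrono>

// The cached proxy is reused only while the timestamp is within the TTL
// window; the `now >= cache_timestamp` guard also protects against the
// system clock moving backwards.
bool isCacheUsable(
    std::chrono::seconds cache_ttl,
    bool cache_valid,
    std::chrono::system_clock::time_point cache_timestamp)
{
    auto now = std::chrono::system_clock::now();
    return cache_ttl.count() && cache_valid
        && now <= cache_timestamp + cache_ttl
        && now >= cache_timestamp;
}
```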

View File

@ -31,45 +31,45 @@ public:
/// TODO: construct from special struct with cache policy parameters (also with max_protected_size).
SLRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, double size_ratio_, OnWeightLossFunction on_weight_loss_function_)
: Base(std::make_unique<NoCachePolicyUserQuota>())
, size_ratio(size_ratio_)
, max_protected_size(static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio)))
, max_size_in_bytes(max_size_in_bytes_)
, max_protected_size(calculateMaxProtectedSize(max_size_in_bytes_, size_ratio_))
, max_count(max_count_)
, size_ratio(size_ratio_)
, on_weight_loss_function(on_weight_loss_function_)
{
}
size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t sizeInBytes() const override
{
return current_size_in_bytes;
}
size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t count() const override
{
return cells.size();
}
size_t maxSize(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t maxSizeInBytes() const override
{
return max_size_in_bytes;
}
void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxCount(size_t max_count_) override
{
max_count = max_count_;
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
}
void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxSizeInBytes(size_t max_size_in_bytes_) override
{
max_protected_size = static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio));
max_protected_size = calculateMaxProtectedSize(max_size_in_bytes_, size_ratio);
max_size_in_bytes = max_size_in_bytes_;
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
}
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
void clear() override
{
cells.clear();
probationary_queue.clear();
@ -78,7 +78,7 @@ public:
current_protected_size = 0;
}
void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
void remove(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -95,7 +95,7 @@ public:
cells.erase(it);
}
MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
MappedPtr get(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -116,7 +116,7 @@ public:
return cell.value;
}
std::optional<KeyMapped> getWithKey(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) override
std::optional<KeyMapped> getWithKey(const Key & key) override
{
auto it = cells.find(key);
if (it == cells.end())
@ -137,7 +137,7 @@ public:
return std::make_optional<KeyMapped>({it->first, cell.value});
}
void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) override
void set(const Key & key, const MappedPtr & mapped) override
{
auto [it, inserted] = cells.emplace(std::piecewise_construct,
std::forward_as_tuple(key),
@ -208,16 +208,21 @@ private:
Cells cells;
size_t max_size_in_bytes;
size_t max_protected_size;
size_t max_count;
const double size_ratio;
size_t current_protected_size = 0;
size_t current_size_in_bytes = 0;
size_t max_protected_size;
size_t max_size_in_bytes;
size_t max_count;
WeightFunction weight_function;
OnWeightLossFunction on_weight_loss_function;
static size_t calculateMaxProtectedSize(size_t max_size_in_bytes, double size_ratio)
{
return static_cast<size_t>(max_size_in_bytes * std::max(0.0, std::min(1.0, size_ratio)));
}
void removeOverflow(SLRUQueue & queue, size_t max_weight_size, size_t & current_weight_size, bool is_protected)
{
size_t current_weight_lost = 0;
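calculateMaxProtectedSize() now clamps the size ratio to [0, 1] instead of only capping it at 1.0. A quick worked example of the clamped arithmetic:

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>

// Same clamping as calculateMaxProtectedSize() above.
static size_t calculateMaxProtectedSize(size_t max_size_in_bytes, double size_ratio)
{
    return static_cast<size_t>(max_size_in_bytes * std::max(0.0, std::min(1.0, size_ratio)));
}

int main()
{
    assert(calculateMaxProtectedSize(10, 0.5) == 5);  // half the cache is protected
    assert(calculateMaxProtectedSize(10, 1.5) == 10); // ratio clamped to 1.0
    assert(calculateMaxProtectedSize(10, -1.0) == 0); // negative ratio clamped to 0.0
}
```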

View File

@ -94,39 +94,39 @@ public:
{
}
size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t sizeInBytes() const override
{
return size_in_bytes;
}
size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t count() const override
{
return cache.size();
}
size_t maxSize(std::lock_guard<std::mutex> & /* cache_lock */) const override
size_t maxSizeInBytes() const override
{
return max_size_in_bytes;
}
void setMaxCount(size_t max_count_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxCount(size_t max_count_) override
{
/// lazy behavior: the cache only shrinks upon the next insert
max_count = max_count_;
}
void setMaxSize(size_t max_size_in_bytes_, std::lock_guard<std::mutex> & /* cache_lock */) override
void setMaxSizeInBytes(size_t max_size_in_bytes_) override
{
/// lazy behavior: the cache only shrinks upon the next insert
max_size_in_bytes = max_size_in_bytes_;
}
void clear(std::lock_guard<std::mutex> & /* cache_lock */) override
void clear() override
{
cache.clear();
}
void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
void remove(const Key & key) override
{
auto it = cache.find(key);
if (it == cache.end())
@ -137,7 +137,7 @@ public:
size_in_bytes -= sz;
}
MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
MappedPtr get(const Key & key) override
{
auto it = cache.find(key);
if (it == cache.end())
@ -145,7 +145,7 @@ public:
return it->second;
}
std::optional<KeyMapped> getWithKey(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
std::optional<KeyMapped> getWithKey(const Key & key) override
{
auto it = cache.find(key);
if (it == cache.end())
@ -154,7 +154,7 @@ public:
}
/// Evicts on a best-effort basis. If there are too many non-stale entries, the new entry may not be cached at all!
void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) override
void set(const Key & key, const MappedPtr & mapped) override
{
chassert(mapped.get());

View File

@ -73,3 +73,25 @@ inline std::string xmlNodeAsString(Poco::XML::Node *pNode)
result += ("</"+ node_name + ">\n");
return Poco::XML::fromXMLString(result);
}
struct EnvironmentProxySetter
{
EnvironmentProxySetter(const Poco::URI & http_proxy, const Poco::URI & https_proxy)
{
if (!http_proxy.empty())
{
setenv("http_proxy", http_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe)
}
if (!https_proxy.empty())
{
setenv("https_proxy", https_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe)
}
}
~EnvironmentProxySetter()
{
unsetenv("http_proxy"); // NOLINT(concurrency-mt-unsafe)
unsetenv("https_proxy"); // NOLINT(concurrency-mt-unsafe)
}
};
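A hypothetical usage sketch of the RAII helper above (the function name is illustrative):

```cpp
#include <Poco/URI.h>

void someTest()
{
    {
        // Installs http_proxy for the lifetime of the scope.
        EnvironmentProxySetter setter(Poco::URI("http://proxy:3128"), {});
        // std::getenv("http_proxy") now returns "http://proxy:3128"
    }
    // http_proxy is unset again here
}
```

One caveat worth noting: the destructor unsets both variables unconditionally, so any pre-existing http_proxy/https_proxy values in the test environment are cleared rather than restored.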

View File

@ -5,11 +5,11 @@
TEST(LRUCache, set)
{
using SimpleCacheBase = DB::CacheBase<int, int>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(2, std::make_shared<int>(3));
auto w = lru_cache.weight();
auto w = lru_cache.sizeInBytes();
auto n = lru_cache.count();
ASSERT_EQ(w, 2);
ASSERT_EQ(n, 2);
@ -18,7 +18,7 @@ TEST(LRUCache, set)
TEST(LRUCache, update)
{
using SimpleCacheBase = DB::CacheBase<int, int>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(1, std::make_shared<int>(3));
auto val = lru_cache.get(1);
@ -29,7 +29,7 @@ TEST(LRUCache, update)
TEST(LRUCache, get)
{
using SimpleCacheBase = DB::CacheBase<int, int>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<int>(2));
lru_cache.set(2, std::make_shared<int>(3));
SimpleCacheBase::MappedPtr value = lru_cache.get(1);
@ -49,7 +49,7 @@ struct ValueWeight
TEST(LRUCache, evictOnSize)
{
using SimpleCacheBase = DB::CacheBase<int, size_t>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 20, /*max_count*/ 3);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 20, /*max_count*/ 3, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<size_t>(2));
lru_cache.set(2, std::make_shared<size_t>(3));
lru_cache.set(3, std::make_shared<size_t>(4));
@ -65,7 +65,7 @@ TEST(LRUCache, evictOnSize)
TEST(LRUCache, evictOnWeight)
{
using SimpleCacheBase = DB::CacheBase<int, size_t, std::hash<int>, ValueWeight>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
lru_cache.set(1, std::make_shared<size_t>(2));
lru_cache.set(2, std::make_shared<size_t>(3));
lru_cache.set(3, std::make_shared<size_t>(4));
@ -74,7 +74,7 @@ TEST(LRUCache, evictOnWeight)
auto n = lru_cache.count();
ASSERT_EQ(n, 2);
auto w = lru_cache.weight();
auto w = lru_cache.sizeInBytes();
ASSERT_EQ(w, 9);
auto value = lru_cache.get(1);
@ -86,7 +86,7 @@ TEST(LRUCache, evictOnWeight)
TEST(LRUCache, getOrSet)
{
using SimpleCacheBase = DB::CacheBase<int, size_t, std::hash<int>, ValueWeight>;
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10);
auto lru_cache = SimpleCacheBase("LRU", /*max_size_in_bytes*/ 10, /*max_count*/ 10, /*size_ratio*/ 0.5);
size_t x = 10;
auto load_func = [&] { return std::make_shared<size_t>(x); };
auto [value, loaded] = lru_cache.getOrSet(1, load_func);

View File

@ -0,0 +1,122 @@
#include <gtest/gtest.h>
#include <Common/ProxyConfigurationResolverProvider.h>
#include <Common/tests/gtest_global_context.h>
#include <Common/tests/gtest_helper_functions.h>
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
class ProxyConfigurationResolverProviderTests : public ::testing::Test
{
protected:
static void SetUpTestSuite() {
context = getContext().context;
}
static void TearDownTestSuite() {
context->setConfig(Poco::AutoPtr(new Poco::Util::MapConfiguration()));
}
static DB::ContextMutablePtr context;
};
DB::ContextMutablePtr ProxyConfigurationResolverProviderTests::context;
Poco::URI http_env_proxy_server = Poco::URI("http://http_environment_proxy:3128");
Poco::URI https_env_proxy_server = Poco::URI("http://https_environment_proxy:3128");
Poco::URI http_list_proxy_server = Poco::URI("http://http_list_proxy:3128");
Poco::URI https_list_proxy_server = Poco::URI("http://https_list_proxy:3128");
TEST_F(ProxyConfigurationResolverProviderTests, EnvironmentResolverShouldBeUsedIfNoSettings)
{
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
auto http_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP)->resolve();
auto https_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS)->resolve();
ASSERT_EQ(http_configuration.host, http_env_proxy_server.getHost());
ASSERT_EQ(http_configuration.port, http_env_proxy_server.getPort());
ASSERT_EQ(http_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme()));
ASSERT_EQ(https_configuration.host, https_env_proxy_server.getHost());
ASSERT_EQ(https_configuration.port, https_env_proxy_server.getPort());
ASSERT_EQ(https_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme()));
}
TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPOnly)
{
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
config->setString("proxy.http", "");
config->setString("proxy.http.uri", http_list_proxy_server.toString());
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP)->resolve();
ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme()));
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS)->resolve();
// No https configuration since it's not set
ASSERT_EQ(https_proxy_configuration.host, "");
ASSERT_EQ(https_proxy_configuration.port, 0);
}
TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPSOnly)
{
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
config->setString("proxy.https", "");
config->setString("proxy.https.uri", https_list_proxy_server.toString());
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP)->resolve();
ASSERT_EQ(http_proxy_configuration.host, "");
ASSERT_EQ(http_proxy_configuration.port, 0);
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS)->resolve();
ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost());
// Protocol is still HTTP because the https proxy list URI itself uses the http scheme
ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme()));
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
}
TEST_F(ProxyConfigurationResolverProviderTests, ListBoth)
{
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
config->setString("proxy.http", "");
config->setString("proxy.http.uri", http_list_proxy_server.toString());
config->setString("proxy", "");
config->setString("proxy.https", "");
config->setString("proxy.https.uri", https_list_proxy_server.toString());
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP)->resolve();
ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme()));
ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort());
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS)->resolve();
ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost());
// Protocol is still HTTP because the https proxy list URI itself uses the http scheme
ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme()));
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
}
// The remote resolver is tricky to test in unit tests

View File

@ -0,0 +1,96 @@
#include <gtest/gtest.h>
#include <Common/EnvironmentProxyConfigurationResolver.h>
#include <Common/tests/gtest_helper_functions.h>
#include <Poco/URI.h>
namespace
{
auto http_proxy_server = Poco::URI("http://proxy_server:3128");
auto https_proxy_server = Poco::URI("https://proxy_server:3128");
}
TEST(EnvironmentProxyConfigurationResolver, TestHTTP)
{
EnvironmentProxySetter setter(http_proxy_server, {});
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::HTTP);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, http_proxy_server.getHost());
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
}
TEST(EnvironmentProxyConfigurationResolver, TestHTTPNoEnv)
{
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::HTTP);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}
TEST(EnvironmentProxyConfigurationResolver, TestHTTPs)
{
EnvironmentProxySetter setter({}, https_proxy_server);
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::HTTPS);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, https_proxy_server.getHost());
ASSERT_EQ(configuration.port, https_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
}
TEST(EnvironmentProxyConfigurationResolver, TestHTTPsNoEnv)
{
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::HTTPS);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}
TEST(EnvironmentProxyConfigurationResolver, TestANYHTTP)
{
EnvironmentProxySetter setter(http_proxy_server, {});
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, http_proxy_server.getHost());
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
}
TEST(EnvironmentProxyConfigurationResolver, TestANYHTTPS)
{
EnvironmentProxySetter setter({}, https_proxy_server);
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, https_proxy_server.getHost());
ASSERT_EQ(configuration.port, https_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
}
TEST(EnvironmentProxyConfigurationResolver, TestANYNoEnv)
{
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}

View File

@ -0,0 +1,26 @@
#include <gtest/gtest.h>
#include <Common/ProxyListConfigurationResolver.h>
#include <Poco/URI.h>
namespace
{
auto proxy_server1 = Poco::URI("http://proxy_server1:3128");
auto proxy_server2 = Poco::URI("http://proxy_server2:3128");
}
TEST(ProxyListConfigurationResolver, SimpleTest)
{
DB::ProxyListConfigurationResolver resolver({proxy_server1, proxy_server2});
auto configuration1 = resolver.resolve();
auto configuration2 = resolver.resolve();
ASSERT_EQ(configuration1.host, proxy_server1.getHost());
ASSERT_EQ(configuration1.port, proxy_server1.getPort());
ASSERT_EQ(configuration1.protocol, DB::ProxyConfiguration::protocolFromString(proxy_server1.getScheme()));
ASSERT_EQ(configuration2.host, proxy_server2.getHost());
ASSERT_EQ(configuration2.port, proxy_server2.getPort());
ASSERT_EQ(configuration2.protocol, DB::ProxyConfiguration::protocolFromString(proxy_server2.getScheme()));
}

View File

@ -9,7 +9,7 @@ TEST(SLRUCache, set)
slru_cache.set(1, std::make_shared<int>(2));
slru_cache.set(2, std::make_shared<int>(3));
auto w = slru_cache.weight();
auto w = slru_cache.sizeInBytes();
auto n = slru_cache.count();
ASSERT_EQ(w, 2);
ASSERT_EQ(n, 2);
@ -125,7 +125,7 @@ TEST(SLRUCache, evictOnElements)
auto n = slru_cache.count();
ASSERT_EQ(n, 1);
auto w = slru_cache.weight();
auto w = slru_cache.sizeInBytes();
ASSERT_EQ(w, 3);
auto value = slru_cache.get(1);
@ -148,7 +148,7 @@ TEST(SLRUCache, evictOnWeight)
auto n = slru_cache.count();
ASSERT_EQ(n, 2);
auto w = slru_cache.weight();
auto w = slru_cache.sizeInBytes();
ASSERT_EQ(w, 9);
auto value = slru_cache.get(1);

View File

@ -23,7 +23,7 @@ int main(int argc, char ** argv)
try
{
UncompressedCache cache(1024);
UncompressedCache cache("SLRU", 1024, 0.5);
std::string path = argv[1];
std::cerr << std::fixed << std::setprecision(3);

View File

@ -92,7 +92,8 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
auth_settings.region,
RemoteHostFilter(), s3_max_redirects,
enable_s3_requests_logging,
/* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {});
/* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {},
new_uri.uri.getScheme());
client_configuration.endpointOverride = new_uri.endpoint;

View File

@ -66,12 +66,18 @@
#define DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH 1000
/// Default maximum (total and entry) sizes and policies of various caches
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB;
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_MARK_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB;
static constexpr auto DEFAULT_MARK_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_INDEX_MARK_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_MMAP_CACHE_MAX_SIZE = 1_KiB; /// chosen by rolling dice
static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE = 128_MiB;
static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES = 10'000;

View File

@ -60,10 +60,16 @@ namespace DB
M(Double, cache_size_to_ram_max_ratio, 0.5, "Set the cache size to RAM max ratio. Allows lowering the cache size on low-memory systems.", 0) \
M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \
M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Index uncompressed cache policy name.", 0) \
M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.", 0) \
M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the index uncompressed cache relative to the cache's total size.", 0) \
M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Index mark cache policy name.", 0) \
M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for index marks. Zero means disabled.", 0) \
M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the index mark cache relative to the cache's total size.", 0) \
M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
\
M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \

View File

@ -535,6 +535,8 @@ class IColumn;
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(Bool, optimize_count_from_files, true, "Optimize counting rows from files in supported input formats.", 0) \
M(Bool, use_cache_for_count_from_files, true, "Use cache to count the number of rows in files.", 0) \
M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \

View File

@ -263,7 +263,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
std::vector<UInt8> DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const
{
Strings paths;
const auto & addresses_with_failover = cluster->getShardsAddresses();
const auto & addresses_with_failover = cluster_->getShardsAddresses();
const auto & shards_info = cluster_->getShardsInfo();
for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
{

View File

@ -147,11 +147,19 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
}
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment) const
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment)
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::CachedReadBufferCreateBufferMicroseconds);
auto path = file_segment.getPathInLocalCache();
if (cache_file_reader)
{
chassert(cache_file_reader->getFileName() == path);
if (cache_file_reader->getFileName() == path)
return cache_file_reader;
cache_file_reader.reset();
}
ReadSettings local_read_settings{settings};
/// Do not allow to use asynchronous version of LocalFSReadMethod.
@ -160,12 +168,12 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm
if (use_external_buffer)
local_read_settings.local_fs_buffer_size = 0;
auto buf = createReadBufferFromFileBase(path, local_read_settings, std::nullopt, std::nullopt, file_segment.getFlagsForLocalRead());
cache_file_reader = createReadBufferFromFileBase(path, local_read_settings, std::nullopt, std::nullopt, file_segment.getFlagsForLocalRead());
if (getFileSizeFromReadBuffer(*buf) == 0)
if (getFileSizeFromReadBuffer(*cache_file_reader) == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path);
return buf;
return cache_file_reader;
}
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
@ -207,7 +215,7 @@ CachedOnDiskReadBufferFromFile::getRemoteReadBuffer(FileSegment & file_segment,
}
else
{
chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false));
chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset());
}
return remote_fs_segment_reader;
@ -240,12 +248,12 @@ bool CachedOnDiskReadBufferFromFile::canStartFromCache(size_t current_offset, co
/// segment{k} state: DOWNLOADING
/// cache: [______|___________
/// ^
/// first_non_downloaded_offset (in progress)
/// current_write_offset (in progress)
/// requested_range: [__________]
/// ^
/// current_offset
size_t first_non_downloaded_offset = file_segment.getFirstNonDownloadedOffset(true);
return first_non_downloaded_offset > current_offset;
size_t current_write_offset = file_segment.getCurrentWriteOffset();
return current_write_offset > current_offset;
}
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
@ -285,7 +293,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
/// segment{k} state: DOWNLOADING
/// cache: [______|___________
/// ^
/// first_non_downloaded_offset (in progress)
/// current_write_offset (in progress)
/// requested_range: [__________]
/// ^
/// file_offset_of_buffer_end
@ -310,7 +318,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
/// segment{k} state: PARTIALLY_DOWNLOADED
/// cache: [______|___________
/// ^
/// first_non_downloaded_offset (in progress)
/// current_write_offset (in progress)
/// requested_range: [__________]
/// ^
/// file_offset_of_buffer_end
@ -327,7 +335,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
/// segment{k}
/// cache: [______|___________
/// ^
/// first_non_downloaded_offset
/// current_write_offset
/// requested_range: [__________]
/// ^
/// file_offset_of_buffer_end
@ -337,7 +345,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
return getCacheReadBuffer(file_segment);
}
auto current_write_offset = file_segment.getCurrentWriteOffset(false);
auto current_write_offset = file_segment.getCurrentWriteOffset();
if (current_write_offset < file_offset_of_buffer_end)
{
/// segment{1}
@ -459,7 +467,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
if (bytes_to_predownload)
{
const size_t current_write_offset = file_segment.getCurrentWriteOffset(false);
const size_t current_write_offset = file_segment.getCurrentWriteOffset();
read_buffer_for_file_segment->seek(current_write_offset, SEEK_SET);
}
else
@ -469,7 +477,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
chassert(read_buffer_for_file_segment->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end);
}
const auto current_write_offset = file_segment.getCurrentWriteOffset(false);
const auto current_write_offset = file_segment.getCurrentWriteOffset();
if (current_write_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
{
throw Exception(
@ -501,6 +509,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext()
appendFilesystemCacheLog(*current_file_segment, read_type);
chassert(file_offset_of_buffer_end > completed_range.right);
cache_file_reader.reset();
file_segments->popFront();
if (file_segments->empty())
@ -547,9 +556,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
/// download from offset a'' < a', but return buffer from offset a'.
LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId());
/// chassert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false));
chassert(static_cast<size_t>(implementation_buffer->getPosition()) == file_segment.getCurrentWriteOffset(false));
size_t current_offset = file_segment.getCurrentWriteOffset(false);
/// chassert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset());
size_t current_offset = file_segment.getCurrentWriteOffset();
chassert(static_cast<size_t>(implementation_buffer->getPosition()) == current_offset);
const auto & current_range = file_segment.range();
while (true)
@ -575,7 +584,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
"current download offset: {}, expected: {}, eof: {}",
bytes_to_predownload,
current_range.toString(),
file_segment.getCurrentWriteOffset(false),
file_segment.getCurrentWriteOffset(),
file_offset_of_buffer_end,
implementation_buffer->eof());
@ -585,7 +594,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
{
nextimpl_working_buffer_offset = implementation_buffer->offset();
auto current_write_offset = file_segment.getCurrentWriteOffset(false);
auto current_write_offset = file_segment.getCurrentWriteOffset();
if (current_write_offset != static_cast<size_t>(implementation_buffer->getPosition())
|| current_write_offset != file_offset_of_buffer_end)
{
@ -614,7 +623,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
{
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);
chassert(file_segment.getCurrentWriteOffset(false) == static_cast<size_t>(implementation_buffer->getPosition()));
chassert(file_segment.getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
continue_predownload = writeCache(implementation_buffer->buffer().begin(), current_predownload_size, current_offset, file_segment);
if (continue_predownload)
@ -695,38 +704,19 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
{
/// If current read_type is ReadType::CACHED and file segment is not DOWNLOADED,
/// it means the following case, e.g. we started from CacheReadBuffer and continue with RemoteFSReadBuffer.
/// segment{k}
/// cache: [______|___________
/// segment{k}
/// cache: [______|___________]
/// ^
/// current_write_offset
/// requested_range: [__________]
/// requested_range: [__________
/// ^
/// file_offset_of_buffer_end
auto current_write_offset = file_segment.getCurrentWriteOffset(true);
bool cached_part_is_finished = current_write_offset == file_offset_of_buffer_end;
LOG_TEST(log, "Current write offset: {}, file offset of buffer end: {}", current_write_offset, file_offset_of_buffer_end);
if (cached_part_is_finished)
if (file_offset_of_buffer_end >= file_segment.getCurrentWriteOffset())
{
/// TODO: does it make sense to reuse the local file reader if we return here with CACHED read type again?
implementation_buffer = getImplementationBuffer(file_segment);
return true;
}
else if (current_write_offset < file_offset_of_buffer_end)
{
const auto path = file_segment.getPathInLocalCache();
size_t file_size = 0;
if (fs::exists(path))
file_size = fs::file_size(path);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Invariant failed. Expected {} >= {} (size on fs: {}, {})",
current_write_offset, file_offset_of_buffer_end, file_size, getInfoForLog());
}
}
else if (read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE)
{
@ -950,23 +940,6 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
{
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheBytes, size);
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheMicroseconds, elapsed);
if (result)
{
const size_t new_file_offset = file_offset_of_buffer_end + size;
const size_t file_segment_write_offset = file_segment.getCurrentWriteOffset(true);
if (new_file_offset > file_segment.range().right + 1 || new_file_offset > file_segment_write_offset)
{
auto file_segment_path = file_segment.getPathInLocalCache();
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Read unexpected size. "
"File size: {}, file segment path: {}, impl size: {}, impl path: {}"
"file segment info: {}",
fs::file_size(file_segment_path), file_segment_path,
implementation_buffer->getFileSize(), implementation_buffer->getFileName(),
file_segment.getInfoForLog());
}
}
}
else
{
@ -985,15 +958,15 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
bool success = file_segment.reserve(size);
if (success)
{
chassert(file_segment.getCurrentWriteOffset(false) == static_cast<size_t>(implementation_buffer->getPosition()));
chassert(file_segment.getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
success = writeCache(implementation_buffer->position(), size, file_offset_of_buffer_end, file_segment);
if (success)
{
chassert(file_segment.getCurrentWriteOffset(false) <= file_segment.range().right + 1);
chassert(file_segment.getCurrentWriteOffset() <= file_segment.range().right + 1);
chassert(
/* last_file_segment */file_segments->size() == 1
|| file_segment.getCurrentWriteOffset(false) == implementation_buffer->getFileOffsetOfBufferEnd());
|| file_segment.getCurrentWriteOffset() == implementation_buffer->getFileOffsetOfBufferEnd());
LOG_TEST(log, "Successfully written {} bytes", size);
download_current_segment_succeeded = true;
@ -1035,7 +1008,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
file_offset_of_buffer_end += size;
if (download_current_segment && download_current_segment_succeeded)
chassert(file_segment.getCurrentWriteOffset(false) >= file_offset_of_buffer_end);
chassert(file_segment.getCurrentWriteOffset() >= file_offset_of_buffer_end);
chassert(file_offset_of_buffer_end <= read_until_position);
}
@ -1084,7 +1057,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
current_read_range.toString(),
file_offset_of_buffer_end,
FileSegment::stateToString(file_segment.state()),
file_segment.getCurrentWriteOffset(false),
file_segment.getCurrentWriteOffset(),
toString(read_type),
read_until_position,
first_offset,
@ -1179,6 +1152,7 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence)
file_segments.reset();
implementation_buffer.reset();
initialized = false;
cache_file_reader.reset();
LOG_TEST(log, "Reset state for seek to position {}", new_pos);
@ -1214,6 +1188,7 @@ void CachedOnDiskReadBufferFromFile::setReadUntilPosition(size_t position)
file_segments.reset();
implementation_buffer.reset();
initialized = false;
cache_file_reader.reset();
read_until_position = position;

View File

@ -76,7 +76,7 @@ private:
ImplementationBufferPtr getReadBufferForFileSegment(FileSegment & file_segment);
ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment) const;
ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment);
ImplementationBufferPtr getRemoteReadBuffer(FileSegment & file_segment, ReadType read_type_);
@ -110,7 +110,8 @@ private:
ImplementationBufferCreator implementation_buffer_creator;
/// Remote read buffer, which can only be owned by the current buffer.
FileSegment::RemoteFileReaderPtr remote_file_reader;
ImplementationBufferPtr remote_file_reader;
ImplementationBufferPtr cache_file_reader;
FileSegmentsHolderPtr file_segments;

View File

@ -70,7 +70,7 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset
while (size > 0)
{
size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize(false);
size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize();
if (available_size == 0)
{
completeFileSegment();
@ -155,7 +155,7 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s
return;
auto file_segment_range = file_segment.range();
size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize(false) - 1;
size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize() - 1;
FilesystemCacheLogElement elem
{

View File

@ -69,7 +69,7 @@ private:
static_cast<uint64_t>(blob.BlobSize),
Poco::Timestamp::fromEpochTime(
std::chrono::duration_cast<std::chrono::seconds>(
blob.Details.LastModified.time_since_epoch()).count()),
static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
{}});
}
@ -162,7 +162,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
static_cast<uint64_t>(blob.BlobSize),
Poco::Timestamp::fromEpochTime(
std::chrono::duration_cast<std::chrono::seconds>(
blob.Details.LastModified.time_since_epoch()).count()),
static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
{}});
}
@ -350,7 +350,7 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c
for (const auto & [key, value] : properties.Metadata)
(*result.attributes)[key] = value;
}
result.last_modified.emplace(properties.LastModified.time_since_epoch().count());
result.last_modified.emplace(static_cast<std::chrono::system_clock::time_point>(properties.LastModified).time_since_epoch().count());
return result;
}

View File

@ -1,25 +0,0 @@
#pragma once
#include "config.h"
#if USE_AWS_S3
#include <utility>
#include <base/types.h>
#include <IO/S3/PocoHTTPClient.h>
#include <Poco/URI.h>
namespace DB::S3
{
class ProxyConfiguration
{
public:
virtual ~ProxyConfiguration() = default;
/// Returns proxy configuration on each HTTP request.
virtual ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) = 0;
virtual void errorReport(const ClientConfigurationPerRequest & config) = 0;
};
}
#endif

View File

@ -1,32 +0,0 @@
#include "ProxyListConfiguration.h"
#if USE_AWS_S3
#include <utility>
#include <Common/logger_useful.h>
namespace DB::S3
{
ProxyListConfiguration::ProxyListConfiguration(std::vector<Poco::URI> proxies_) : proxies(std::move(proxies_)), access_counter(0)
{
}
ClientConfigurationPerRequest ProxyListConfiguration::getConfiguration(const Aws::Http::HttpRequest &)
{
/// Avoid the atomic increment if the number of proxies is 1.
size_t index = proxies.size() > 1 ? (access_counter++) % proxies.size() : 0;
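/// Illustration (editorial note, not in the original sources): with proxies = {p0, p1, p2},
/// successive calls yield indices 0, 1, 2, 0, 1, ... The atomic post-increment makes the
/// counter safe under concurrent requests and wraps harmlessly at SIZE_MAX.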
ClientConfigurationPerRequest cfg;
cfg.proxy_scheme = Aws::Http::SchemeMapper::FromString(proxies[index].getScheme().c_str());
cfg.proxy_host = proxies[index].getHost();
cfg.proxy_port = proxies[index].getPort();
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}", proxies[index].toString());
return cfg;
}
}
#endif

View File

@ -1,32 +0,0 @@
#pragma once
#include "config.h"
#if USE_AWS_S3
#include <atomic> // for std::atomic<size_t>
#include "ProxyConfiguration.h"
namespace DB::S3
{
/**
* For each request to S3, it chooses a proxy from the specified list using a round-robin strategy.
*/
class ProxyListConfiguration : public ProxyConfiguration
{
public:
explicit ProxyListConfiguration(std::vector<Poco::URI> proxies_);
ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
void errorReport(const ClientConfigurationPerRequest &) override {}
private:
/// List of configured proxies.
const std::vector<Poco::URI> proxies;
/// Access counter to get proxy using round-robin strategy.
std::atomic<size_t> access_counter;
};
}
#endif

View File

@ -1,42 +0,0 @@
#pragma once
#include "config.h"
#if USE_AWS_S3
#include "ProxyConfiguration.h"
#include <mutex>
namespace DB::S3
{
/**
* Proxy configuration where the proxy host is obtained each time from the specified endpoint.
* For each request to S3 it makes a GET request to the specified endpoint URL and reads the proxy host from the response body.
* The specified scheme and port are added to the obtained proxy host to form the complete proxy URL.
*/
class ProxyResolverConfiguration : public ProxyConfiguration
{
public:
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_, unsigned cache_ttl_);
ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
void errorReport(const ClientConfigurationPerRequest & config) override;
private:
/// Endpoint to obtain a proxy host.
const Poco::URI endpoint;
/// Scheme for obtained proxy.
const String proxy_scheme;
/// Port for obtained proxy.
const unsigned proxy_port;
std::mutex cache_mutex;
bool cache_valid = false;
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
const std::chrono::seconds cache_ttl{0};
ClientConfigurationPerRequest cached_config;
};
}
#endif

View File

@ -5,6 +5,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/logger_useful.h>
#include <Common/Throttler.h>
#include <Common/ProxyConfigurationResolverProvider.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
@ -17,9 +18,6 @@
#include <Storages/StorageS3Settings.h>
#include <Disks/ObjectStorages/S3/S3ObjectStorage.h>
#include <Disks/ObjectStorages/S3/ProxyConfiguration.h>
#include <Disks/ObjectStorages/S3/ProxyListConfiguration.h>
#include <Disks/ObjectStorages/S3/ProxyResolverConfiguration.h>
#include <Disks/ObjectStorages/DiskObjectStorageCommon.h>
#include <Disks/DiskLocal.h>
#include <Common/Macros.h>
@ -44,76 +42,15 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000));
}
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config)
{
auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint"));
auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme");
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy resolver config: {}", proxy_scheme);
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10);
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
endpoint.toString(), proxy_scheme, proxy_port);
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port, cache_ttl);
}
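/// A sketch of the configuration shape this function expects, inferred from the keys it reads
/// above (the values shown are illustrative, not from the original sources):
///     <proxy>
///         <resolver>
///             <endpoint>http://resolver-host:8080/hostname</endpoint>
///             <proxy_scheme>http</proxy_scheme>
///             <proxy_port>3128</proxy_port>
///             <proxy_cache_time>10</proxy_cache_time>
///         </resolver>
///     </proxy>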
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config)
{
std::vector<String> keys;
proxy_config.keys(prefix, keys);
std::vector<Poco::URI> proxies;
for (const auto & key : keys)
if (startsWith(key, "uri"))
{
Poco::URI proxy_uri(proxy_config.getString(prefix + "." + key));
if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy uri: {}", proxy_uri.toString());
if (proxy_uri.getHost().empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty host in proxy uri: {}", proxy_uri.toString());
proxies.push_back(proxy_uri);
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString());
}
if (!proxies.empty())
return std::make_shared<S3::ProxyListConfiguration>(proxies);
return nullptr;
}
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config)
{
if (!config.has(prefix + ".proxy"))
return nullptr;
std::vector<String> config_keys;
config.keys(prefix + ".proxy", config_keys);
if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver"))
{
if (resolver_configs > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple proxy resolver configurations aren't allowed");
return getProxyResolverConfiguration(prefix + ".proxy.resolver", config);
}
return getProxyListConfiguration(prefix + ".proxy", config);
}
std::unique_ptr<S3::Client> getClient(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
const S3ObjectStorageSettings & settings)
{
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
config.getString(config_prefix + ".region", ""),
context->getRemoteHostFilter(),
@ -121,10 +58,9 @@ std::unique_ptr<S3::Client> getClient(
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
/* for_disk_s3 = */ true,
settings.request_settings.get_request_throttler,
settings.request_settings.put_request_throttler);
settings.request_settings.put_request_throttler,
uri.uri.getScheme());
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);
if (uri.key.back() != '/')
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key);
@ -136,11 +72,14 @@ std::unique_ptr<S3::Client> getClient(
client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000);
client_configuration.wait_on_pool_size_limit = false;
auto proxy_config = getProxyConfiguration(config_prefix, config);
/*
* Override proxy configuration for backwards compatibility with the old configuration format.
*/
auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(config_prefix, config);
if (proxy_config)
{
client_configuration.per_request_configuration
= [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
= [proxy_config]() { return proxy_config->resolve(); };
client_configuration.error_report
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
}

View File

@ -126,6 +126,86 @@ namespace JSONUtils
return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_bytes, min_rows, max_rows);
}
template <const char opening_bracket, const char closing_bracket>
void skipRowForJSONEachRowImpl(ReadBuffer & in)
{
size_t balance = 0;
bool quotes = false;
while (!in.eof())
{
if (quotes)
{
auto * pos = find_first_symbols<'\\', '"'>(in.position(), in.buffer().end());
in.position() = pos;
if (in.position() > in.buffer().end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug.");
else if (in.position() == in.buffer().end())
continue;
if (*in.position() == '\\')
{
++in.position();
if (!in.eof())
++in.position();
}
else if (*in.position() == '"')
{
++in.position();
quotes = false;
}
}
else
{
auto * pos = find_first_symbols<opening_bracket, closing_bracket, '\\', '"'>(in.position(), in.buffer().end());
in.position() = pos;
if (in.position() > in.buffer().end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug.");
else if (in.position() == in.buffer().end())
continue;
else if (*in.position() == opening_bracket)
{
++balance;
++in.position();
}
else if (*in.position() == closing_bracket)
{
--balance;
++in.position();
}
else if (*in.position() == '\\')
{
++in.position();
if (!in.eof())
++in.position();
}
else if (*in.position() == '"')
{
quotes = true;
++in.position();
}
if (balance == 0)
return;
}
}
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected eof");
}
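/// Worked example (editorial illustration): for the row {"a": "x\"y", "b": {"c": 1}},
/// `balance` rises to 1 at the outer '{', to 2 at the inner '{', and falls back to 0 at the
/// final '}', at which point the function returns. The quoted value "x\"y" contributes
/// nothing to the balance: `quotes` is set at the opening '"', the escaped quote is consumed
/// by the '\\' branch, and only the closing '"' clears the flag.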
void skipRowForJSONEachRow(ReadBuffer & in)
{
return skipRowForJSONEachRowImpl<'{', '}'>(in);
}
void skipRowForJSONCompactEachRow(ReadBuffer & in)
{
return skipRowForJSONEachRowImpl<'[', ']'>(in);
}
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info)
{
skipWhitespaceIfAny(in);
@ -612,8 +692,11 @@ namespace JSONUtils
auto names_and_types = JSONUtils::readMetadata(in);
for (const auto & [name, type] : names_and_types)
{
if (!header.has(name))
continue;
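/// Editorial note: columns present in the metadata but absent from the header are simply
/// skipped; only a type mismatch on a column the header knows about is an error.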
auto header_type = header.getByName(name).type;
if (header.has(name) && !type->equals(*header_type))
if (!type->equals(*header_type))
throw Exception(
ErrorCodes::INCORRECT_DATA,
"Type {} of column '{}' from metadata is not the same as type in header {}",

View File

@ -20,6 +20,9 @@ namespace JSONUtils
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows);
std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows);
void skipRowForJSONEachRow(ReadBuffer & in);
void skipRowForJSONCompactEachRow(ReadBuffer & in);
/// Read row in JSONEachRow format and try to determine type for each field.
/// Return list of names and types.
/// If cannot determine the type of some field, return nullptr for it.

View File

@ -47,7 +47,7 @@ bool isRetryableSchemaInferenceError(int code)
ColumnsDescription readSchemaFromFormat(
const String & format_name,
const std::optional<FormatSettings> & format_settings,
ReadBufferIterator & read_buffer_iterator,
IReadBufferIterator & read_buffer_iterator,
bool retry,
ContextPtr & context,
std::unique_ptr<ReadBuffer> & buf)
@ -77,13 +77,12 @@ try
size_t max_bytes_to_read = format_settings ? format_settings->max_bytes_to_read_for_schema_inference
: context->getSettingsRef().input_format_max_bytes_to_read_for_schema_inference;
size_t iterations = 0;
ColumnsDescription cached_columns;
while (true)
{
bool is_eof = false;
try
{
buf = read_buffer_iterator(cached_columns);
buf = read_buffer_iterator.next();
if (!buf)
break;
is_eof = buf->eof();
@ -123,6 +122,9 @@ try
schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf, context, format_settings);
schema_reader->setMaxRowsAndBytesToRead(max_rows_to_read, max_bytes_to_read);
names_and_types = schema_reader->readSchema();
auto num_rows = schema_reader->readNumberOrRows();
if (num_rows)
read_buffer_iterator.setNumRowsToLastFile(*num_rows);
break;
}
catch (...)
@ -177,8 +179,8 @@ try
}
}
if (!cached_columns.empty())
return cached_columns;
if (auto cached_columns = read_buffer_iterator.getCachedColumns())
return *cached_columns;
if (names_and_types.empty())
throw Exception(
@ -229,7 +231,7 @@ catch (Exception & e)
ColumnsDescription readSchemaFromFormat(
const String & format_name,
const std::optional<FormatSettings> & format_settings,
ReadBufferIterator & read_buffer_iterator,
IReadBufferIterator & read_buffer_iterator,
bool retry,
ContextPtr & context)
{

View File

@ -1,15 +1,44 @@
#pragma once
#include <Storages/ColumnsDescription.h>
#include <Storages/Cache/SchemaCache.h>
#include <Formats/FormatFactory.h>
#include <Storages/Cache/SchemaCache.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
using ReadBufferIterator = std::function<std::unique_ptr<ReadBuffer>(ColumnsDescription &)>;
struct IReadBufferIterator
{
virtual ~IReadBufferIterator() = default;
/// Try to determine the schema of the data in the specified format.
virtual std::unique_ptr<ReadBuffer> next() = 0;
virtual std::optional<ColumnsDescription> getCachedColumns() { return std::nullopt; }
virtual void setNumRowsToLastFile(size_t /*num_rows*/) {}
};
struct SingleReadBufferIterator : public IReadBufferIterator
{
public:
SingleReadBufferIterator(std::unique_ptr<ReadBuffer> buf_) : buf(std::move(buf_))
{
}
std::unique_ptr<ReadBuffer> next() override
{
if (done)
return nullptr;
done = true;
return std::move(buf);
}
private:
std::unique_ptr<ReadBuffer> buf;
bool done = false;
};
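/// Usage sketch (illustrative, variable names assumed): wrapping a single buffer lets the
/// schema-inference entry point below consume it through the iterator interface:
///
///     SingleReadBufferIterator iterator(std::move(buf));
///     auto columns = readSchemaFromFormat(format_name, format_settings, iterator,
///                                         /* retry */ false, context);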
/// Try to determine the schema of the data, and the number of rows it contains, in the specified format.
/// For formats that have an external schema reader, it will
/// use it and won't create a read buffer.
/// For formats that have a schema reader from the data,
@ -22,7 +51,7 @@ using ReadBufferIterator = std::function<std::unique_ptr<ReadBuffer>(ColumnsDesc
ColumnsDescription readSchemaFromFormat(
const String & format_name,
const std::optional<FormatSettings> & format_settings,
ReadBufferIterator & read_buffer_iterator,
IReadBufferIterator & read_buffer_iterator,
bool retry,
ContextPtr & context);
@ -30,12 +59,12 @@ ColumnsDescription readSchemaFromFormat(
ColumnsDescription readSchemaFromFormat(
const String & format_name,
const std::optional<FormatSettings> & format_settings,
ReadBufferIterator & read_buffer_iterator,
IReadBufferIterator & read_buffer_iterator,
bool retry,
ContextPtr & context,
std::unique_ptr<ReadBuffer> & buf_out);
SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);
SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);
SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);
SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);
}

View File

@ -58,6 +58,10 @@ bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected &
member_name = std::make_shared<ASTIdentifier>(String(last_begin, pos->end));
++pos;
}
else if (!pos.isValid() && pos->type == TokenType::EndOfStream)
{
member_name = std::make_shared<ASTIdentifier>(String(last_begin, last_end));
}
else
{
return false;

View File

@ -0,0 +1,399 @@
#include <limits>
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/getLeastSupertype.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/castColumn.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
}
enum class ShiftRotateStrategy : uint8_t
{
Shift,
Rotate
};
enum class ShiftRotateDirection : uint8_t
{
Left,
Right
};
template <typename Impl, typename Name>
class FunctionArrayShiftRotate : public IFunction
{
public:
static constexpr auto name = Name::name;
static constexpr ShiftRotateStrategy strategy = Impl::strategy;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayShiftRotate>(); }
String getName() const override { return name; }
bool isVariadic() const override { return strategy == ShiftRotateStrategy::Shift; }
size_t getNumberOfArguments() const override { return strategy == ShiftRotateStrategy::Rotate ? 2 : 0; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if constexpr (strategy == ShiftRotateStrategy::Shift)
{
if (arguments.size() < 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments.", getName());
if (arguments.size() > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at most three arguments.", getName());
}
const DataTypePtr & first_arg = arguments[0];
if (!isArray(first_arg))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}, expected Array",
arguments[0]->getName(),
getName());
if (!isNativeInteger(arguments[1]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}, expected Native Integer",
arguments[1]->getName(),
getName());
const DataTypePtr & elem_type = static_cast<const DataTypeArray &>(*first_arg).getNestedType();
if (arguments.size() == 3)
{
auto ret = tryGetLeastSupertype(DataTypes{elem_type, arguments[2]});
// Note that this will fail if the default value does not fit into the array element type (e.g. UInt64 and Array(UInt8)).
// In this case, the array should be converted to Array(UInt64) explicitly.
if (!ret || !ret->equals(*elem_type))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}, expected {}",
arguments[2]->getName(),
getName(),
elem_type->getName());
}
return std::make_shared<DataTypeArray>(elem_type);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
ColumnPtr column_array_ptr = arguments[0].column;
const auto * column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
if (!column_array)
{
const auto * column_const_array = checkAndGetColumnConst<ColumnArray>(column_array_ptr.get());
if (!column_const_array)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected Array column, found {}", column_array_ptr->getName());
column_array_ptr = column_const_array->convertToFullColumn();
column_array = assert_cast<const ColumnArray *>(column_array_ptr.get());
}
ColumnPtr shift_num_column = arguments[1].column;
if constexpr (strategy == ShiftRotateStrategy::Shift)
{
ColumnPtr default_column;
const auto elem_type = static_cast<const DataTypeArray &>(*result_type).getNestedType();
if (arguments.size() == 3)
default_column = castColumn(arguments[2], elem_type);
else
default_column = elem_type->createColumnConstWithDefaultValue(input_rows_count);
default_column = default_column->convertToFullColumnIfConst();
return Impl::execute(*column_array, shift_num_column, default_column, input_rows_count);
}
else
{
return Impl::execute(*column_array, shift_num_column, input_rows_count);
}
}
};
template <ShiftRotateDirection direction>
struct ArrayRotateImpl
{
static constexpr ShiftRotateStrategy strategy = ShiftRotateStrategy::Rotate;
static ColumnPtr execute(const ColumnArray & array, ColumnPtr shift_num_column, size_t input_rows_count)
{
size_t batch_size = array.getData().size();
IColumn::Permutation permutation(batch_size);
const IColumn::Offsets & offsets = array.getOffsets();
IColumn::Offset current_offset = 0;
for (size_t i = 0; i < input_rows_count; ++i)
{
const size_t offset = offsets[i];
const size_t nested_size = offset - current_offset;
Int64 shift_num_value = shift_num_column->getInt(i);
// Rotating left by -N is the same as rotating right by N.
ShiftRotateDirection actual_direction = direction;
if (shift_num_value < 0)
{
if (shift_num_value == std::numeric_limits<Int64>::min())
throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Shift number {} is out of range", shift_num_value);
actual_direction = (direction == ShiftRotateDirection::Left) ? ShiftRotateDirection::Right : ShiftRotateDirection::Left;
shift_num_value = -shift_num_value;
}
size_t shift_num = static_cast<size_t>(shift_num_value);
if (nested_size > 0 && shift_num >= nested_size)
shift_num %= nested_size;
// Rotating right by N is the same as rotating left by (size - N).
if (actual_direction == ShiftRotateDirection::Right)
shift_num = nested_size - shift_num;
for (size_t j = 0; j < nested_size; ++j)
permutation[current_offset + j] = current_offset + (j + shift_num) % nested_size;
current_offset = offset;
}
return ColumnArray::create(array.getData().permute(permutation, 0), array.getOffsetsPtr());
}
};
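/// Worked example (editorial illustration): arrayRotateLeft([1, 2, 3, 4, 5], 2) builds the
/// permutation j -> (j + 2) % 5, giving [3, 4, 5, 1, 2]. For arrayRotateRight the shift is
/// first mapped to size - N, so the same left-rotation permutation produces a right rotation.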
template <ShiftRotateDirection direction>
struct ArrayShiftImpl
{
static constexpr ShiftRotateStrategy strategy = ShiftRotateStrategy::Shift;
static ColumnPtr
execute(const ColumnArray & array, ColumnPtr shift_column, ColumnPtr default_column, size_t input_column_rows)
{
const IColumn::Offsets & offsets = array.getOffsets();
const IColumn & array_data = array.getData();
const size_t data_size = array_data.size();
auto result_column = array.getData().cloneEmpty();
result_column->reserve(data_size);
IColumn::Offset current_offset = 0;
for (size_t i = 0; i < input_column_rows; ++i)
{
const size_t offset = offsets[i];
const size_t nested_size = offset - current_offset;
Int64 shift_num_value = shift_column->getInt(i);
// Shifting left by -N is the same as shifting right by N.
ShiftRotateDirection actual_direction = direction;
if (shift_num_value < 0)
{
if (shift_num_value == std::numeric_limits<Int64>::min())
throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Shift number {} is out of range", shift_num_value);
actual_direction = (direction == ShiftRotateDirection::Left) ? ShiftRotateDirection::Right : ShiftRotateDirection::Left;
shift_num_value = -shift_num_value;
}
const size_t number_of_default_values = std::min(static_cast<size_t>(shift_num_value), nested_size);
const size_t num_of_original_values = nested_size - number_of_default_values;
if (actual_direction == ShiftRotateDirection::Right)
{
result_column->insertManyFrom(*default_column, i, number_of_default_values);
result_column->insertRangeFrom(array_data, current_offset, num_of_original_values);
}
else
{
result_column->insertRangeFrom(array_data, current_offset + number_of_default_values, num_of_original_values);
result_column->insertManyFrom(*default_column, i, number_of_default_values);
}
current_offset = offset;
}
return ColumnArray::create(std::move(result_column), array.getOffsetsPtr());
}
};
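/// Worked example (editorial illustration): arrayShiftLeft([1, 2, 3, 4, 5], 2) copies the
/// three surviving elements first and then appends two defaults, giving [3, 4, 5, 0, 0];
/// arrayShiftRight inserts the defaults first instead, giving [0, 0, 1, 2, 3].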
struct NameArrayShiftLeft
{
static constexpr auto name = "arrayShiftLeft";
};
struct NameArrayShiftRight
{
static constexpr auto name = "arrayShiftRight";
};
struct NameArrayRotateLeft
{
static constexpr auto name = "arrayRotateLeft";
};
struct NameArrayRotateRight
{
static constexpr auto name = "arrayRotateRight";
};
using ArrayShiftLeftImpl = ArrayShiftImpl<ShiftRotateDirection::Left>;
using FunctionArrayShiftLeft = FunctionArrayShiftRotate<ArrayShiftLeftImpl, NameArrayShiftLeft>;
using ArrayShiftRightImpl = ArrayShiftImpl<ShiftRotateDirection::Right>;
using FunctionArrayShiftRight = FunctionArrayShiftRotate<ArrayShiftRightImpl, NameArrayShiftRight>;
using ArrayRotateLeftImpl = ArrayRotateImpl<ShiftRotateDirection::Left>;
using FunctionArrayRotateLeft = FunctionArrayShiftRotate<ArrayRotateLeftImpl, NameArrayRotateLeft>;
using ArrayRotateRightImpl = ArrayRotateImpl<ShiftRotateDirection::Right>;
using FunctionArrayRotateRight = FunctionArrayShiftRotate<ArrayRotateRightImpl, NameArrayRotateRight>;
REGISTER_FUNCTION(ArrayShiftOrRotate)
{
factory.registerFunction<FunctionArrayRotateLeft>(
FunctionDocumentation{
.description = R"(
Returns an array of the same size as the original array with elements rotated
to the left by the specified number of positions.
[example:simple_int]
[example:overflow_int]
[example:simple_string]
[example:simple_array]
[example:simple_nested_array]
Negative rotate values are treated as rotating to the right by the absolute
value of the rotation.
[example:negative_rotation_int]
)",
.examples{
{"simple_int", "SELECT arrayRotateLeft([1, 2, 3, 4, 5], 3)", "[4, 5, 1, 2, 3]"},
{"simple_string", "SELECT arrayRotateLeft(['a', 'b', 'c', 'd', 'e'], 3)", "['d', 'e', 'a', 'b', 'c']"},
{"simple_array", "SELECT arrayRotateLeft([[1, 2], [3, 4], [5, 6]], 2)", "[[5, 6], [1, 2], [3, 4]]"},
{"simple_nested_array",
"SELECT arrayRotateLeft([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1)",
"[[[5, 6], [7, 8]], [[1, 2], [3, 4]]]"},
{"negative_rotation_int", "SELECT arrayRotateLeft([1, 2, 3, 4, 5], -3)", "[3, 4, 5, 1, 2]"},
{"overflow_int", "SELECT arrayRotateLeft([1, 2, 3, 4, 5], 8)", "[4, 5, 1, 2, 3]"},
},
.categories = {"Array"},
});
factory.registerFunction<FunctionArrayRotateRight>(
FunctionDocumentation{
.description = R"(
Returns an array of the same size as the original array with elements rotated
to the right by the specified number of positions.
[example:simple_int]
[example:overflow_int]
[example:simple_string]
[example:simple_array]
[example:simple_nested_array]
Negative rotate values are treated as rotating to the left by the absolute
value of the rotation.
[example:negative_rotation_int]
)",
.examples{
{"simple_int", "SELECT arrayRotateRight([1, 2, 3, 4, 5], 3)", "[3, 4, 5, 1, 2]"},
{"simple_string", "SELECT arrayRotateRight(['a', 'b', 'c', 'd', 'e'], 3)", "['c', 'd', 'e', 'a', 'b']"},
{"simple_array", "SELECT arrayRotateRight([[1, 2], [3, 4], [5, 6]], 2)", "[[3, 4], [5, 6], [1, 2]]"},
{"simple_nested_array",
"SELECT arrayRotateRight([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1)",
"[[[7, 8], [1, 2]], [[3, 4], [5, 6]]]"},
{"negative_rotation_int", "SELECT arrayRotateRight([1, 2, 3, 4, 5], -3)", "[4, 5, 1, 2, 3]"},
{"overflow_int", "SELECT arrayRotateRight([1, 2, 3, 4, 5], 8)", "[4, 5, 1, 2, 3]"},
},
.categories = {"Array"},
});
factory.registerFunction<FunctionArrayShiftLeft>(
FunctionDocumentation{
.description = R"(
Returns an array of the same size as the original array with elements shifted
to the left by the specified number of positions. New elements are filled with
provided default values or default values of the corresponding type.
[example:simple_int]
[example:overflow_int]
[example:simple_string]
[example:simple_array]
[example:simple_nested_array]
Negative shift values are treated as shifting to the right by the absolute
value of the shift.
[example:negative_shift_int]
The default value must be of the same type as the array elements.
[example:simple_int_with_default]
[example:simple_string_with_default]
[example:simple_array_with_default]
[example:casted_array_with_default]
)",
.examples{
{"simple_int", "SELECT arrayShiftLeft([1, 2, 3, 4, 5], 3)", "[4, 5, 0, 0, 0]"},
{"negative_shift_int", "SELECT arrayShiftLeft([1, 2, 3, 4, 5], -3)", "[0, 0, 0, 1, 2]"},
{"overflow_int", "SELECT arrayShiftLeft([1, 2, 3, 4, 5], 8)", "[0, 0, 0, 0, 0]"},
{"simple_string", "SELECT arrayShiftLeft(['a', 'b', 'c', 'd', 'e'], 3)", "['d', 'e', '', '', '']"},
{"simple_array", "SELECT arrayShiftLeft([[1, 2], [3, 4], [5, 6]], 2)", "[[5, 6], [], []]"},
{"simple_nested_array", "SELECT arrayShiftLeft([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1)", "[[[5, 6], [7, 8]], []]"},
{"simple_int_with_default", "SELECT arrayShiftLeft([1, 2, 3, 4, 5], 3, 7)", "[4, 5, 7, 7, 7]"},
{"simple_string_with_default", "SELECT arrayShiftLeft(['a', 'b', 'c', 'd', 'e'], 3, 'foo')", "['d', 'e', 'foo', 'foo', 'foo']"},
{"simple_array_with_default", "SELECT arrayShiftLeft([[1, 2], [3, 4], [5, 6]], 2, [7, 8])", "[[5, 6], [7, 8], [7, 8]]"},
{"casted_array_with_default",
"SELECT arrayShiftLeft(CAST('[1, 2, 3, 4, 5, 6]', 'Array(UInt16)'), 1, 1000)",
"[2, 3, 4, 5, 6, 1000]"},
},
.categories = {"Array"},
});
factory.registerFunction<FunctionArrayShiftRight>(
FunctionDocumentation{
.description = R"(
Returns an array of the same size as the original array with elements shifted
to the right by the specified number of positions. New elements are filled with
provided default values or default values of the corresponding type.
[example:simple_int]
[example:overflow_int]
[example:simple_string]
[example:simple_array]
[example:simple_nested_array]
Negative shift values are treated as shifting to the left by the absolute
value of the shift.
[example:negative_shift_int]
The default value must be of the same type as the array elements.
[example:simple_int_with_default]
[example:simple_string_with_default]
[example:simple_array_with_default]
[example:casted_array_with_default]
)",
.examples{
{"simple_int", "SELECT arrayShiftRight([1, 2, 3, 4, 5], 3)", "[0, 0, 0, 1, 2]"},
{"negative_shift_int", "SELECT arrayShiftRight([1, 2, 3, 4, 5], -3)", "[4, 5, 0, 0, 0]"},
{"overflow_int", "SELECT arrayShiftRight([1, 2, 3, 4, 5], 8)", "[0, 0, 0, 0, 0]"},
{"simple_string", "SELECT arrayShiftRight(['a', 'b', 'c', 'd', 'e'], 3)", "['', '', '', 'a', 'b']"},
{"simple_array", "SELECT arrayShiftRight([[1, 2], [3, 4], [5, 6]], 2)", "[[], [], [1, 2]]"},
{"simple_nested_array", "SELECT arrayShiftRight([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1)", "[[], [[1, 2], [3, 4]]]"},
{"simple_int_with_default", "SELECT arrayShiftRight([1, 2, 3, 4, 5], 3, 7)", "[7, 7, 7, 1, 2]"},
{"simple_string_with_default",
"SELECT arrayShiftRight(['a', 'b', 'c', 'd', 'e'], 3, 'foo')",
"['foo', 'foo', 'foo', 'a', 'b']"},
{"simple_array_with_default", "SELECT arrayShiftRight([[1, 2], [3, 4], [5, 6]], 2, [7, 8])", "[[7, 8], [7, 8], [1, 2]]"},
{"casted_array_with_default",
"SELECT arrayShiftRight(CAST('[1, 2, 3, 4, 5, 6]', 'Array(UInt16)'), 1, 1000)",
"[1000, 1, 2, 3, 4, 5]"},
},
.categories = {"Array"},
});
}
}

View File

@ -485,15 +485,16 @@ namespace
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
FunctionArgumentDescriptors mandatory_args{
{"time", &isString<IDataType>, nullptr, "String"},
{"format", &isString<IDataType>, nullptr, "String"},
{"format", &isString<IDataType>, nullptr, "String"}
};
if (arguments.size() == 3)
args.emplace_back(FunctionArgumentDescriptor{"timezone", &isString<IDataType>, nullptr, "String"});
FunctionArgumentDescriptors optional_args{
{"timezone", &isString<IDataType>, &isColumnConst, "const String"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
String time_zone_name = getTimeZone(arguments).getTimeZone();
DataTypePtr date_type = std::make_shared<DataTypeDateTime>(time_zone_name);

View File

@ -137,7 +137,12 @@ namespace
throw Exception(ErrorCodes::UNSUPPORTED_URI_SCHEME, "Unsupported scheme in URI '{}'", uri.toString());
}
HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive)
HTTPSessionPtr makeHTTPSessionImpl(
const std::string & host,
UInt16 port,
bool https,
bool keep_alive,
Poco::Net::HTTPClientSession::ProxyConfig proxy_config = {})
{
HTTPSessionPtr session;
@ -158,6 +163,9 @@ namespace
/// doesn't work properly without patch
session->setKeepAlive(keep_alive);
session->setProxyConfig(proxy_config);
return session;
}
@ -333,13 +341,17 @@ void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_
response.set("Keep-Alive", "timeout=" + std::to_string(timeout.totalSeconds()));
}
HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts)
HTTPSessionPtr makeHTTPSession(
const Poco::URI & uri,
const ConnectionTimeouts & timeouts,
Poco::Net::HTTPClientSession::ProxyConfig proxy_config
)
{
const std::string & host = uri.getHost();
UInt16 port = uri.getPort();
bool https = isHTTPS(uri);
auto session = makeHTTPSessionImpl(host, port, https, false);
auto session = makeHTTPSessionImpl(host, port, https, false, proxy_config);
setTimeouts(*session, timeouts);
return session;
}

View File

@ -69,7 +69,11 @@ void markSessionForReuse(PooledHTTPSessionPtr session);
void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout);
/// Create session object to perform requests and set required parameters.
HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts);
HTTPSessionPtr makeHTTPSession(
const Poco::URI & uri,
const ConnectionTimeouts & timeouts,
Poco::Net::HTTPClientSession::ProxyConfig proxy_config = {}
);
/// Like the previous method, this creates a session, but takes it from a pool (with and without a proxy URI).
PooledHTTPSessionPtr makePooledHTTPSession(

View File

@ -250,7 +250,8 @@ ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::ReadWriteBufferFromHTTPBase(
bool delay_initialization,
bool use_external_buffer_,
bool http_skip_not_found_url_,
std::optional<HTTPFileInfo> file_info_)
std::optional<HTTPFileInfo> file_info_,
Poco::Net::HTTPClientSession::ProxyConfig proxy_config_)
: SeekableReadBuffer(nullptr, 0)
, uri {uri_}
, method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
@ -265,6 +266,7 @@ ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::ReadWriteBufferFromHTTPBase(
, http_skip_not_found_url(http_skip_not_found_url_)
, settings {settings_}
, log(&Poco::Logger::get("ReadWriteBufferFromHTTP"))
, proxy_config(proxy_config_)
{
if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0
|| settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms)
@ -784,9 +786,21 @@ template <typename UpdatableSessionPtr>
const std::string & ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getCompressionMethod() const { return content_encoding; }
template <typename UpdatableSessionPtr>
std::optional<time_t> ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getLastModificationTime()
std::optional<time_t> ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::tryGetLastModificationTime()
{
return getFileInfo().last_modified;
if (!file_info)
{
try
{
file_info = getFileInfo();
}
catch (...)
{
return std::nullopt;
}
}
return file_info->last_modified;
}
template <typename UpdatableSessionPtr>
@ -848,12 +862,12 @@ HTTPFileInfo ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::parseFileInfo(con
}
SessionFactory::SessionFactory(const ConnectionTimeouts & timeouts_)
: timeouts(timeouts_) {}
SessionFactory::SessionFactory(const ConnectionTimeouts & timeouts_, Poco::Net::HTTPClientSession::ProxyConfig proxy_config_)
: timeouts(timeouts_), proxy_config(proxy_config_) {}
SessionFactory::SessionType SessionFactory::buildNewSession(const Poco::URI & uri)
{
return makeHTTPSession(uri, timeouts);
return makeHTTPSession(uri, timeouts, proxy_config);
}
ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(
@ -870,9 +884,10 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(
bool delay_initialization_,
bool use_external_buffer_,
bool skip_not_found_url_,
std::optional<HTTPFileInfo> file_info_)
std::optional<HTTPFileInfo> file_info_,
Poco::Net::HTTPClientSession::ProxyConfig proxy_config_)
: Parent(
std::make_shared<SessionType>(uri_, max_redirects, std::make_shared<SessionFactory>(timeouts)),
std::make_shared<SessionType>(uri_, max_redirects, std::make_shared<SessionFactory>(timeouts, proxy_config_)),
uri_,
credentials_,
method_,
@ -884,7 +899,8 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(
delay_initialization_,
use_external_buffer_,
skip_not_found_url_,
file_info_) {}
file_info_,
proxy_config_) {}
PooledSessionFactory::PooledSessionFactory(

View File

@ -111,6 +111,8 @@ namespace detail
ReadSettings settings;
Poco::Logger * log;
Poco::Net::HTTPClientSession::ProxyConfig proxy_config;
bool withPartialContent(const HTTPRange & range) const;
size_t getOffset() const;
@ -161,7 +163,8 @@ namespace detail
bool delay_initialization = false,
bool use_external_buffer_ = false,
bool http_skip_not_found_url_ = false,
std::optional<HTTPFileInfo> file_info_ = std::nullopt);
std::optional<HTTPFileInfo> file_info_ = std::nullopt,
Poco::Net::HTTPClientSession::ProxyConfig proxy_config_ = {});
void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false);
@ -201,7 +204,7 @@ namespace detail
const std::string & getCompressionMethod() const;
std::optional<time_t> getLastModificationTime();
std::optional<time_t> tryGetLastModificationTime();
HTTPFileInfo getFileInfo();
@ -212,13 +215,14 @@ namespace detail
class SessionFactory
{
public:
explicit SessionFactory(const ConnectionTimeouts & timeouts_);
explicit SessionFactory(const ConnectionTimeouts & timeouts_, Poco::Net::HTTPClientSession::ProxyConfig proxy_config_ = {});
using SessionType = HTTPSessionPtr;
SessionType buildNewSession(const Poco::URI & uri);
private:
ConnectionTimeouts timeouts;
Poco::Net::HTTPClientSession::ProxyConfig proxy_config;
};
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<SessionFactory>>>
@ -241,7 +245,8 @@ public:
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false,
std::optional<HTTPFileInfo> file_info_ = std::nullopt);
std::optional<HTTPFileInfo> file_info_ = std::nullopt,
Poco::Net::HTTPClientSession::ProxyConfig proxy_config_ = {});
};
class PooledSessionFactory

View File

@ -24,6 +24,8 @@
#include <Common/assert_cast.h>
#include <Common/logger_useful.h>
#include <Common/ProxyConfigurationResolverProvider.h>
namespace ProfileEvents
{
@ -861,16 +863,28 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT
bool enable_s3_requests_logging,
bool for_disk_s3,
const ThrottlerPtr & get_request_throttler,
const ThrottlerPtr & put_request_throttler)
const ThrottlerPtr & put_request_throttler,
const String & protocol)
{
return PocoHTTPClientConfiguration(
auto proxy_configuration_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::protocolFromString(protocol));
auto per_request_configuration = [=] () { return proxy_configuration_resolver->resolve(); };
auto error_report = [=] (const DB::ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); };
auto config = PocoHTTPClientConfiguration(
per_request_configuration,
force_region,
remote_host_filter,
s3_max_redirects,
enable_s3_requests_logging,
for_disk_s3,
get_request_throttler,
put_request_throttler);
put_request_throttler,
error_report);
config.scheme = Aws::Http::SchemeMapper::FromString(protocol.c_str());
return config;
}
}

View File

@ -314,7 +314,8 @@ public:
bool enable_s3_requests_logging,
bool for_disk_s3,
const ThrottlerPtr & get_request_throttler,
const ThrottlerPtr & put_request_throttler);
const ThrottlerPtr & put_request_throttler,
const String & protocol = "https");
private:
ClientFactory();

View File

@ -532,13 +532,13 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
configuration.enable_s3_requests_logging,
configuration.for_disk_s3,
configuration.get_request_throttler,
configuration.put_request_throttler);
configuration.put_request_throttler,
Aws::Http::SchemeMapper::ToString(Aws::Http::Scheme::HTTP));
/// See MakeDefaultHttpResourceClientConfiguration().
/// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside
/// of contrib/aws/aws-cpp-sdk-core/source/internal/AWSHttpResourceClient.cpp
aws_client_configuration.maxConnections = 2;
aws_client_configuration.scheme = Aws::Http::Scheme::HTTP;
/// Explicitly set the proxy settings to empty/zero to avoid relying on defaults that could potentially change
/// in the future.

View File

@ -85,20 +85,24 @@ namespace DB::S3
{
PocoHTTPClientConfiguration::PocoHTTPClientConfiguration(
std::function<DB::ProxyConfiguration()> per_request_configuration_,
const String & force_region_,
const RemoteHostFilter & remote_host_filter_,
unsigned int s3_max_redirects_,
bool enable_s3_requests_logging_,
bool for_disk_s3_,
const ThrottlerPtr & get_request_throttler_,
const ThrottlerPtr & put_request_throttler_)
: force_region(force_region_)
const ThrottlerPtr & put_request_throttler_,
std::function<void(const DB::ProxyConfiguration &)> error_report_)
: per_request_configuration(per_request_configuration_)
, force_region(force_region_)
, remote_host_filter(remote_host_filter_)
, s3_max_redirects(s3_max_redirects_)
, enable_s3_requests_logging(enable_s3_requests_logging_)
, for_disk_s3(for_disk_s3_)
, get_request_throttler(get_request_throttler_)
, put_request_throttler(put_request_throttler_)
, error_report(error_report_)
{
}
@ -262,8 +266,8 @@ void PocoHTTPClient::makeRequestInternal(
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const
{
/// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session.
const auto request_configuration = per_request_configuration(request);
if (http_connection_pool_size && request_configuration.proxy_host.empty())
const auto request_configuration = per_request_configuration();
if (http_connection_pool_size && request_configuration.host.empty())
makeRequestInternalImpl<true>(request, request_configuration, response, readLimiter, writeLimiter);
else
makeRequestInternalImpl<false>(request, request_configuration, response, readLimiter, writeLimiter);
@ -272,7 +276,7 @@ void PocoHTTPClient::makeRequestInternal(
template <bool pooled>
void PocoHTTPClient::makeRequestInternalImpl(
Aws::Http::HttpRequest & request,
const ClientConfigurationPerRequest & request_configuration,
const DB::ProxyConfiguration & request_configuration,
std::shared_ptr<PocoHTTPResponse> & response,
Aws::Utils::RateLimits::RateLimiterInterface *,
Aws::Utils::RateLimits::RateLimiterInterface *) const
@ -327,7 +331,7 @@ void PocoHTTPClient::makeRequestInternalImpl(
Poco::URI target_uri(uri);
SessionPtr session;
if (!request_configuration.proxy_host.empty())
if (!request_configuration.host.empty())
{
if (enable_s3_requests_logging)
LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri);
@ -339,12 +343,12 @@ void PocoHTTPClient::makeRequestInternalImpl(
target_uri, timeouts, http_connection_pool_size, wait_on_pool_size_limit);
else
session = makeHTTPSession(target_uri, timeouts);
bool use_tunnel = request_configuration.proxy_scheme == Aws::Http::Scheme::HTTP && target_uri.getScheme() == "https";
bool use_tunnel = request_configuration.protocol == DB::ProxyConfiguration::Protocol::HTTP && target_uri.getScheme() == "https";
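/// Editorial note: a CONNECT tunnel is only required when a plain-HTTP proxy has to carry
/// HTTPS traffic; in the other scheme combinations this code relays the request directly.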
session->setProxy(
request_configuration.proxy_host,
request_configuration.proxy_port,
Aws::Http::SchemeMapper::ToString(request_configuration.proxy_scheme),
request_configuration.host,
request_configuration.port,
DB::ProxyConfiguration::protocolToString(request_configuration.protocol),
use_tunnel
);
}

View File

@ -9,6 +9,7 @@
#include <Common/RemoteHostFilter.h>
#include <Common/Throttler_fwd.h>
#include <Common/ProxyConfiguration.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <IO/HTTPHeaderEntries.h>
@ -34,16 +35,9 @@ namespace DB::S3
{
class ClientFactory;
struct ClientConfigurationPerRequest
{
Aws::Http::Scheme proxy_scheme = Aws::Http::Scheme::HTTPS;
String proxy_host;
unsigned proxy_port = 0;
};
struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
{
std::function<ClientConfigurationPerRequest(const Aws::Http::HttpRequest &)> per_request_configuration = [] (const Aws::Http::HttpRequest &) { return ClientConfigurationPerRequest(); };
std::function<DB::ProxyConfiguration()> per_request_configuration;
String force_region;
const RemoteHostFilter & remote_host_filter;
unsigned int s3_max_redirects;
@ -62,17 +56,19 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
void updateSchemeAndRegion();
std::function<void(const ClientConfigurationPerRequest &)> error_report;
std::function<void(const DB::ProxyConfiguration &)> error_report;
private:
PocoHTTPClientConfiguration(
std::function<DB::ProxyConfiguration()> per_request_configuration_,
const String & force_region_,
const RemoteHostFilter & remote_host_filter_,
unsigned int s3_max_redirects_,
bool enable_s3_requests_logging_,
bool for_disk_s3_,
const ThrottlerPtr & get_request_throttler_,
const ThrottlerPtr & put_request_throttler_
const ThrottlerPtr & put_request_throttler_,
std::function<void(const DB::ProxyConfiguration &)> error_report_
);
/// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization.
@ -165,7 +161,7 @@ private:
template <bool pooled>
void makeRequestInternalImpl(
Aws::Http::HttpRequest & request,
const ClientConfigurationPerRequest & per_request_configuration,
const DB::ProxyConfiguration & per_request_configuration,
std::shared_ptr<PocoHTTPResponse> & response,
Aws::Utils::RateLimits::RateLimiterInterface * readLimiter,
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const;
@ -174,8 +170,8 @@ protected:
static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request);
void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const;
std::function<ClientConfigurationPerRequest(const Aws::Http::HttpRequest &)> per_request_configuration;
std::function<void(const ClientConfigurationPerRequest &)> error_report;
std::function<DB::ProxyConfiguration()> per_request_configuration;
std::function<void(const DB::ProxyConfiguration &)> error_report;
ConnectionTimeouts timeouts;
const RemoteHostFilter & remote_host_filter;
unsigned int s3_max_redirects;

View File

@ -26,9 +26,19 @@
#include <IO/S3/Client.h>
#include <IO/HTTPHeaderEntries.h>
#include <Storages/StorageS3Settings.h>
#include <Poco/Util/ServerApplication.h>
#include "TestPocoHTTPServer.h"
/*
* When all tests are executed together, `Context::getGlobalContextInstance()` is not null. The global context is used by
* ProxyResolvers to get the proxy configuration (used by S3 clients). If the global context does not have a valid ConfigRef,
* it relies on Poco::Util::Application::instance() to grab the config. However, at this point the application is not yet
* initialized, and `Poco::Util::Application::instance()` returns nullptr. This causes the test to fail. To fix this, we
* create a dummy application that takes care of initialization.
*/
[[maybe_unused]] static Poco::Util::ServerApplication app;
class NoRetryStrategy : public Aws::Client::StandardRetryStrategy
{
@ -125,7 +135,8 @@ void testServerSideEncryption(
enable_s3_requests_logging,
/* for_disk_s3 = */ false,
/* get_request_throttler = */ {},
/* put_request_throttler = */ {}
/* put_request_throttler = */ {},
uri.uri.getScheme()
);
client_configuration.endpointOverride = uri.endpoint;

View File

@ -42,11 +42,8 @@ private:
using Base = CacheBase<UInt128, UncompressedCacheCell, UInt128TrivialHash, UncompressedSizeWeightFunction>;
public:
explicit UncompressedCache(size_t max_size_in_bytes)
: Base(max_size_in_bytes) {}
UncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes)
: Base(uncompressed_cache_policy, max_size_in_bytes) {}
UncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio)
: Base(cache_policy, max_size_in_bytes, 0, size_ratio) {}
/// Calculate key from path to file and offset.
static UInt128 hash(const String & path_to_file, size_t offset)

View File

@ -13,9 +13,10 @@ WriteBufferFromHTTP::WriteBufferFromHTTP(
const std::string & content_encoding,
const HTTPHeaderEntries & additional_headers,
const ConnectionTimeouts & timeouts,
size_t buffer_size_)
size_t buffer_size_,
Poco::Net::HTTPClientSession::ProxyConfig proxy_configuration)
: WriteBufferFromOStream(buffer_size_)
, session{makeHTTPSession(uri, timeouts)}
, session{makeHTTPSession(uri, timeouts, proxy_configuration)}
, request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1}
{
request.setHost(uri.getHost());

View File

@ -25,7 +25,8 @@ public:
const std::string & content_encoding = "",
const HTTPHeaderEntries & additional_headers = {},
const ConnectionTimeouts & timeouts = {},
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
Poco::Net::HTTPClientSession::ProxyConfig proxy_configuration = {});
private:
/// Receives response from the server after sending all data.

View File

@ -175,7 +175,7 @@ public:
private:
CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard<std::mutex> &)
{
if (!hash_table_stats || hash_table_stats->maxSize() != params.max_entries_for_hash_table_stats)
if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats)
hash_table_stats = std::make_shared<Cache>(params.max_entries_for_hash_table_stats);
return hash_table_stats;
}

View File

@ -9,8 +9,6 @@
#include <Interpreters/Cache/LRUFileCachePriority.h>
#include <Interpreters/Context.h>
#include <base/hex.h>
#include <pcg-random/pcg_random.hpp>
#include <Common/randomSeed.h>
#include <Common/ThreadPool.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>
@ -21,8 +19,11 @@ namespace fs = std::filesystem;
namespace ProfileEvents
{
extern const Event FilesystemCacheLoadMetadataMicroseconds;
extern const Event FilesystemCacheEvictedBytes;
extern const Event FilesystemCacheEvictedFileSegments;
extern const Event FilesystemCacheEvictionSkippedFileSegments;
extern const Event FilesystemCacheEvictionTries;
extern const Event FilesystemCacheLockCacheMicroseconds;
extern const Event FilesystemCacheReserveMicroseconds;
extern const Event FilesystemCacheEvictMicroseconds;
@ -42,6 +43,7 @@ size_t roundUpToMultiple(size_t num, size_t multiple)
{
return roundDownToMultiple(num + multiple - 1, multiple);
}
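/// E.g. (editorial): roundUpToMultiple(10, 4) == 12, while roundUpToMultiple(12, 4) == 12;
/// adding multiple - 1 before rounding down bumps every non-multiple to the next boundary
/// and leaves exact multiples untouched.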
}
namespace DB
@ -56,6 +58,7 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s
, bypass_cache_threshold(settings.enable_bypass_cache_with_threashold ? settings.bypass_cache_threashold : 0)
, boundary_alignment(settings.boundary_alignment)
, background_download_threads(settings.background_download_threads)
, metadata_download_threads(settings.load_metadata_threads)
, log(&Poco::Logger::get("FileCache(" + cache_name + ")"))
, metadata(settings.base_path)
{
@ -165,41 +168,6 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment:
if (!file_segment_metadata.evicting())
{
file_segment = file_segment_metadata.file_segment;
if (file_segment->isDownloaded())
{
if (file_segment->getDownloadedSize(true) == 0)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot have zero size downloaded file segments. {}",
file_segment->getInfoForLog());
}
#ifndef NDEBUG
/**
* Check that in-memory state of the cache is consistent with the state on disk.
* Check only in debug build, because such checks can be done often and can be quite
* expensive compared to overall query execution time.
*/
fs::path path = file_segment->getPathInLocalCache();
if (!fs::exists(path))
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"File path does not exist, but file has DOWNLOADED state. {}",
file_segment->getInfoForLog());
}
if (fs::file_size(path) == 0)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot have zero size downloaded file segments. {}",
file_segment->getInfoForLog());
}
#endif
}
}
else
{
@ -551,37 +519,18 @@ KeyMetadata::iterator FileCache::addFileSegment(
result_state = state;
}
PriorityIterator cache_it;
if (state == FileSegment::State::DOWNLOADED)
auto file_segment = std::make_shared<FileSegment>(key, offset, size, result_state, settings, this, locked_key.getKeyMetadata());
auto file_segment_metadata = std::make_shared<FileSegmentMetadata>(std::move(file_segment));
auto [file_segment_metadata_it, inserted] = locked_key.getKeyMetadata()->emplace(offset, file_segment_metadata);
if (!inserted)
{
cache_it = main_priority->add(locked_key.getKeyMetadata(), offset, size, *lock);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Failed to insert {}:{}: entry already exists", key, offset);
}
try
{
auto file_segment = std::make_shared<FileSegment>(
key, offset, size, result_state, settings, this, locked_key.getKeyMetadata(), cache_it);
auto file_segment_metadata = std::make_shared<FileSegmentMetadata>(std::move(file_segment));
auto [file_segment_metadata_it, inserted] = locked_key.getKeyMetadata()->emplace(offset, file_segment_metadata);
if (!inserted)
{
if (cache_it)
cache_it->remove(*lock);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Failed to insert {}:{}: entry already exists", key, offset);
}
return file_segment_metadata_it;
}
catch (...)
{
if (cache_it)
cache_it->remove(*lock);
throw;
}
return file_segment_metadata_it;
}
bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCacheReserveStat & reserve_stat)
@ -669,6 +618,8 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
{
stat_by_kind.non_releasable_size += segment_metadata->size();
++stat_by_kind.non_releasable_count;
ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionSkippedFileSegments);
}
return PriorityIterationResult::CONTINUE;
@ -684,6 +635,8 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
if (is_query_priority_overflow())
{
ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries);
query_priority->iterate(
[&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
{ return is_query_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; },
@ -731,6 +684,8 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
if (is_main_priority_overflow())
{
ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries);
main_priority->iterate(
[&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
{ return is_main_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; },
@ -857,13 +812,8 @@ void FileCache::removeAllReleasable()
void FileCache::loadMetadata()
{
auto lock = lockCache();
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::FilesystemCacheLoadMetadataMicroseconds);
UInt64 offset = 0;
size_t size = 0;
std::vector<std::pair<PriorityIterator, std::weak_ptr<FileSegment>>> queue_entries;
/// cache_base_path / key_prefix / key / offset
if (!metadata.empty())
{
throw Exception(
@ -873,149 +823,236 @@ void FileCache::loadMetadata()
"Please, check log for error messages");
}
size_t total_size = 0;
for (auto key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}; key_prefix_it != fs::directory_iterator();
key_prefix_it++)
loadMetadataImpl();
/// Shuffle file_segment_metadatas to have random order in LRUQueue
/// as at startup all file_segment_metadatas have the same priority.
main_priority->shuffle(lockCache());
}
void FileCache::loadMetadataImpl()
{
auto get_keys_dir_to_process = [
&, key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}, get_key_mutex = std::mutex()]
() mutable -> std::optional<fs::path>
{
const fs::path key_prefix_directory = key_prefix_it->path();
if (!key_prefix_it->is_directory())
std::lock_guard lk(get_key_mutex);
while (true)
{
if (key_prefix_directory.filename() != "status")
if (key_prefix_it == fs::directory_iterator())
return std::nullopt;
auto path = key_prefix_it->path();
if (key_prefix_it->is_directory())
{
LOG_WARNING(
log, "Unexpected file {} (not a directory), will skip it",
key_prefix_directory.string());
key_prefix_it++;
return path;
}
if (key_prefix_it->path().filename() != "status")
{
LOG_WARNING(log, "Unexpected file {} (not a directory), will skip it", path.string());
}
key_prefix_it++;
}
};
std::vector<ThreadFromGlobalPool> loading_threads;
std::exception_ptr first_exception;
std::mutex set_exception_mutex;
std::atomic<bool> stop_loading = false;
LOG_INFO(log, "Loading filesystem cache with {} threads", metadata_download_threads);
for (size_t i = 0; i < metadata_download_threads; ++i)
{
try
{
loading_threads.emplace_back([&]
{
while (!stop_loading)
{
try
{
auto path = get_keys_dir_to_process();
if (!path.has_value())
return;
loadMetadataForKeys(path.value());
}
catch (...)
{
{
std::lock_guard exception_lock(set_exception_mutex);
if (!first_exception)
first_exception = std::current_exception();
}
stop_loading = true;
return;
}
}
});
}
catch (...)
{
{
std::lock_guard exception_lock(set_exception_mutex);
if (!first_exception)
first_exception = std::current_exception();
}
stop_loading = true;
break;
}
}
for (auto & thread : loading_threads)
if (thread.joinable())
thread.join();
if (first_exception)
std::rethrow_exception(first_exception);
#ifdef ABORT_ON_LOGICAL_ERROR
assertCacheCorrectness();
#endif
}
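
Aside: loadMetadataImpl() above spreads directory scanning across load_metadata_threads workers; each worker pulls the next key-prefix directory from a mutex-guarded iterator, and the first exception cancels the others and is rethrown after joining. A minimal standalone sketch of the same pattern (the free-function shape and names are illustrative, not the ClickHouse API):

```cpp
#include <atomic>
#include <cstddef>
#include <exception>
#include <filesystem>
#include <mutex>
#include <optional>
#include <thread>
#include <vector>

namespace fs = std::filesystem;

/// Sketch: N workers drain one directory iterator under a mutex;
/// the first exception cancels the others and is rethrown by the caller.
void loadInParallel(const fs::path & base, size_t num_threads, void (*process)(const fs::path &))
{
    fs::directory_iterator it{base};
    std::mutex it_mutex;
    std::exception_ptr first_exception;
    std::mutex exception_mutex;
    std::atomic<bool> stop{false};

    auto next = [&]() -> std::optional<fs::path>
    {
        std::lock_guard lock(it_mutex);
        if (it == fs::directory_iterator{})
            return std::nullopt;
        auto path = it->path();
        ++it;
        return path;
    };

    std::vector<std::thread> workers;
    for (size_t i = 0; i < num_threads; ++i)
    {
        workers.emplace_back([&]
        {
            while (!stop)
            {
                auto path = next();
                if (!path)
                    return;
                try
                {
                    process(*path);
                }
                catch (...)
                {
                    {
                        std::lock_guard lock(exception_mutex);
                        if (!first_exception)
                            first_exception = std::current_exception();
                    }
                    stop = true;
                    return;
                }
            }
        });
    }

    for (auto & worker : workers)
        worker.join();
    if (first_exception)
        std::rethrow_exception(first_exception);
}
```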
void FileCache::loadMetadataForKeys(const fs::path & keys_dir)
{
fs::directory_iterator key_it{keys_dir};
if (key_it == fs::directory_iterator{})
{
LOG_DEBUG(log, "Removing empty key prefix directory: {}", keys_dir.string());
fs::remove(keys_dir);
return;
}
UInt64 offset = 0, size = 0;
for (; key_it != fs::directory_iterator(); key_it++)
{
const fs::path key_directory = key_it->path();
if (!key_it->is_directory())
{
LOG_DEBUG(
log,
"Unexpected file: {} (not a directory). Expected a directory",
key_directory.string());
continue;
}
fs::directory_iterator key_it{key_prefix_directory};
if (key_it == fs::directory_iterator{})
if (fs::directory_iterator{key_directory} == fs::directory_iterator{})
{
LOG_DEBUG(log, "Removing empty key prefix directory: {}", key_prefix_directory.string());
fs::remove(key_prefix_directory);
LOG_DEBUG(log, "Removing empty key directory: {}", key_directory.string());
fs::remove(key_directory);
continue;
}
for (/* key_it already initialized to verify emptiness */; key_it != fs::directory_iterator(); key_it++)
const auto key = Key::fromKeyString(key_directory.filename().string());
auto key_metadata = metadata.getKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true);
const size_t size_limit = main_priority->getSizeLimit();
const size_t elements_limit = main_priority->getElementsLimit();
for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it)
{
const fs::path key_directory = key_it->path();
auto offset_with_suffix = offset_it->path().filename().string();
auto delim_pos = offset_with_suffix.find('_');
bool parsed;
FileSegmentKind segment_kind = FileSegmentKind::Regular;
if (!key_it->is_directory())
if (delim_pos == std::string::npos)
parsed = tryParse<UInt64>(offset, offset_with_suffix);
else
{
LOG_DEBUG(
log,
"Unexpected file: {} (not a directory). Expected a directory",
key_directory.string());
continue;
}
if (fs::directory_iterator{key_directory} == fs::directory_iterator{})
{
LOG_DEBUG(log, "Removing empty key directory: {}", key_directory.string());
fs::remove(key_directory);
continue;
}
const auto key = Key::fromKeyString(key_directory.filename().string());
auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true);
for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it)
{
auto offset_with_suffix = offset_it->path().filename().string();
auto delim_pos = offset_with_suffix.find('_');
bool parsed;
FileSegmentKind segment_kind = FileSegmentKind::Regular;
if (delim_pos == std::string::npos)
parsed = tryParse<UInt64>(offset, offset_with_suffix);
else
parsed = tryParse<UInt64>(offset, offset_with_suffix.substr(0, delim_pos));
if (offset_with_suffix.substr(delim_pos+1) == "persistent")
{
parsed = tryParse<UInt64>(offset, offset_with_suffix.substr(0, delim_pos));
if (offset_with_suffix.substr(delim_pos+1) == "persistent")
{
/// For compatibility. Persistent files are no longer supported.
fs::remove(offset_it->path());
continue;
}
if (offset_with_suffix.substr(delim_pos+1) == "temporary")
{
fs::remove(offset_it->path());
continue;
}
/// For compatibility. Persistent files are no longer supported.
fs::remove(offset_it->path());
continue;
}
if (!parsed)
{
LOG_WARNING(log, "Unexpected file: {}", offset_it->path().string());
continue; /// Or just remove? Some unexpected file.
}
size = offset_it->file_size();
if (!size)
if (offset_with_suffix.substr(delim_pos+1) == "temporary")
{
fs::remove(offset_it->path());
continue;
}
}
if ((main_priority->getSizeLimit() == 0 || main_priority->getSize(lock) + size <= main_priority->getSizeLimit())
&& (main_priority->getElementsLimit() == 0 || main_priority->getElementsCount(lock) + 1 <= main_priority->getElementsLimit()))
if (!parsed)
{
LOG_WARNING(log, "Unexpected file: {}", offset_it->path().string());
continue; /// Or just remove? Some unexpected file.
}
size = offset_it->file_size();
if (!size)
{
fs::remove(offset_it->path());
continue;
}
bool limits_satisfied;
IFileCachePriority::Iterator cache_it;
{
auto lock = lockCache();
limits_satisfied = (size_limit == 0 || main_priority->getSize(lock) + size <= size_limit)
&& (elements_limit == 0 || main_priority->getElementsCount(lock) + 1 <= elements_limit);
if (limits_satisfied)
cache_it = main_priority->add(key_metadata, offset, size, lock);
/// TODO: we can get rid of this lockCache() if we first load everything in parallel
/// without any mutual lock between loading threads, and only after do removeOverflow().
/// This will be better because overflow here may
/// happen only if the cache configuration changed and max_size became less than it was.
}
if (limits_satisfied)
{
bool inserted = false;
try
{
KeyMetadata::iterator file_segment_metadata_it;
try
{
file_segment_metadata_it = addFileSegment(
*locked_key, offset, size, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings(segment_kind), &lock);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
chassert(false);
auto file_segment = std::make_shared<FileSegment>(key, offset, size,
FileSegment::State::DOWNLOADED,
CreateFileSegmentSettings(segment_kind),
this,
key_metadata,
cache_it);
fs::remove(offset_it->path());
continue;
}
inserted = key_metadata->emplace(offset, std::make_shared<FileSegmentMetadata>(std::move(file_segment))).second;
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
chassert(false);
}
if (inserted)
{
LOG_TEST(log, "Added file segment {}:{} (size: {}) with path: {}", key, offset, size, offset_it->path().string());
const auto & file_segment_metadata = file_segment_metadata_it->second;
chassert(file_segment_metadata->file_segment->assertCorrectness());
total_size += size;
queue_entries.emplace_back(
file_segment_metadata->getQueueIterator(),
file_segment_metadata->file_segment);
}
else
{
LOG_WARNING(
log,
"Cache capacity changed (max size: {}, used: {}), "
"cached file `{}` does not fit in cache anymore (size: {})",
main_priority->getSizeLimit(), main_priority->getSize(lock), offset_it->path().string(), size);
cache_it->remove(lockCache());
fs::remove(offset_it->path());
chassert(false);
}
}
else
{
LOG_WARNING(
log,
"Cache capacity changed (max size: {}), "
"cached file `{}` does not fit in cache anymore (size: {})",
main_priority->getSizeLimit(), offset_it->path().string(), size);
fs::remove(offset_it->path());
}
}
}
chassert(total_size == main_priority->getSize(lock));
chassert(total_size <= main_priority->getSizeLimit());
/// Shuffle file_segment_metadatas to have random order in LRUQueue
/// as at startup all file_segment_metadatas have the same priority.
pcg64 generator(randomSeed());
std::shuffle(queue_entries.begin(), queue_entries.end(), generator);
for (auto & [it, file_segment] : queue_entries)
{
/// The cache size changed and, for example, the 1st file segment fits into the cache
/// while the 2nd will fit only if the 1st is evicted; then the 1st is removed and
/// file_segment_metadata is nullptr here.
if (file_segment.expired())
continue;
it->use(lock);
if (key_metadata->empty())
metadata.removeKey(key, false, false);
}
}
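
For reference, the layout walked above is cache_base_path / key_prefix / key / offset[_suffix], where a suffix marks legacy persistent or temporary segments that are simply deleted during load. A hedged sketch of just the filename parsing, using std::from_chars in place of tryParse:

```cpp
#include <charconv>
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>

/// Sketch: parse "<offset>" or "<offset>_<suffix>" cache file names.
/// Returns the offset, or std::nullopt for unparsable names and for
/// legacy kinds that the loader simply deletes.
std::optional<uint64_t> parseOffsetFileName(const std::string & name)
{
    const auto delim_pos = name.find('_');
    const std::string_view digits{name.data(), delim_pos == std::string::npos ? name.size() : delim_pos};

    uint64_t offset = 0;
    const auto [ptr, ec] = std::from_chars(digits.data(), digits.data() + digits.size(), offset);
    if (ec != std::errc{} || ptr != digits.data() + digits.size())
        return std::nullopt; /// unexpected file name

    if (delim_pos != std::string::npos)
    {
        const auto suffix = name.substr(delim_pos + 1);
        if (suffix == "persistent" || suffix == "temporary")
            return std::nullopt; /// no longer supported, removed on load
    }
    return offset;
}
```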
@ -1037,7 +1074,7 @@ void FileCache::deactivateBackgroundOperations()
cleanup_thread->join();
}
FileSegmentsHolderPtr FileCache::getSnapshot()
FileSegments FileCache::getSnapshot()
{
assertInitialized();
#ifndef NDEBUG
@ -1050,19 +1087,19 @@ FileSegmentsHolderPtr FileCache::getSnapshot()
for (const auto & [_, file_segment_metadata] : locked_key)
file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment));
});
return std::make_unique<FileSegmentsHolder>(std::move(file_segments), /* complete_on_dtor */false);
return file_segments;
}
FileSegmentsHolderPtr FileCache::getSnapshot(const Key & key)
FileSegments FileCache::getSnapshot(const Key & key)
{
FileSegments file_segments;
auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW_LOGICAL);
for (const auto & [_, file_segment_metadata] : *locked_key->getKeyMetadata())
file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment));
return std::make_unique<FileSegmentsHolder>(std::move(file_segments));
return file_segments;
}
FileSegmentsHolderPtr FileCache::dumpQueue()
FileSegments FileCache::dumpQueue()
{
assertInitialized();
@ -1073,7 +1110,7 @@ FileSegmentsHolderPtr FileCache::dumpQueue()
return PriorityIterationResult::CONTINUE;
}, lockCache());
return std::make_unique<FileSegmentsHolder>(std::move(file_segments));
return file_segments;
}
std::vector<String> FileCache::tryGetCachePaths(const Key & key)
@ -1148,4 +1185,15 @@ FileCache::QueryContextHolderPtr FileCache::getQueryContextHolder(
return std::make_unique<QueryContextHolder>(query_id, this, std::move(context));
}
FileSegments FileCache::sync()
{
FileSegments file_segments;
metadata.iterate([&](LockedKey & locked_key)
{
auto broken = locked_key.sync();
file_segments.insert(file_segments.end(), broken.begin(), broken.end());
});
return file_segments;
}
}

View File

@ -124,11 +124,11 @@ public:
bool tryReserve(FileSegment & file_segment, size_t size, FileCacheReserveStat & stat);
FileSegmentsHolderPtr getSnapshot();
FileSegments getSnapshot();
FileSegmentsHolderPtr getSnapshot(const Key & key);
FileSegments getSnapshot(const Key & key);
FileSegmentsHolderPtr dumpQueue();
FileSegments dumpQueue();
void deactivateBackgroundOperations();
@ -150,6 +150,8 @@ public:
CacheGuard::Lock lockCache() const;
FileSegments sync();
private:
using KeyAndOffset = FileCacheKeyAndOffset;
@ -157,6 +159,7 @@ private:
const size_t bypass_cache_threshold = 0;
const size_t boundary_alignment;
const size_t background_download_threads;
const size_t metadata_download_threads;
Poco::Logger * log;
@ -207,6 +210,8 @@ private:
void assertCacheCorrectness();
void loadMetadata();
void loadMetadataImpl();
void loadMetadataForKeys(const std::filesystem::path & keys_dir);
FileSegments getImpl(const LockedKey & locked_key, const FileSegment::Range & range) const;

View File

@ -49,6 +49,9 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration &
if (config.has(config_prefix + ".background_download_threads"))
background_download_threads = config.getUInt(config_prefix + ".background_download_threads");
if (config.has(config_prefix + ".load_metadata_threads"))
load_metadata_threads = config.getUInt(config_prefix + ".load_metadata_threads");
}
}

View File

@ -28,6 +28,8 @@ struct FileCacheSettings
size_t boundary_alignment = FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT;
size_t background_download_threads = FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS;
size_t load_metadata_threads = FILECACHE_DEFAULT_LOAD_METADATA_THREADS;
void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
};

View File

@ -7,6 +7,7 @@ namespace DB
static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi
static constexpr int FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT = 4 * 1024 * 1024; /// 4Mi
static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 2;
static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 1;
static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000;
static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0;
static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;

View File

@ -151,23 +151,13 @@ void FileSegment::setQueueIterator(Priority::Iterator iterator)
queue_iterator = iterator;
}
size_t FileSegment::getFirstNonDownloadedOffset(bool sync) const
size_t FileSegment::getCurrentWriteOffset() const
{
return range().left + getDownloadedSize(sync);
return range().left + downloaded_size;
}
size_t FileSegment::getCurrentWriteOffset(bool sync) const
size_t FileSegment::getDownloadedSize() const
{
return getFirstNonDownloadedOffset(sync);
}
size_t FileSegment::getDownloadedSize(bool sync) const
{
if (sync)
{
std::lock_guard lock(download_mutex);
return downloaded_size;
}
return downloaded_size;
}
@ -231,7 +221,7 @@ void FileSegment::resetDownloadingStateUnlocked(const FileSegmentGuard::Lock & l
assert(isDownloaderUnlocked(lock));
assert(download_state == State::DOWNLOADING);
size_t current_downloaded_size = getDownloadedSize(true);
size_t current_downloaded_size = getDownloadedSize();
/// range().size() can equal 0 in case of write-though cache.
if (!is_unbound && current_downloaded_size != 0 && current_downloaded_size == range().size())
setDownloadedUnlocked(lock);
@ -345,14 +335,14 @@ void FileSegment::write(const char * from, size_t size, size_t offset)
ErrorCodes::LOGICAL_ERROR,
"Expected DOWNLOADING state, got {}", stateToString(download_state));
size_t first_non_downloaded_offset = getFirstNonDownloadedOffset(false);
size_t first_non_downloaded_offset = getCurrentWriteOffset();
if (offset != first_non_downloaded_offset)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Attempt to write {} bytes to offset: {}, but current write offset is {}",
size, offset, first_non_downloaded_offset);
size_t current_downloaded_size = getDownloadedSize(false);
size_t current_downloaded_size = getDownloadedSize();
chassert(reserved_size >= current_downloaded_size);
size_t free_reserved_size = reserved_size - current_downloaded_size;
@ -379,13 +369,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset)
try
{
cache_writer->write(from, size);
std::lock_guard lock(download_mutex);
cache_writer->next();
downloaded_size += size;
chassert(std::filesystem::file_size(file_segment_path) == downloaded_size);
}
catch (ErrnoException & e)
@ -416,7 +402,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset)
throw;
}
chassert(getFirstNonDownloadedOffset(false) == offset + size);
chassert(getCurrentWriteOffset() == offset + size);
}
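
Dropping download_mutex is safe here because downloaded_size is atomic and is advanced only after cache_writer->next() has flushed the bytes: a reader that observes offset < getCurrentWriteOffset() can rely on the data already being on disk. A toy illustration of this flush-then-publish ordering (hypothetical type, not the FileSegment API):

```cpp
#include <atomic>
#include <cstddef>
#include <cstdio>

/// Sketch: publish write progress only after the data is flushed, so a
/// reader polling 'written' never observes an offset ahead of the file.
struct AppendOnlyFile
{
    std::FILE * file = nullptr;
    std::atomic<size_t> written{0};

    void append(const char * data, size_t size)
    {
        std::fwrite(data, 1, size, file);
        std::fflush(file);                                  /// flush first...
        written.fetch_add(size, std::memory_order_release); /// ...then publish
    }

    bool isReadable(size_t offset) const
    {
        return offset < written.load(std::memory_order_acquire);
    }
};
```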
FileSegment::State FileSegment::wait(size_t offset)
@ -425,7 +411,7 @@ FileSegment::State FileSegment::wait(size_t offset)
auto lock = lockFileSegment();
if (downloader_id.empty() || offset < getCurrentWriteOffset(true))
if (downloader_id.empty() || offset < getCurrentWriteOffset())
return download_state;
if (download_state == State::EMPTY)
@ -441,7 +427,7 @@ FileSegment::State FileSegment::wait(size_t offset)
[[maybe_unused]] const auto ok = cv.wait_for(lock, std::chrono::seconds(60), [&, this]()
{
return download_state != State::DOWNLOADING || offset < getCurrentWriteOffset(false);
return download_state != State::DOWNLOADING || offset < getCurrentWriteOffset();
});
/// chassert(ok);
}
@ -490,7 +476,7 @@ bool FileSegment::reserve(size_t size_to_reserve, FileCacheReserveStat * reserve
assertNotDetachedUnlocked(lock);
assertIsDownloaderUnlocked("reserve", lock);
expected_downloaded_size = getDownloadedSize(false);
expected_downloaded_size = getDownloadedSize();
is_file_segment_size_exceeded = expected_downloaded_size + size_to_reserve > range().size();
if (is_file_segment_size_exceeded && !is_unbound)
@ -614,7 +600,7 @@ void FileSegment::complete()
const bool is_downloader = isDownloaderUnlocked(segment_lock);
const bool is_last_holder = locked_key->isLastOwnerOfFileSegment(offset());
const size_t current_downloaded_size = getDownloadedSize(true);
const size_t current_downloaded_size = getDownloadedSize();
SCOPE_EXIT({
if (is_downloader)
@ -735,11 +721,10 @@ String FileSegment::getInfoForLogUnlocked(const FileSegmentGuard::Lock &) const
info << "File segment: " << range().toString() << ", ";
info << "key: " << key().toString() << ", ";
info << "state: " << download_state.load() << ", ";
info << "downloaded size: " << getDownloadedSize(false) << ", ";
info << "downloaded size: " << getDownloadedSize() << ", ";
info << "reserved size: " << reserved_size.load() << ", ";
info << "downloader id: " << (downloader_id.empty() ? "None" : downloader_id) << ", ";
info << "current write offset: " << getCurrentWriteOffset(false) << ", ";
info << "first non-downloaded offset: " << getFirstNonDownloadedOffset(false) << ", ";
info << "current write offset: " << getCurrentWriteOffset() << ", ";
info << "caller id: " << getCallerId() << ", ";
info << "kind: " << toString(segment_kind) << ", ";
info << "unbound: " << is_unbound;
@ -791,6 +776,8 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) cons
{
chassert(downloader_id.empty());
chassert(downloaded_size == reserved_size);
chassert(downloaded_size == range().size());
chassert(downloaded_size > 0);
chassert(std::filesystem::file_size(getPathInLocalCache()) > 0);
chassert(queue_iterator);
check_iterator(queue_iterator);
@ -844,7 +831,7 @@ FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment)
CreateFileSegmentSettings(file_segment->getKind(), file_segment->is_unbound));
snapshot->hits_count = file_segment->getHitsCount();
snapshot->downloaded_size = file_segment->getDownloadedSize(false);
snapshot->downloaded_size = file_segment->getDownloadedSize();
snapshot->download_state = file_segment->download_state.load();
snapshot->ref_count = file_segment.use_count();
@ -882,8 +869,15 @@ void FileSegment::setDetachedState(const FileSegmentGuard::Lock & lock)
key_metadata.reset();
cache = nullptr;
queue_iterator = nullptr;
cache_writer.reset();
remote_file_reader.reset();
try
{
cache_writer.reset();
remote_file_reader.reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void FileSegment::detach(const FileSegmentGuard::Lock & lock, const LockedKey &)

View File

@ -178,11 +178,9 @@ public:
size_t getRefCount() const { return ref_count; }
size_t getCurrentWriteOffset(bool sync) const;
size_t getCurrentWriteOffset() const;
size_t getFirstNonDownloadedOffset(bool sync) const;
size_t getDownloadedSize(bool sync) const;
size_t getDownloadedSize() const;
size_t getReservedSize() const;
@ -302,7 +300,6 @@ private:
/// downloaded_size should always be less or equal to reserved_size
std::atomic<size_t> downloaded_size = 0;
std::atomic<size_t> reserved_size = 0;
mutable std::mutex download_mutex;
mutable FileSegmentGuard segment_guard;
std::weak_ptr<KeyMetadata> key_metadata;

View File

@ -88,6 +88,8 @@ public:
/// From lowest to highest priority.
virtual void iterate(IterateFunc && func, const CacheGuard::Lock &) = 0;
virtual void shuffle(const CacheGuard::Lock &) = 0;
private:
const size_t max_size = 0;
const size_t max_elements = 0;

View File

@ -3,6 +3,7 @@
#include <Common/CurrentMetrics.h>
#include <Common/randomSeed.h>
#include <Common/logger_useful.h>
#include <pcg-random/pcg_random.hpp>
namespace CurrentMetrics
{
@ -213,4 +214,16 @@ void LRUFileCachePriority::LRUFileCacheIterator::checkUsable() const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator");
}
void LRUFileCachePriority::shuffle(const CacheGuard::Lock &)
{
std::vector<LRUQueueIterator> its;
its.reserve(queue.size());
for (auto it = queue.begin(); it != queue.end(); ++it)
its.push_back(it);
pcg64 generator(randomSeed());
std::shuffle(its.begin(), its.end(), generator);
for (auto & it : its)
queue.splice(queue.end(), queue, it);
}
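
The shuffle above works because std::list iterators stay valid across splice(): collect every iterator, shuffle that vector, then re-link each node to the back in the shuffled order; no element is copied and no iterator is invalidated. A self-contained illustration:

```cpp
#include <algorithm>
#include <iostream>
#include <list>
#include <random>
#include <vector>

int main()
{
    std::list<int> queue{1, 2, 3, 4, 5};

    std::vector<std::list<int>::iterator> its;
    its.reserve(queue.size());
    for (auto it = queue.begin(); it != queue.end(); ++it)
        its.push_back(it);

    std::mt19937_64 generator{std::random_device{}()};
    std::shuffle(its.begin(), its.end(), generator);

    /// Re-linking each node to the back in shuffled order permutes the
    /// list in place; splice() never copies or invalidates elements.
    for (const auto & it : its)
        queue.splice(queue.end(), queue, it);

    for (int value : queue)
        std::cout << value << ' ';
    std::cout << '\n';
}
```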
}

View File

@ -4,6 +4,7 @@
#include <Interpreters/Cache/IFileCachePriority.h>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Common/logger_useful.h>
#include "Interpreters/Cache/Guards.h"
namespace CurrentMetrics
{
@ -40,6 +41,8 @@ public:
void iterate(IterateFunc && func, const CacheGuard::Lock &) override;
void shuffle(const CacheGuard::Lock &) override;
private:
void updateElementsCount(int64_t num);
void updateSize(int64_t size);

View File

@ -128,7 +128,7 @@ bool KeyMetadata::createBaseDirectory()
return true;
}
std::string KeyMetadata::getFileSegmentPath(const FileSegment & file_segment)
std::string KeyMetadata::getFileSegmentPath(const FileSegment & file_segment) const
{
return fs::path(key_path)
/ CacheMetadata::getFileNameForFileSegment(file_segment.offset(), file_segment.getKind());
@ -179,27 +179,9 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata(
KeyNotFoundPolicy key_not_found_policy,
bool is_initial_load)
{
KeyMetadataPtr key_metadata;
{
auto lock = lockMetadata();
auto it = find(key);
if (it == end())
{
if (key_not_found_policy == KeyNotFoundPolicy::THROW)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key);
else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key);
else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL)
return nullptr;
it = emplace(
key, std::make_shared<KeyMetadata>(
key, getPathForKey(key), cleanup_queue, download_queue, log, key_prefix_directory_mutex, is_initial_load)).first;
}
key_metadata = it->second;
}
auto key_metadata = getKeyMetadata(key, key_not_found_policy, is_initial_load);
if (!key_metadata)
return nullptr;
{
auto locked_metadata = key_metadata->lockNoStateCheck();
@ -233,6 +215,29 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata(
return lockKeyMetadata(key, key_not_found_policy);
}
KeyMetadataPtr CacheMetadata::getKeyMetadata(
const Key & key,
KeyNotFoundPolicy key_not_found_policy,
bool is_initial_load)
{
auto lock = lockMetadata();
auto it = find(key);
if (it == end())
{
if (key_not_found_policy == KeyNotFoundPolicy::THROW)
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key);
else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL)
    throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key);
else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL)
    return nullptr;
it = emplace(
key, std::make_shared<KeyMetadata>(
key, getPathForKey(key), cleanup_queue, download_queue, log, key_prefix_directory_mutex, is_initial_load)).first;
}
return it->second;
}
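
getKeyMetadata() factors the get-or-create step out of lockKeyMetadata(): look up the key under the metadata lock, apply the not-found policy, otherwise emplace a fresh entry. The same shape reduced to a plain map (illustrative only):

```cpp
#include <map>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <string>

enum class NotFoundPolicy { Throw, ReturnNull, CreateEmpty };

struct Entry { std::string key; };

/// Sketch: get-or-create under a single lock, with a pluggable
/// not-found policy, mirroring the shape of getKeyMetadata().
std::shared_ptr<Entry> getOrCreate(
    std::map<std::string, std::shared_ptr<Entry>> & entries,
    std::mutex & mutex,
    const std::string & key,
    NotFoundPolicy policy)
{
    std::lock_guard lock(mutex);
    auto it = entries.find(key);
    if (it == entries.end())
    {
        if (policy == NotFoundPolicy::Throw)
            throw std::runtime_error("No such key `" + key + "`");
        if (policy == NotFoundPolicy::ReturnNull)
            return nullptr;
        it = entries.emplace(key, std::make_shared<Entry>(Entry{key})).first;
    }
    return it->second;
}
```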
void CacheMetadata::iterate(IterateFunc && func)
{
auto lock = lockMetadata();
@ -563,12 +568,12 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optional<Memor
if (file_segment.getOrSetDownloader() != FileSegment::getCallerId())
return;
if (file_segment.getDownloadedSize(false) == file_segment.range().size())
if (file_segment.getDownloadedSize() == file_segment.range().size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "File segment is already fully downloaded");
LOG_TEST(
log, "Downloading {} bytes for file segment {}",
file_segment.range().size() - file_segment.getDownloadedSize(false), file_segment.getInfoForLog());
file_segment.range().size() - file_segment.getDownloadedSize(), file_segment.getInfoForLog());
auto reader = file_segment.getRemoteFileReader();
@ -589,7 +594,7 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optional<Memor
reader->set(memory->data(), memory->size());
}
size_t offset = file_segment.getCurrentWriteOffset(false);
size_t offset = file_segment.getCurrentWriteOffset();
if (offset != static_cast<size_t>(reader->getPosition()))
reader->seek(offset, SEEK_SET);
@ -603,7 +608,7 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optional<Memor
log, "Failed to reserve space during background download "
"for {}:{} (downloaded size: {}/{})",
file_segment.key(), file_segment.offset(),
file_segment.getDownloadedSize(false), file_segment.range().size());
file_segment.getDownloadedSize(), file_segment.range().size());
return;
}
@ -704,26 +709,26 @@ bool LockedKey::removeAllFileSegments(bool if_releasable)
return removed_all;
}
KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset)
KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, bool can_be_broken)
{
auto it = key_metadata->find(offset);
if (it == key_metadata->end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {}", offset);
auto file_segment = it->second->file_segment;
return removeFileSegmentImpl(it, file_segment->lock());
return removeFileSegmentImpl(it, file_segment->lock(), can_be_broken);
}
KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock)
KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock, bool can_be_broken)
{
auto it = key_metadata->find(offset);
if (it == key_metadata->end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {} in key {}", offset, getKey());
return removeFileSegmentImpl(it, segment_lock);
return removeFileSegmentImpl(it, segment_lock, can_be_broken);
}
KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock & segment_lock)
KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock & segment_lock, bool can_be_broken)
{
auto file_segment = it->second->file_segment;
@ -731,30 +736,45 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it,
key_metadata->log, "Remove from cache. Key: {}, offset: {}, size: {}",
getKey(), file_segment->offset(), file_segment->reserved_size);
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
chassert(can_be_broken || file_segment->assertCorrectnessUnlocked(segment_lock));
if (file_segment->queue_iterator)
file_segment->queue_iterator->invalidate();
file_segment->detach(segment_lock, *this);
const auto path = key_metadata->getFileSegmentPath(*file_segment);
bool exists = fs::exists(path);
if (exists)
try
{
fs::remove(path);
const auto path = key_metadata->getFileSegmentPath(*file_segment);
bool exists = fs::exists(path);
if (exists)
{
fs::remove(path);
/// Clear OpenedFileCache to avoid reading from incorrect file descriptor.
int flags = file_segment->getFlagsForLocalRead();
/// Files are created with flags from file_segment->getFlagsForLocalRead()
/// plus optionally O_DIRECT is added, depends on query setting, so remove both.
OpenedFileCache::instance().remove(path, flags);
OpenedFileCache::instance().remove(path, flags | O_DIRECT);
/// Clear OpenedFileCache to avoid reading from incorrect file descriptor.
int flags = file_segment->getFlagsForLocalRead();
/// Files are created with flags from file_segment->getFlagsForLocalRead()
/// plus optionally O_DIRECT is added, depends on query setting, so remove both.
OpenedFileCache::instance().remove(path, flags);
OpenedFileCache::instance().remove(path, flags | O_DIRECT);
LOG_TEST(key_metadata->log, "Removed file segment at path: {}", path);
LOG_TEST(key_metadata->log, "Removed file segment at path: {}", path);
}
else if (file_segment->downloaded_size && !can_be_broken)
{
#ifdef ABORT_ON_LOGICAL_ERROR
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path);
#else
LOG_WARNING(key_metadata->log, "Expected path {} to exist, while removing {}:{}",
path, getKey(), file_segment->offset());
#endif
}
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
chassert(false);
}
else if (file_segment->downloaded_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path);
return key_metadata->erase(it);
}
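
The double remove() above exists because the descriptor cache is keyed by (path, open flags), and cached readers may have opened the file either with or without O_DIRECT; both entries must be dropped before the file is unlinked. A reduced sketch of such a keyed cache (hypothetical, not the OpenedFileCache interface; kDirect stands in for the platform O_DIRECT):

```cpp
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <utility>

constexpr int kDirect = 0x4000; /// stand-in for platform O_DIRECT

struct OpenedFile { int fd = -1; };

/// Sketch: a descriptor cache keyed by (path, flags). Before a cached
/// file is unlinked, every flag variant it may have been opened with
/// has to be dropped, or readers could reuse a stale descriptor.
class DescriptorCache
{
public:
    void remove(const std::string & path, int flags)
    {
        std::lock_guard lock(mutex);
        files.erase({path, flags});
    }

private:
    std::mutex mutex;
    std::map<std::pair<std::string, int>, std::shared_ptr<OpenedFile>> files;
};

void evictBothVariants(DescriptorCache & cache, const std::string & path, int flags)
{
    cache.remove(path, flags);
    cache.remove(path, flags | kDirect);
}
```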
@ -772,7 +792,7 @@ void LockedKey::shrinkFileSegmentToDownloadedSize(
const auto & file_segment = metadata->file_segment;
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
const size_t downloaded_size = file_segment->getDownloadedSize(false);
const size_t downloaded_size = file_segment->getDownloadedSize();
if (downloaded_size == file_segment->range().size())
{
throw Exception(
@ -870,4 +890,56 @@ std::string LockedKey::toString() const
return result;
}
FileSegments LockedKey::sync()
{
FileSegments broken;
for (auto it = key_metadata->begin(); it != key_metadata->end();)
{
auto file_segment = it->second->file_segment;
if (file_segment->isDetached())
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"File segment has unexpected state: DETACHED ({})", file_segment->getInfoForLog());
}
if (file_segment->getDownloadedSize() == 0)
{
++it;
continue;
}
const auto & path = key_metadata->getFileSegmentPath(*file_segment);
if (!fs::exists(path))
{
LOG_WARNING(
key_metadata->log,
"File segment has DOWNLOADED state, but file does not exist ({})",
file_segment->getInfoForLog());
broken.push_back(FileSegment::getSnapshot(file_segment));
it = removeFileSegment(file_segment->offset(), file_segment->lock(), /* can_be_broken */true);
continue;
}
const size_t actual_size = fs::file_size(path);
const size_t expected_size = file_segment->getDownloadedSize();
if (actual_size == expected_size)
{
++it;
continue;
}
LOG_WARNING(
key_metadata->log,
"File segment has unexpected size. Having {}, expected {} ({})",
actual_size, expected_size, file_segment->getInfoForLog());
broken.push_back(FileSegment::getSnapshot(file_segment));
it = removeFileSegment(file_segment->offset(), file_segment->lock(), /* can_be_broken */false);
}
return broken;
}
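
sync() is a consistency sweep: a segment whose file is missing, or whose on-disk size disagrees with the in-memory downloaded size, is snapshotted, removed, and reported as broken. The core check, reduced to a free function with assumed names:

```cpp
#include <cstdint>
#include <filesystem>

namespace fs = std::filesystem;

enum class SegmentCheck { Ok, Missing, SizeMismatch };

/// Sketch: classify one cached segment by comparing the in-memory
/// downloaded size against the file actually present on disk.
SegmentCheck checkSegment(const fs::path & path, uint64_t expected_size)
{
    if (expected_size == 0)
        return SegmentCheck::Ok; /// nothing downloaded, nothing to verify
    if (!fs::exists(path))
        return SegmentCheck::Missing;
    if (fs::file_size(path) != expected_size)
        return SegmentCheck::SizeMismatch;
    return SegmentCheck::Ok;
}
```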
}

View File

@ -73,7 +73,7 @@ struct KeyMetadata : public std::map<size_t, FileSegmentMetadataPtr>,
bool createBaseDirectory();
std::string getFileSegmentPath(const FileSegment & file_segment);
std::string getFileSegmentPath(const FileSegment & file_segment) const;
private:
KeyState key_state = KeyState::ACTIVE;
@ -116,6 +116,11 @@ public:
RETURN_NULL,
};
KeyMetadataPtr getKeyMetadata(
const Key & key,
KeyNotFoundPolicy key_not_found_policy,
bool is_initial_load = false);
LockedKeyPtr lockKeyMetadata(
const Key & key,
KeyNotFoundPolicy key_not_found_policy,
@ -192,8 +197,8 @@ struct LockedKey : private boost::noncopyable
bool removeAllFileSegments(bool if_releasable = true);
KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &);
KeyMetadata::iterator removeFileSegment(size_t offset);
KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &, bool can_be_broken = false);
KeyMetadata::iterator removeFileSegment(size_t offset, bool can_be_broken = false);
void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &);
@ -207,10 +212,12 @@ struct LockedKey : private boost::noncopyable
void markAsRemoved();
FileSegments sync();
std::string toString() const;
private:
KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &);
KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &, bool can_be_broken = false);
const std::shared_ptr<KeyMetadata> key_metadata;
KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`.

View File

@ -480,7 +480,7 @@ QueryCache::QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_
void QueryCache::updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_)
{
std::lock_guard lock(mutex);
cache.setMaxSize(max_size_in_bytes);
cache.setMaxSizeInBytes(max_size_in_bytes);
cache.setMaxCount(max_entries);
max_entry_size_in_bytes = max_entry_size_in_bytes_;
max_entry_size_in_rows = max_entry_size_in_rows_;
@ -510,9 +510,9 @@ void QueryCache::clear()
times_executed.clear();
}
size_t QueryCache::weight() const
size_t QueryCache::sizeInBytes() const
{
return cache.weight();
return cache.sizeInBytes();
}
size_t QueryCache::count() const

View File

@ -182,7 +182,7 @@ public:
void clear();
size_t weight() const;
size_t sizeInBytes() const;
size_t count() const;
/// Record new execution of query represented by key. Returns number of executions so far.

View File

@ -9,6 +9,7 @@
#include "config_version.h"
#include <format>
namespace DB
{
@ -18,7 +19,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const
{
if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO)
@ -199,6 +199,20 @@ void ClientInfo::setInitialQuery()
client_name = (VERSION_NAME " ") + client_name;
}
bool ClientInfo::clientVersionEquals(const ClientInfo & other, bool compare_patch) const
{
bool patch_equals = compare_patch ? client_version_patch == other.client_version_patch : true;
return client_version_major == other.client_version_major &&
client_version_minor == other.client_version_minor &&
patch_equals &&
client_tcp_protocol_version == other.client_tcp_protocol_version;
}
String ClientInfo::getVersionStr() const
{
return std::format("{}.{}.{} ({})", client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version);
}
void ClientInfo::fillOSUserHostNameAndVersionInfo()
{
@ -216,5 +230,27 @@ void ClientInfo::fillOSUserHostNameAndVersionInfo()
client_tcp_protocol_version = DBMS_TCP_PROTOCOL_VERSION;
}
String toString(ClientInfo::Interface interface)
{
switch (interface)
{
case ClientInfo::Interface::TCP:
return "TCP";
case ClientInfo::Interface::HTTP:
return "HTTP";
case ClientInfo::Interface::GRPC:
return "GRPC";
case ClientInfo::Interface::MYSQL:
return "MYSQL";
case ClientInfo::Interface::POSTGRESQL:
return "POSTGRESQL";
case ClientInfo::Interface::LOCAL:
return "LOCAL";
case ClientInfo::Interface::TCP_INTERSERVER:
return "TCP_INTERSERVER";
}
return std::format("Unknown {}!\n", static_cast<int>(interface));
}
}

Some files were not shown because too many files have changed in this diff