Merge branch 'master' into chesema-dedup-matview

This commit is contained in:
Sema Checherinda 2024-06-29 01:35:32 +02:00
commit abe58184df
177 changed files with 2192 additions and 1519 deletions

View File

@ -167,10 +167,16 @@ jobs:
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Download reports
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds
- name: Builds report
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_tsan package_debug binary_darwin binary_darwin_aarch64
- name: Set status
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################

.github/workflows/create_release.yml vendored Normal file
View File

@ -0,0 +1,29 @@
name: CreateRelease
concurrency:
group: release
'on':
workflow_dispatch:
inputs:
sha:
description: 'The SHA hash of the commit from which to create the release'
required: true
type: string
type:
description: 'The type of release: "new" for a new release or "patch" for a patch release'
required: true
type: choice
options:
- new
- patch
jobs:
Release:
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Print greeting
run: |
python3 ./tests/ci/release.py --commit ${{ inputs.sha }} --type ${{ inputs.type }} --dry-run

View File

@ -184,10 +184,16 @@ jobs:
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Download reports
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds
- name: Builds report
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_msan package_ubsan package_tsan package_debug binary_darwin binary_darwin_aarch64
- name: Set status
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs:

View File

@ -29,17 +29,17 @@
* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)).
* Add support for comparing IPv4 and IPv6 types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Allow storing named collections in ZooKeeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support decimal arguments in binary math functions (pow(), atan2(), max2, min2(), hypot(). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Support decimal arguments in binary math functions (pow, atan2, max2, min2, hypot). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)).
* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). [#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)).
* Add a `_time` virtual column to file-like storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)).
* Introduced new functions `base64UrlEncode`, `base64UrlDecode` and `tryBase64UrlDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)).
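A quick illustration of the new `editDistanceUTF8` function (the string literals are illustrative):

```sql
-- One substitution ('e' -> 'a') turns 'hello' into 'hallo',
-- so the edit distance is 1.
SELECT editDistanceUTF8('hello', 'hallo');
```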
#### Performance Improvement
* Add a native parquet reader, which can read parquet binary data into ClickHouse Columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default); a usage sketch follows this list. [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)).
* Reduce the number of virtual function calls in ColumnNullable::size(). [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)).
* Reduce the number of virtual function calls in ColumnNullable::size. [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)).
* Speed up `splitByRegexp` when the regular expression argument is a single character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)).
* Speed up FixedHashTable by keeping track of the min and max keys used, which reduces the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)).
* Optimize the resolution of in(LowCardinality, ConstantSet). [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
@ -51,7 +51,7 @@
* Improve functions `least`/`greatest` for nullable numeric type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)).
* Allow merging two consecutive `FilterSteps` of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove bad optimization in vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)).
* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with the new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix performance regression in cross join introduced in [#60459](https://github.com/ClickHouse/ClickHouse/issues/60459) (24.5). [#65243](https://github.com/ClickHouse/ClickHouse/pull/65243) ([Nikita Taranov](https://github.com/nickitat)).
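A usage sketch for the native Parquet reader mentioned above (the file path is illustrative):

```sql
-- Opt in to the native reader; it is disabled by default.
SET input_format_parquet_use_native_reader = 1;
SELECT count() FROM file('/tmp/data.parquet', 'Parquet');
```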
#### Improvement
@ -63,7 +63,7 @@
* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Several minor corner case fixes to proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)).
* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
* Improve io_uring resubmits visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)).
* Improve io_uring resubmit visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)).
* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)).
* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment unique for each table which will explain why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Added setting `metadata_storage_type` to keep free space on metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).

View File

@ -6,6 +6,9 @@ namespace
{
std::string getFQDNOrHostNameImpl()
{
#if defined(OS_DARWIN)
return Poco::Net::DNS::hostName();
#else
try
{
return Poco::Net::DNS::thisHost().name();
@ -14,6 +17,7 @@ namespace
{
return Poco::Net::DNS::hostName();
}
#endif
}
}

View File

@ -34,7 +34,7 @@ if (OS_LINUX)
# avoid spurious latencies and additional work associated with
# MADV_DONTNEED. See
# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true")
else()
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
endif()
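With this change the profiler is compiled in (`prof:true`) but stays dormant (`prof_active:false`). A sketch of toggling it at runtime, assuming the build exposes the `SYSTEM JEMALLOC` statements:

```sql
-- Assumes the SYSTEM JEMALLOC commands are available in this build.
SYSTEM JEMALLOC ENABLE PROFILE;   -- activate heap profiling
-- ... run the workload to be profiled ...
SYSTEM JEMALLOC FLUSH PROFILE;    -- dump a heap profile
SYSTEM JEMALLOC DISABLE PROFILE;  -- back to the dormant default
```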

View File

@ -28,7 +28,7 @@ run, for example, the test `01428_hash_set_nan_key`, change to the repository
folder and run the following command:
```
PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
PATH=<path to clickhouse-client>:$PATH tests/clickhouse-test 01428_hash_set_nan_key
```
Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which

View File

@ -28,6 +28,8 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
[s3queue_cleanup_interval_max_ms = 30000,]
```
Starting with `24.7`, settings without the `s3queue_` prefix are also supported.
**Engine parameters**
- `path` — Bucket URL with a path to the file. Supports the following wildcards in read-only mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc'`, `'def'` are strings. For more information see [below](#wildcards-in-path).
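A minimal sketch of the engine declaration described above (the bucket URL is illustrative):

```sql
CREATE TABLE s3queue_events (name String, value UInt32)
ENGINE = S3Queue('https://mybucket.s3.amazonaws.com/events/*.csv', 'CSV')
SETTINGS mode = 'unordered';
```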

View File

@ -2169,6 +2169,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.
- [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`.
- [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`.
- [output_format_parquet_write_page_index](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_write_page_index) - Allows writing a page index into Parquet files. Requires `output_format_parquet_use_custom_encoder` to be disabled at present. Default value - `true`.
## ParquetMetadata {data-format-parquet-metadata}

View File

@ -5,6 +5,10 @@ sidebar_label: "Named collections"
title: "Named collections"
---
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
<CloudNotSupportedBadge />
Named collections provide a way to store collections of key-value pairs to be
used to configure integrations with external sources. You can use named collections with
dictionaries, tables, table functions, and object storage.

View File

@ -498,6 +498,8 @@ Default: 0.9
Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups (see settings `cgroup_memory_watcher_hard_limit_ratio` and `cgroup_memory_watcher_soft_limit_ratio`).
To disable the cgroup observer, set this value to `0`.
Type: UInt64
Default: 15
@ -1463,6 +1465,9 @@ Keys:
- `size` Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place.
- `count` The number of archived log files that ClickHouse stores.
- `console` Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`.
- `console_log_level` Logging level for console. Defaults to `level`.
- `use_syslog` Log to syslog as well.
- `syslog_level` Logging level for syslog.
- `stream_compress` Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`.
- `formatting` Specify log format to be printed in console log (currently only `json` supported).

View File

@ -1428,6 +1428,13 @@ Average block bytes output by parquet reader. Lowering the configuration in the
Default value: `65409 * 256 = 16744704`
### output_format_parquet_write_page_index {#output_format_parquet_write_page_index}
Adds a page index to Parquet files. To enable it, set `output_format_parquet_use_custom_encoder` = `false` and
`output_format_parquet_write_page_index` = `true`.
Enabled by default.
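A sketch of the two settings working together (the output path is illustrative; `INTO OUTFILE` assumes clickhouse-client or clickhouse-local):

```sql
SET output_format_parquet_use_custom_encoder = 0; -- required for the page index
SET output_format_parquet_write_page_index = 1;   -- on by default
SELECT number
FROM system.numbers
LIMIT 100
INTO OUTFILE '/tmp/with_page_index.parquet'
FORMAT Parquet;
```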
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}

View File

@ -236,10 +236,10 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
The previous example is equivalent to:
``` bash
$ echo -e "1,2\n3,4" | clickhouse-local --query "
$ echo -e "1,2\n3,4" | clickhouse-local -n --query "
CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin);
SELECT a, b FROM table;
DROP TABLE table"
DROP TABLE table;"
Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
1 2
3 4

View File

@ -1168,14 +1168,14 @@ Result:
└────────────────────────────┘
```
## base64UrlEncode
## base64URLEncode
Encodes a URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5).
**Syntax**
```sql
base64UrlEncode(url)
base64URLEncode(url)
```
**Arguments**
@ -1189,13 +1189,13 @@ base64UrlEncode(url)
**Example**
``` sql
SELECT base64UrlEncode('https://clickhouse.com');
SELECT base64URLEncode('https://clickhouse.com');
```
Result:
```result
┌─base64UrlEncode('https://clickhouse.com')─┐
┌─base64URLEncode('https://clickhouse.com')─┐
│ aHR0cDovL2NsaWNraG91c2UuY29t │
└───────────────────────────────────────────┘
```
@ -1234,19 +1234,19 @@ Result:
└──────────────────────────────────┘
```
## base64UrlDecode
## base64URLDecode
Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error.
**Syntax**
```sql
base64UrlDecode(encodedUrl)
base64URLDecode(encodedUrl)
```
**Arguments**
- `encodedUrl` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
**Returned value**
@ -1255,13 +1255,13 @@ base64UrlDecode(encodedUrl)
**Example**
``` sql
SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
```
Result:
```result
┌─base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
┌─base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
│ https://clickhouse.com │
└─────────────────────────────────────────────────┘
```
@ -1298,19 +1298,19 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res
└────────────┴─────────────┘
```
## tryBase64UrlDecode
## tryBase64URLDecode
Like `base64UrlDecode` but returns an empty string in case of error.
Like `base64URLDecode` but returns an empty string in case of error.
**Syntax**
```sql
tryBase64UrlDecode(encodedUrl)
tryBase64URLDecode(encodedUrl)
```
**Parameters**
- `encodedUrl`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
**Returned value**
@ -1321,7 +1321,7 @@ tryBase64UrlDecode(encodedUrl)
Query:
```sql
SELECT tryBase64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid;
SELECT tryBase64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid;
```
```response

View File

@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/alter/named-collection
sidebar_label: NAMED COLLECTION
---
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
<CloudNotSupportedBadge />
# ALTER NAMED COLLECTION
This query modifies an already existing named collection.
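A minimal sketch (the collection name and keys are illustrative):

```sql
ALTER NAMED COLLECTION mycollection SET port = '9440' DELETE password;
```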

View File

@ -134,8 +134,8 @@ PRIMARY KEY (event_type, ts)
ORDER BY (event_type, ts, browser)
SETTINGS index_granularity = 8192
-- !!! The columns' definition is unchanged but it does not matter, we are not quering
-- MATERIALIZED VIEW, we are quering TO (storage) table.
-- !!! The columns' definition is unchanged but it does not matter, we are not querying
-- MATERIALIZED VIEW, we are querying TO (storage) table.
-- SELECT section is updated.
SHOW CREATE TABLE mv FORMAT TSVRaw;

View File

@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/create/named-collection
sidebar_label: NAMED COLLECTION
---
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
<CloudNotSupportedBadge />
# CREATE NAMED COLLECTION
Creates a new named collection.
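A minimal sketch (the collection name and keys are illustrative):

```sql
CREATE NAMED COLLECTION mycollection AS
    host = 'localhost',
    port = '9000',
    user = 'default',
    password = '';
```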

View File

@ -538,7 +538,7 @@ SELECT base58Decode('3dc8KtHrwM');
Synonym: `TO_BASE64`.
## base64UrlEncode(s)
## base64URLEncode(s)
Encodes a URL (String or FixedString) into its base64 representation according to [RFC 4648](https://tools.ietf.org/html/rfc4648).
@ -548,7 +548,7 @@ SELECT base58Decode('3dc8KtHrwM');
Synonym: `FROM_BASE64`.
## base64UrlDecode(s)
## base64URLDecode(s)
Decodes the base64 representation of a URL back into the original string according to [RFC 4648](https://tools.ietf.org/html/rfc4648). Throws an exception if the input cannot be decoded.
@ -556,9 +556,9 @@ SELECT base58Decode('3dc8KtHrwM');
Works like base64Decode, but returns an empty string if the input cannot be decoded.
## tryBase64UrlDecode(s)
## tryBase64URLDecode(s)
Works like base64UrlDecode, but returns an empty string if the input cannot be decoded.
Works like base64URLDecode, but returns an empty string if the input cannot be decoded.
## endsWith(s, suffix) {#endswith}

View File

@ -248,6 +248,10 @@ std::vector<String> Client::loadWarningMessages()
}
}
Poco::Util::LayeredConfiguration & Client::getClientConfiguration()
{
return config();
}
void Client::initialize(Poco::Util::Application & self)
{
@ -697,9 +701,7 @@ bool Client::processWithFuzzing(const String & full_query)
const char * begin = full_query.data();
orig_ast = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
/*allow_multi_statements=*/ true,
/*is_interactive=*/ is_interactive,
/*ignore_error=*/ ignore_error);
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
{

View File

@ -16,6 +16,9 @@ public:
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
bool processWithFuzzing(const String & full_query) override;
std::optional<bool> processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query);

View File

@ -11,7 +11,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
LibraryBridgeHandlers.cpp
SharedLibrary.cpp
library-bridge.cpp
createFunctionBaseCast.cpp
)
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
@ -20,6 +19,7 @@ target_link_libraries(clickhouse-library-bridge PRIVATE
daemon
dbms
bridge
clickhouse_functions
)
set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)

View File

@ -82,6 +82,11 @@ void applySettingsOverridesForLocal(ContextMutablePtr context)
context->setSettings(settings);
}
Poco::Util::LayeredConfiguration & LocalServer::getClientConfiguration()
{
return config();
}
void LocalServer::processError(const String &) const
{
if (ignore_error)
@ -117,19 +122,19 @@ void LocalServer::initialize(Poco::Util::Application & self)
Poco::Util::Application::initialize(self);
/// Load config files if exists
if (config().has("config-file") || fs::exists("config.xml"))
if (getClientConfiguration().has("config-file") || fs::exists("config.xml"))
{
const auto config_path = config().getString("config-file", "config.xml");
const auto config_path = getClientConfiguration().getString("config-file", "config.xml");
ConfigProcessor config_processor(config_path, false, true);
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 10000),
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000)
getClientConfiguration().getUInt("max_thread_pool_size", 10000),
getClientConfiguration().getUInt("max_thread_pool_free_size", 1000),
getClientConfiguration().getUInt("thread_pool_queue_size", 10000)
);
#if USE_AZURE_BLOB_STORAGE
@ -141,18 +146,18 @@ void LocalServer::initialize(Poco::Util::Application & self)
#endif
getIOThreadPool().initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
getClientConfiguration().getUInt("max_io_thread_pool_size", 100),
getClientConfiguration().getUInt("max_io_thread_pool_free_size", 0),
getClientConfiguration().getUInt("io_thread_pool_queue_size", 10000));
const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64);
const size_t active_parts_loading_threads = getClientConfiguration().getUInt("max_active_parts_loading_thread_pool_size", 64);
getActivePartsLoadingThreadPool().initialize(
active_parts_loading_threads,
0, // We don't need any threads once all the parts are loaded
active_parts_loading_threads);
const size_t outdated_parts_loading_threads = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32);
const size_t outdated_parts_loading_threads = getClientConfiguration().getUInt("max_outdated_parts_loading_thread_pool_size", 32);
getOutdatedPartsLoadingThreadPool().initialize(
outdated_parts_loading_threads,
0, // We don't need any threads once all the parts are loaded
@ -160,7 +165,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
const size_t unexpected_parts_loading_threads = config().getUInt("max_unexpected_parts_loading_thread_pool_size", 32);
const size_t unexpected_parts_loading_threads = getClientConfiguration().getUInt("max_unexpected_parts_loading_thread_pool_size", 32);
getUnexpectedPartsLoadingThreadPool().initialize(
unexpected_parts_loading_threads,
0, // We don't need any threads once all the parts are loaded
@ -168,7 +173,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128);
const size_t cleanup_threads = getClientConfiguration().getUInt("max_parts_cleaning_thread_pool_size", 128);
getPartsCleaningThreadPool().initialize(
cleanup_threads,
0, // We don't need any threads once all the parts are deleted
@ -201,10 +206,10 @@ void LocalServer::tryInitPath()
{
std::string path;
if (config().has("path"))
if (getClientConfiguration().has("path"))
{
// User-supplied path.
path = config().getString("path");
path = getClientConfiguration().getString("path");
Poco::trimInPlace(path);
if (path.empty())
@ -263,13 +268,13 @@ void LocalServer::tryInitPath()
global_context->setUserFilesPath(""); /// user's files are everywhere
std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/");
std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts/");
global_context->setUserScriptsPath(user_scripts_path);
/// top_level_domains_lists
const std::string & top_level_domains_path = config().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
const std::string & top_level_domains_path = getClientConfiguration().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
if (!top_level_domains_path.empty())
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", getClientConfiguration());
}
@ -311,14 +316,14 @@ void LocalServer::cleanup()
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty()))
if (!getClientConfiguration().has("table-structure") && !getClientConfiguration().has("table-file") && !getClientConfiguration().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty()))
return {};
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
auto table_structure = config().getString("table-structure", "auto");
auto table_name = backQuoteIfNeed(getClientConfiguration().getString("table-name", "table"));
auto table_structure = getClientConfiguration().getString("table-structure", "auto");
String table_file;
if (!config().has("table-file") || config().getString("table-file") == "-")
if (!getClientConfiguration().has("table-file") || getClientConfiguration().getString("table-file") == "-")
{
/// Use Unix tools stdin naming convention
table_file = "stdin";
@ -326,7 +331,7 @@ std::string LocalServer::getInitialCreateTableQuery()
else
{
/// Use regular file
auto file_name = config().getString("table-file");
auto file_name = getClientConfiguration().getString("table-file");
table_file = quoteString(file_name);
}
@ -374,18 +379,18 @@ void LocalServer::setupUsers()
ConfigurationPtr users_config;
auto & access_control = global_context->getAccessControl();
access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
if (config().has("config-file") || fs::exists("config.xml"))
access_control.setNoPasswordAllowed(getClientConfiguration().getBool("allow_no_password", true));
access_control.setPlaintextPasswordAllowed(getClientConfiguration().getBool("allow_plaintext_password", true));
if (getClientConfiguration().has("config-file") || fs::exists("config.xml"))
{
String config_path = config().getString("config-file", "");
bool has_user_directories = config().has("user_directories");
String config_path = getClientConfiguration().getString("config-file", "");
bool has_user_directories = getClientConfiguration().has("user_directories");
const auto config_dir = fs::path{config_path}.remove_filename().string();
String users_config_path = config().getString("users_config", "");
String users_config_path = getClientConfiguration().getString("users_config", "");
if (users_config_path.empty() && has_user_directories)
{
users_config_path = config().getString("user_directories.users_xml.path");
users_config_path = getClientConfiguration().getString("user_directories.users_xml.path");
if (fs::path(users_config_path).is_relative() && fs::exists(fs::path(config_dir) / users_config_path))
users_config_path = fs::path(config_dir) / users_config_path;
}
@ -409,10 +414,10 @@ void LocalServer::setupUsers()
void LocalServer::connect()
{
connection_parameters = ConnectionParameters(config(), "localhost");
connection_parameters = ConnectionParameters(getClientConfiguration(), "localhost");
ReadBuffer * in;
auto table_file = config().getString("table-file", "-");
auto table_file = getClientConfiguration().getString("table-file", "-");
if (table_file == "-" || table_file == "stdin")
{
in = &std_in;
@ -433,7 +438,7 @@ try
UseSSL use_ssl;
thread_status.emplace();
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
StackTrace::setShowAddresses(getClientConfiguration().getBool("show_addresses_in_stack_traces", true));
setupSignalHandler();
@ -448,7 +453,7 @@ try
if (rlim.rlim_cur < rlim.rlim_max)
{
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
rlim.rlim_cur = getClientConfiguration().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
if (rc != 0)
std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
@ -456,8 +461,8 @@ try
}
is_interactive = stdin_is_a_tty
&& (config().hasOption("interactive")
|| (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
&& (getClientConfiguration().hasOption("interactive")
|| (queries.empty() && !getClientConfiguration().has("table-structure") && queries_files.empty() && !getClientConfiguration().has("table-file")));
if (!is_interactive)
{
@ -481,7 +486,7 @@ try
SCOPE_EXIT({ cleanup(); });
initTTYBuffer(toProgressOption(config().getString("progress", "default")));
initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default")));
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
applyCmdSettings(global_context);
@ -489,7 +494,7 @@ try
/// try to load user defined executable functions, throw on error and die
try
{
global_context->loadOrReloadUserDefinedExecutableFunctions(config());
global_context->loadOrReloadUserDefinedExecutableFunctions(getClientConfiguration());
}
catch (...)
{
@ -530,7 +535,7 @@ try
}
catch (const DB::Exception & e)
{
bool need_print_stack_trace = config().getBool("stacktrace", false);
bool need_print_stack_trace = getClientConfiguration().getBool("stacktrace", false);
std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl;
return e.code() ? e.code() : -1;
}
@ -542,42 +547,42 @@ catch (...)
void LocalServer::updateLoggerLevel(const String & logs_level)
{
config().setString("logger.level", logs_level);
updateLevels(config(), logger());
getClientConfiguration().setString("logger.level", logs_level);
updateLevels(getClientConfiguration(), logger());
}
void LocalServer::processConfig()
{
if (!queries.empty() && config().has("queries-file"))
if (!queries.empty() && getClientConfiguration().has("queries-file"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
if (config().has("multiquery"))
if (getClientConfiguration().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
pager = getClientConfiguration().getString("pager", "");
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
delayed_interactive = getClientConfiguration().has("interactive") && (!queries.empty() || getClientConfiguration().has("queries-file"));
if (!is_interactive || delayed_interactive)
{
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
ignore_error = config().getBool("ignore-error", false);
echo_queries = getClientConfiguration().hasOption("echo") || getClientConfiguration().hasOption("verbose");
ignore_error = getClientConfiguration().getBool("ignore-error", false);
}
print_stack_trace = config().getBool("stacktrace", false);
print_stack_trace = getClientConfiguration().getBool("stacktrace", false);
const std::string clickhouse_dialect{"clickhouse"};
load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false)
&& config().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false);
load_suggestions = (is_interactive || delayed_interactive) && !getClientConfiguration().getBool("disable_suggestion", false)
&& getClientConfiguration().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
wait_for_suggestions_to_load = getClientConfiguration().getBool("wait_for_suggestions_to_load", false);
auto logging = (config().has("logger.console")
|| config().has("logger.level")
|| config().has("log-level")
|| config().has("send_logs_level")
|| config().has("logger.log"));
auto logging = (getClientConfiguration().has("logger.console")
|| getClientConfiguration().has("logger.level")
|| getClientConfiguration().has("log-level")
|| getClientConfiguration().has("send_logs_level")
|| getClientConfiguration().has("logger.log"));
auto level = config().getString("log-level", "trace");
auto level = getClientConfiguration().getString("log-level", "trace");
if (config().has("server_logs_file"))
if (getClientConfiguration().has("server_logs_file"))
{
auto poco_logs_level = Poco::Logger::parseLevel(level);
Poco::Logger::root().setLevel(poco_logs_level);
@ -587,10 +592,10 @@ void LocalServer::processConfig()
}
else
{
config().setString("logger", "logger");
getClientConfiguration().setString("logger", "logger");
auto log_level_default = logging ? level : "fatal";
config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default)));
buildLoggers(config(), logger(), "clickhouse-local");
getClientConfiguration().setString("logger.level", getClientConfiguration().getString("log-level", getClientConfiguration().getString("send_logs_level", log_level_default)));
buildLoggers(getClientConfiguration(), logger(), "clickhouse-local");
}
shared_context = Context::createShared();
@ -604,13 +609,13 @@ void LocalServer::processConfig()
LoggerRawPtr log = &logger();
/// Maybe useless
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
if (getClientConfiguration().has("macros"))
global_context->setMacros(std::make_unique<Macros>(getClientConfiguration(), "macros", log));
setDefaultFormatsAndCompressionFromConfiguration();
/// Sets external authenticators config (LDAP, Kerberos).
global_context->setExternalAuthenticatorsConfig(config());
global_context->setExternalAuthenticatorsConfig(getClientConfiguration());
setupUsers();
@ -619,12 +624,12 @@ void LocalServer::processConfig()
global_context->getProcessList().setMaxSize(0);
const size_t physical_server_memory = getMemoryAmount();
const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
const double cache_size_to_ram_max_ratio = getClientConfiguration().getDouble("cache_size_to_ram_max_ratio", 0.5);
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
String uncompressed_cache_policy = getClientConfiguration().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
size_t uncompressed_cache_size = getClientConfiguration().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
double uncompressed_cache_size_ratio = getClientConfiguration().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
@ -632,9 +637,9 @@ void LocalServer::processConfig()
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
String mark_cache_policy = getClientConfiguration().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = getClientConfiguration().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
double mark_cache_size_ratio = getClientConfiguration().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
@ -644,9 +649,9 @@ void LocalServer::processConfig()
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
String index_uncompressed_cache_policy = getClientConfiguration().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
size_t index_uncompressed_cache_size = getClientConfiguration().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
double index_uncompressed_cache_size_ratio = getClientConfiguration().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
@ -654,9 +659,9 @@ void LocalServer::processConfig()
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
String index_mark_cache_policy = getClientConfiguration().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
size_t index_mark_cache_size = getClientConfiguration().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
double index_mark_cache_size_ratio = getClientConfiguration().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
@ -664,7 +669,7 @@ void LocalServer::processConfig()
}
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
size_t mmap_cache_size = getClientConfiguration().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
@ -676,8 +681,8 @@ void LocalServer::processConfig()
global_context->setQueryCache(0, 0, 0, 0);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
size_t compiled_expression_cache_max_size_in_bytes = getClientConfiguration().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = getClientConfiguration().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
@ -689,16 +694,16 @@ void LocalServer::processConfig()
applyCmdOptions(global_context);
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
global_context->setDefaultProfiles(getClientConfiguration());
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
std::string default_database = config().getString("default_database", "default");
std::string default_database = getClientConfiguration().getString("default_database", "default");
DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
global_context->setCurrentDatabase(default_database);
if (config().has("path"))
if (getClientConfiguration().has("path"))
{
String path = global_context->getPath();
fs::create_directories(fs::path(path));
@ -713,7 +718,7 @@ void LocalServer::processConfig()
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
if (!config().has("only-system-tables"))
if (!getClientConfiguration().has("only-system-tables"))
{
DatabaseCatalog::instance().createBackgroundTasks();
waitLoad(loadMetadata(global_context));
@ -725,15 +730,15 @@ void LocalServer::processConfig()
LOG_DEBUG(log, "Loaded metadata.");
}
else if (!config().has("no-system-tables"))
else if (!getClientConfiguration().has("no-system-tables"))
{
attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false);
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
}
server_display_name = config().getString("display_name", "");
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", ":) ");
server_display_name = getClientConfiguration().getString("display_name", "");
prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) ");
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
@ -811,7 +816,7 @@ void LocalServer::applyCmdSettings(ContextMutablePtr context)
void LocalServer::applyCmdOptions(ContextMutablePtr context)
{
context->setDefaultFormat(config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")));
context->setDefaultFormat(getClientConfiguration().getString("output-format", getClientConfiguration().getString("format", is_interactive ? "PrettyCompact" : "TSV")));
applyCmdSettings(context);
}
@ -819,33 +824,33 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context)
void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector<Arguments> &, const std::vector<Arguments> &)
{
if (options.count("table"))
config().setString("table-name", options["table"].as<std::string>());
getClientConfiguration().setString("table-name", options["table"].as<std::string>());
if (options.count("file"))
config().setString("table-file", options["file"].as<std::string>());
getClientConfiguration().setString("table-file", options["file"].as<std::string>());
if (options.count("structure"))
config().setString("table-structure", options["structure"].as<std::string>());
getClientConfiguration().setString("table-structure", options["structure"].as<std::string>());
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
getClientConfiguration().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
getClientConfiguration().setBool("only-system-tables", true);
if (options.count("database"))
config().setString("default_database", options["database"].as<std::string>());
getClientConfiguration().setString("default_database", options["database"].as<std::string>());
if (options.count("input-format"))
config().setString("table-data-format", options["input-format"].as<std::string>());
getClientConfiguration().setString("table-data-format", options["input-format"].as<std::string>());
if (options.count("output-format"))
config().setString("output-format", options["output-format"].as<std::string>());
getClientConfiguration().setString("output-format", options["output-format"].as<std::string>());
if (options.count("logger.console"))
config().setBool("logger.console", options["logger.console"].as<bool>());
getClientConfiguration().setBool("logger.console", options["logger.console"].as<bool>());
if (options.count("logger.log"))
config().setString("logger.log", options["logger.log"].as<std::string>());
getClientConfiguration().setString("logger.log", options["logger.log"].as<std::string>());
if (options.count("logger.level"))
config().setString("logger.level", options["logger.level"].as<std::string>());
getClientConfiguration().setString("logger.level", options["logger.level"].as<std::string>());
if (options.count("send_logs_level"))
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
getClientConfiguration().setString("send_logs_level", options["send_logs_level"].as<std::string>());
if (options.count("wait_for_suggestions_to_load"))
config().setBool("wait_for_suggestions_to_load", true);
getClientConfiguration().setBool("wait_for_suggestions_to_load", true);
}
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)

View File

@ -30,6 +30,9 @@ public:
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
void connect() override;
void processError(const String & query) const override;

View File

@ -13,7 +13,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
getIdentifierQuote.cpp
odbc-bridge.cpp
validateODBCConnectionString.cpp
createFunctionBaseCast.cpp
)
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
@ -25,6 +24,7 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE
clickhouse_parsers
ch_contrib::nanodbc
ch_contrib::unixodbc
clickhouse_functions
)
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)

View File

@ -29,7 +29,14 @@
-->
<size>1000M</size>
<count>10</count>
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console when not in daemon mode and stdout is a tty) -->
<!-- <console_log_level>trace</console_log_level> -->
<!-- <use_syslog>0</use_syslog> -->
<!-- <syslog_level>trace</syslog_level> -->
<!-- <stream_compress>0</stream_compress> -->
<!-- Per level overrides (legacy):

View File

@ -1,3 +1,5 @@
#include <Common/FieldVisitorToString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeNullable.h>
@ -3495,7 +3497,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
*
* 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process.
*/
ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias)
ProjectionNames QueryAnalyzer::resolveExpressionNode(
QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias)
{
checkStackSize();
@ -4505,7 +4508,36 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
table_name = table_identifier[1];
}
auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage(function_ast, database_name, table_name);
/// Collect parametrized view arguments
NameToNameMap view_params;
for (const auto & argument : table_function_node_typed.getArguments())
{
if (auto * arg_func = argument->as<FunctionNode>())
{
if (arg_func->getFunctionName() != "equals")
continue;
auto nodes = arg_func->getArguments().getNodes();
if (nodes.size() != 2)
continue;
if (auto * identifier_node = nodes[0]->as<IdentifierNode>())
{
resolveExpressionNode(nodes[1], scope, /* allow_lambda_expression */false, /* allow_table_expression */false);
if (auto * constant = nodes[1]->as<ConstantNode>())
{
view_params[identifier_node->getIdentifier().getFullName()] = convertFieldToString(constant->getValue());
}
}
}
}
auto context = scope_context->getQueryContext();
auto parametrized_view_storage = context->buildParametrizedViewStorage(
database_name,
table_name,
view_params);
if (parametrized_view_storage)
{
auto fake_table_node = std::make_shared<TableNode>(parametrized_view_storage, scope_context);

View File

@ -438,7 +438,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_
String create_table_query_str = serializeAST(*create_table_query);
bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table});
auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query);
auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query, context->getCurrentDatabase());
bool table_has_data = backup->hasFiles(data_path_in_backup);
std::lock_guard lock{mutex};

View File

@ -222,7 +222,7 @@ add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
add_object_library(clickhouse_storages_statistics Storages/Statistics)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/S3Queue)
add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue)
add_object_library(clickhouse_storages_materializedview Storages/MaterializedView)
add_object_library(clickhouse_client Client)
add_object_library(clickhouse_bridge BridgeHelper)

View File

@ -302,8 +302,29 @@ public:
ClientBase::~ClientBase() = default;
ClientBase::ClientBase() = default;
ClientBase::ClientBase(
int in_fd_,
int out_fd_,
int err_fd_,
std::istream & input_stream_,
std::ostream & output_stream_,
std::ostream & error_stream_
)
: std_in(in_fd_)
, std_out(out_fd_)
, progress_indication(output_stream_, in_fd_, err_fd_)
, in_fd(in_fd_)
, out_fd(out_fd_)
, err_fd(err_fd_)
, input_stream(input_stream_)
, output_stream(output_stream_)
, error_stream(error_stream_)
{
stdin_is_a_tty = isatty(in_fd);
stdout_is_a_tty = isatty(out_fd);
stderr_is_a_tty = isatty(err_fd);
terminal_width = getTerminalWidth(in_fd, err_fd);
}
void ClientBase::setupSignalHandler()
{
@ -330,7 +351,7 @@ void ClientBase::setupSignalHandler()
}
ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error)
ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements)
{
std::unique_ptr<IParserBase> parser;
ASTPtr res;
@ -359,7 +380,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting
if (!res)
{
std::cerr << std::endl << message << std::endl << std::endl;
error_stream << std::endl << message << std::endl << std::endl;
return nullptr;
}
}
@ -373,11 +394,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting
if (is_interactive)
{
std::cout << std::endl;
WriteBufferFromOStream res_buf(std::cout, 4096);
output_stream << std::endl;
WriteBufferFromOStream res_buf(output_stream, 4096);
formatAST(*res, res_buf);
res_buf.finalize();
std::cout << std::endl << std::endl;
output_stream << std::endl << std::endl;
}
return res;
@ -481,7 +502,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (need_render_progress && tty_buf)
{
if (select_into_file && !select_into_file_and_stdout)
std::cerr << "\r";
error_stream << "\r";
progress_indication.writeProgress(*tty_buf);
}
}
@ -741,17 +762,17 @@ bool ClientBase::isRegularFile(int fd)
void ClientBase::setDefaultFormatsAndCompressionFromConfiguration()
{
if (config().has("output-format"))
if (getClientConfiguration().has("output-format"))
{
default_output_format = config().getString("output-format");
default_output_format = getClientConfiguration().getString("output-format");
is_default_format = false;
}
else if (config().has("format"))
else if (getClientConfiguration().has("format"))
{
default_output_format = config().getString("format");
default_output_format = getClientConfiguration().getString("format");
is_default_format = false;
}
else if (config().has("vertical"))
else if (getClientConfiguration().has("vertical"))
{
default_output_format = "Vertical";
is_default_format = false;
@ -777,17 +798,17 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration()
default_output_format = "TSV";
}
if (config().has("input-format"))
if (getClientConfiguration().has("input-format"))
{
default_input_format = config().getString("input-format");
default_input_format = getClientConfiguration().getString("input-format");
}
else if (config().has("format"))
else if (getClientConfiguration().has("format"))
{
default_input_format = config().getString("format");
default_input_format = getClientConfiguration().getString("format");
}
else if (config().getString("table-file", "-") != "-")
else if (getClientConfiguration().getString("table-file", "-") != "-")
{
auto file_name = config().getString("table-file");
auto file_name = getClientConfiguration().getString("table-file");
std::optional<String> format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(file_name);
if (format_from_file_name)
default_input_format = *format_from_file_name;
@ -803,7 +824,7 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration()
default_input_format = "TSV";
}
format_max_block_size = config().getUInt64("format_max_block_size",
format_max_block_size = getClientConfiguration().getUInt64("format_max_block_size",
global_context->getSettingsRef().max_block_size);
/// Setting value from cmd arg overrides one from config
@ -813,7 +834,7 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration()
}
else
{
insert_format_max_block_size = config().getUInt64("insert_format_max_block_size",
insert_format_max_block_size = getClientConfiguration().getUInt64("insert_format_max_block_size",
global_context->getSettingsRef().max_insert_block_size);
}
}
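/// Net effect of the branches above: for output, "output-format" takes
/// precedence over "format", which takes precedence over "vertical"; for
/// input, "input-format" takes precedence over "format", which takes
/// precedence over inferring the format from the "table-file" name.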
@ -924,9 +945,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
const char * begin = full_query.data();
auto parsed_query = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
/*allow_multi_statements=*/ false,
is_interactive,
ignore_error);
/*allow_multi_statements=*/ false);
if (!parsed_query)
return;
@ -1100,7 +1119,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
/// has been received yet.
if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && --retries_left)
{
std::cerr << "Got a transient error from the server, will"
error_stream << "Got a transient error from the server, will"
<< " retry (" << retries_left << " retries left)";
}
else
@ -1154,7 +1173,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
double elapsed = receive_watch.elapsedSeconds();
if (break_on_timeout && elapsed > receive_timeout.totalSeconds())
{
std::cout << "Timeout exceeded while receiving data from server."
output_stream << "Timeout exceeded while receiving data from server."
<< " Waited for " << static_cast<size_t>(elapsed) << " seconds,"
<< " timeout is " << receive_timeout.totalSeconds() << " seconds." << std::endl;
@ -1189,7 +1208,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
if (cancelled && is_interactive)
{
std::cout << "Query was cancelled." << std::endl;
output_stream << "Query was cancelled." << std::endl;
cancelled_printed = true;
}
}
@ -1308,9 +1327,9 @@ void ClientBase::onEndOfStream()
if (is_interactive)
{
if (cancelled && !cancelled_printed)
std::cout << "Query was cancelled." << std::endl;
output_stream << "Query was cancelled." << std::endl;
else if (!written_first_block)
std::cout << "Ok." << std::endl;
output_stream << "Ok." << std::endl;
}
}
@ -1863,7 +1882,7 @@ void ClientBase::cancelQuery()
progress_indication.clearProgressOutput(*tty_buf);
if (is_interactive)
std::cout << "Cancelling query." << std::endl;
output_stream << "Cancelling query." << std::endl;
cancelled = true;
}
@ -2026,7 +2045,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
{
const String & new_database = use_query->getDatabase();
/// If the client initiates the reconnection, it takes the settings from the config.
config().setString("database", new_database);
getClientConfiguration().setString("database", new_database);
/// If the connection initiates the reconnection, it uses its variable.
connection->setDefaultDatabase(new_database);
}
@ -2046,21 +2065,21 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (is_interactive)
{
std::cout << std::endl;
output_stream << std::endl;
if (!server_exception || processed_rows != 0)
std::cout << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. ";
std::cout << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
output_stream << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. ";
output_stream << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
progress_indication.writeFinalProgress();
std::cout << std::endl << std::endl;
output_stream << std::endl << std::endl;
}
else if (print_time_to_stderr)
else if (getClientConfiguration().getBool("print-time-to-stderr", false))
{
std::cerr << progress_indication.elapsedSeconds() << "\n";
error_stream << progress_indication.elapsedSeconds() << "\n";
}
if (!is_interactive && print_num_processed_rows)
if (!is_interactive && getClientConfiguration().getBool("print-num-processed-rows", false))
{
std::cout << "Processed rows: " << processed_rows << "\n";
output_stream << "Processed rows: " << processed_rows << "\n";
}
if (have_error && report_error)
@ -2110,9 +2129,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
{
parsed_query = parseQuery(this_query_end, all_queries_end,
global_context->getSettingsRef(),
/*allow_multi_statements=*/ true,
is_interactive,
ignore_error);
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
{
@ -2428,12 +2445,12 @@ void ClientBase::initQueryIdFormats()
return;
/// Initialize query_id_formats if any
if (config().has("query_id_formats"))
if (getClientConfiguration().has("query_id_formats"))
{
Poco::Util::AbstractConfiguration::Keys keys;
config().keys("query_id_formats", keys);
getClientConfiguration().keys("query_id_formats", keys);
for (const auto & name : keys)
query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name));
query_id_formats.emplace_back(name + ":", getClientConfiguration().getString("query_id_formats." + name));
}
if (query_id_formats.empty())
@ -2478,9 +2495,9 @@ bool ClientBase::addMergeTreeSettings(ASTCreateQuery & ast_create)
void ClientBase::runInteractive()
{
if (config().has("query_id"))
if (getClientConfiguration().has("query_id"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "query_id could be specified only in non-interactive mode");
if (print_time_to_stderr)
if (getClientConfiguration().getBool("print-time-to-stderr", false))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "time option could be specified only in non-interactive mode");
initQueryIdFormats();
@ -2493,9 +2510,9 @@ void ClientBase::runInteractive()
{
/// Load suggestion data from the server.
if (global_context->getApplicationType() == Context::ApplicationType::CLIENT)
suggest->load<Connection>(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load);
suggest->load<Connection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL)
suggest->load<LocalConnection>(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load);
suggest->load<LocalConnection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
}
if (home_path.empty())
@ -2506,8 +2523,8 @@ void ClientBase::runInteractive()
}
/// Load command history if present.
if (config().has("history_file"))
history_file = config().getString("history_file");
if (getClientConfiguration().has("history_file"))
history_file = getClientConfiguration().getString("history_file");
else
{
auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE"); // NOLINT(concurrency-mt-unsafe)
@ -2528,7 +2545,7 @@ void ClientBase::runInteractive()
{
if (e.getErrno() != EEXIST)
{
std::cerr << getCurrentExceptionMessage(false) << '\n';
error_stream << getCurrentExceptionMessage(false) << '\n';
}
}
}
@ -2539,13 +2556,13 @@ void ClientBase::runInteractive()
#if USE_REPLXX
replxx::Replxx::highlighter_callback_t highlight_callback{};
if (config().getBool("highlight", true))
if (getClientConfiguration().getBool("highlight", true))
highlight_callback = highlight;
ReplxxLineReader lr(
*suggest,
history_file,
config().has("multiline"),
getClientConfiguration().has("multiline"),
query_extenders,
query_delimiters,
word_break_characters,
@ -2553,7 +2570,7 @@ void ClientBase::runInteractive()
#else
LineReader lr(
history_file,
config().has("multiline"),
getClientConfiguration().has("multiline"),
query_extenders,
query_delimiters,
word_break_characters);
@ -2633,7 +2650,7 @@ void ClientBase::runInteractive()
{
// If a separate connection loading suggestions failed to open a new session,
// use the main session to receive them.
suggest->load(*connection, connection_parameters.timeouts, config().getInt("suggestion_limit"), global_context->getClientInfo());
suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo());
}
try
@ -2648,7 +2665,7 @@ void ClientBase::runInteractive()
break;
/// We don't need to handle the test hints in the interactive mode.
std::cerr << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl;
error_stream << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl;
client_exception.reset(e.clone());
}
@ -2665,11 +2682,11 @@ void ClientBase::runInteractive()
while (true);
if (isNewYearMode())
std::cout << "Happy new year." << std::endl;
output_stream << "Happy new year." << std::endl;
else if (isChineseNewYearMode(local_tz))
std::cout << "Happy Chinese new year. 春节快乐!" << std::endl;
output_stream << "Happy Chinese new year. 春节快乐!" << std::endl;
else
std::cout << "Bye." << std::endl;
output_stream << "Bye." << std::endl;
}
@ -2680,7 +2697,7 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
ReadBufferFromFile in(file_name);
readStringUntilEOF(queries_from_file, in);
if (!has_log_comment)
if (!getClientConfiguration().has("log_comment"))
{
Settings settings = global_context->getSettings();
/// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
@ -2789,13 +2806,13 @@ void ClientBase::clearTerminal()
/// It is needed if garbage is left in terminal.
/// Show cursor. It can be left hidden by invocation of previous programs.
/// A test for this feature: perl -e 'print "x"x100000'; echo -ne '\033[0;0H\033[?25l'; clickhouse-client
std::cout << "\033[0J" "\033[?25h";
output_stream << "\033[0J" "\033[?25h";
}
void ClientBase::showClientVersion()
{
std::cout << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
}
namespace
@ -2862,7 +2879,10 @@ private:
}
/// Enable optimizations even in debug builds, because otherwise options parsing becomes extremely slow, affecting .sh tests
#if defined(__clang__)
#pragma clang optimize on
#endif
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)
@ -3080,18 +3100,18 @@ void ClientBase::init(int argc, char ** argv)
if (options.count("version-clean"))
{
std::cout << VERSION_STRING;
output_stream << VERSION_STRING;
exit(0); // NOLINT(concurrency-mt-unsafe)
}
if (options.count("verbose"))
config().setBool("verbose", true);
getClientConfiguration().setBool("verbose", true);
/// Output of help message.
if (options.count("help")
|| (options.count("host") && options["host"].as<std::string>() == "elp")) /// If user writes -help instead of --help.
{
if (config().getBool("verbose", false))
if (getClientConfiguration().getBool("verbose", false))
printHelpMessage(options_description, true);
else
printHelpMessage(options_description_non_verbose, false);
@ -3099,72 +3119,75 @@ void ClientBase::init(int argc, char ** argv)
}
/// Common options for clickhouse-client and clickhouse-local.
/// Output execution time to stderr in batch mode.
if (options.count("time"))
print_time_to_stderr = true;
getClientConfiguration().setBool("print-time-to-stderr", true);
if (options.count("query"))
queries = options["query"].as<std::vector<std::string>>();
if (options.count("query_id"))
config().setString("query_id", options["query_id"].as<std::string>());
getClientConfiguration().setString("query_id", options["query_id"].as<std::string>());
if (options.count("database"))
config().setString("database", options["database"].as<std::string>());
getClientConfiguration().setString("database", options["database"].as<std::string>());
if (options.count("config-file"))
config().setString("config-file", options["config-file"].as<std::string>());
getClientConfiguration().setString("config-file", options["config-file"].as<std::string>());
if (options.count("queries-file"))
queries_files = options["queries-file"].as<std::vector<std::string>>();
if (options.count("multiline"))
config().setBool("multiline", true);
getClientConfiguration().setBool("multiline", true);
if (options.count("multiquery"))
config().setBool("multiquery", true);
getClientConfiguration().setBool("multiquery", true);
if (options.count("ignore-error"))
config().setBool("ignore-error", true);
getClientConfiguration().setBool("ignore-error", true);
if (options.count("format"))
config().setString("format", options["format"].as<std::string>());
getClientConfiguration().setString("format", options["format"].as<std::string>());
if (options.count("output-format"))
config().setString("output-format", options["output-format"].as<std::string>());
getClientConfiguration().setString("output-format", options["output-format"].as<std::string>());
if (options.count("vertical"))
config().setBool("vertical", true);
getClientConfiguration().setBool("vertical", true);
if (options.count("stacktrace"))
config().setBool("stacktrace", true);
getClientConfiguration().setBool("stacktrace", true);
if (options.count("print-profile-events"))
config().setBool("print-profile-events", true);
getClientConfiguration().setBool("print-profile-events", true);
if (options.count("profile-events-delay-ms"))
config().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as<UInt64>());
getClientConfiguration().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as<UInt64>());
/// Whether to print the number of processed rows at the end of the query.
if (options.count("processed-rows"))
print_num_processed_rows = true;
getClientConfiguration().setBool("print-num-processed-rows", true);
if (options.count("progress"))
{
switch (options["progress"].as<ProgressOption>())
{
case DEFAULT:
config().setString("progress", "default");
getClientConfiguration().setString("progress", "default");
break;
case OFF:
config().setString("progress", "off");
getClientConfiguration().setString("progress", "off");
break;
case TTY:
config().setString("progress", "tty");
getClientConfiguration().setString("progress", "tty");
break;
case ERR:
config().setString("progress", "err");
getClientConfiguration().setString("progress", "err");
break;
}
}
if (options.count("echo"))
config().setBool("echo", true);
getClientConfiguration().setBool("echo", true);
if (options.count("disable_suggestion"))
config().setBool("disable_suggestion", true);
getClientConfiguration().setBool("disable_suggestion", true);
if (options.count("wait_for_suggestions_to_load"))
config().setBool("wait_for_suggestions_to_load", true);
getClientConfiguration().setBool("wait_for_suggestions_to_load", true);
if (options.count("suggestion_limit"))
config().setInt("suggestion_limit", options["suggestion_limit"].as<int>());
getClientConfiguration().setInt("suggestion_limit", options["suggestion_limit"].as<int>());
if (options.count("highlight"))
config().setBool("highlight", options["highlight"].as<bool>());
getClientConfiguration().setBool("highlight", options["highlight"].as<bool>());
if (options.count("history_file"))
config().setString("history_file", options["history_file"].as<std::string>());
getClientConfiguration().setString("history_file", options["history_file"].as<std::string>());
if (options.count("interactive"))
config().setBool("interactive", true);
getClientConfiguration().setBool("interactive", true);
if (options.count("pager"))
config().setString("pager", options["pager"].as<std::string>());
getClientConfiguration().setString("pager", options["pager"].as<std::string>());
if (options.count("log-level"))
Poco::Logger::root().setLevel(options["log-level"].as<std::string>());
@ -3182,13 +3205,13 @@ void ClientBase::init(int argc, char ** argv)
alias_names.reserve(options_description.main_description->options().size());
for (const auto& option : options_description.main_description->options())
alias_names.insert(option->long_name());
argsToConfig(common_arguments, config(), 100, &alias_names);
argsToConfig(common_arguments, getClientConfiguration(), 100, &alias_names);
}
clearPasswordFromCommandLine(argc, argv);
/// Limit on total memory usage
std::string max_client_memory_usage = config().getString("max_memory_usage_in_client", "0" /*default value*/);
std::string max_client_memory_usage = getClientConfiguration().getString("max_memory_usage_in_client", "0" /*default value*/);
if (max_client_memory_usage != "0")
{
UInt64 max_client_memory_usage_int = parseWithSizeSuffix<UInt64>(max_client_memory_usage.c_str(), max_client_memory_usage.length());
@ -3197,8 +3220,6 @@ void ClientBase::init(int argc, char ** argv)
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
has_log_comment = config().has("log_comment");
}
}

View File

@ -18,7 +18,6 @@
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
namespace po = boost::program_options;
@ -67,13 +66,22 @@ class ClientBase : public Poco::Util::Application, public IHints<2>
public:
using Arguments = std::vector<String>;
ClientBase();
explicit ClientBase
(
int in_fd_ = STDIN_FILENO,
int out_fd_ = STDOUT_FILENO,
int err_fd_ = STDERR_FILENO,
std::istream & input_stream_ = std::cin,
std::ostream & output_stream_ = std::cout,
std::ostream & error_stream_ = std::cerr
);
~ClientBase() override;
void init(int argc, char ** argv);
std::vector<String> getAllRegisteredNames() const override { return cmd_options; }
static ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error);
ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements);
protected:
void runInteractive();
@ -82,6 +90,9 @@ protected:
char * argv0 = nullptr;
void runLibFuzzer();
/// This is the analogue of Poco::Application::config()
virtual Poco::Util::LayeredConfiguration & getClientConfiguration() = 0;
virtual bool processWithFuzzing(const String &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Query processing with fuzzing is not implemented");
@ -107,7 +118,7 @@ protected:
String & query_to_execute, ASTPtr & parsed_query, const String & all_queries_text,
std::unique_ptr<Exception> & current_exception);
static void clearTerminal();
void clearTerminal();
void showClientVersion();
using ProgramOptionsDescription = boost::program_options::options_description;
@ -206,7 +217,6 @@ protected:
bool echo_queries = false; /// Print queries before execution in batch mode.
bool ignore_error = false; /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode.
bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode.
std::optional<Suggest> suggest;
bool load_suggestions = false;
@ -251,9 +261,9 @@ protected:
ConnectionParameters connection_parameters;
/// Buffer that reads from stdin in batch mode.
ReadBufferFromFileDescriptor std_in{STDIN_FILENO};
ReadBufferFromFileDescriptor std_in;
/// Console output.
WriteBufferFromFileDescriptor std_out{STDOUT_FILENO};
WriteBufferFromFileDescriptor std_out;
std::unique_ptr<ShellCommand> pager_cmd;
/// The user can specify to redirect query output to a file.
@ -284,7 +294,6 @@ protected:
bool need_render_profile_events = true;
bool written_first_block = false;
size_t processed_rows = 0; /// How many rows have been read or written.
bool print_num_processed_rows = false; /// Whether to print the number of processed rows at the end of the query.
bool print_stack_trace = false;
/// The last exception that was received from the server. Is used for the
@ -332,8 +341,14 @@ protected:
bool cancelled = false;
bool cancelled_printed = false;
/// Has log_comment been specified by the user?
bool has_log_comment = false;
/// Unpacked descriptors and streams for the ease of use.
int in_fd = STDIN_FILENO;
int out_fd = STDOUT_FILENO;
int err_fd = STDERR_FILENO;
std::istream & input_stream;
std::ostream & output_stream;
std::ostream & error_stream;
};
}
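A minimal sketch of how the injectable descriptors and streams introduced above might be wired up by a test harness. TestClient is hypothetical (the remaining pure-virtual members of ClientBase are elided), so this illustrates only the constructor's contract, not an actual caller in this change.

#include <sstream>

class TestClient : public ClientBase /// hypothetical test double
{
public:
    using ClientBase::ClientBase;
    Poco::Util::LayeredConfiguration & getClientConfiguration() override { return config(); }
    /// ...the remaining pure-virtual members of ClientBase would be overridden here...
};

void runScripted()
{
    std::istringstream in("SELECT 1;\n"); /// scripted input instead of std::cin
    std::ostringstream out, err;          /// captured instead of std::cout / std::cerr
    /// Real descriptors are still passed: poll() and ioctl(TIOCGWINSZ) need an
    /// actual fd even when the formatted I/O goes through the streams.
    TestClient client(STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO, in, out, err);
}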

View File

@ -23,14 +23,6 @@ void trim(String & s)
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
}
/// Check if multi-line query is inserted from the paste buffer.
/// Allows delaying the start of query execution until the entirety of query is inserted.
bool hasInputData()
{
pollfd fd{STDIN_FILENO, POLLIN, 0};
return poll(&fd, 1, 0) == 1;
}
struct NoCaseCompare
{
bool operator()(const std::string & str1, const std::string & str2)
@ -63,6 +55,14 @@ void addNewWords(Words & to, const Words & from, Compare comp)
namespace DB
{
/// Check if multi-line query is inserted from the paste buffer.
/// Allows delaying the start of query execution until the entirety of query is inserted.
bool LineReader::hasInputData() const
{
pollfd fd{in_fd, POLLIN, 0};
return poll(&fd, 1, 0) == 1;
}
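/// Passing a zero timeout makes poll() return immediately: a result of 1 means
/// in_fd already has readable data, which is how a pasted multi-line query is
/// consumed in full before execution starts.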
replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length, const char * word_break_characters)
{
std::string_view last_word;
@ -131,11 +131,22 @@ void LineReader::Suggest::addWords(Words && new_words) // NOLINT(cppcoreguidelin
}
}
LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
LineReader::LineReader(
const String & history_file_path_,
bool multiline_,
Patterns extenders_,
Patterns delimiters_,
std::istream & input_stream_,
std::ostream & output_stream_,
int in_fd_
)
: history_file_path(history_file_path_)
, multiline(multiline_)
, extenders(std::move(extenders_))
, delimiters(std::move(delimiters_))
, input_stream(input_stream_)
, output_stream(output_stream_)
, in_fd(in_fd_)
{
/// FIXME: check extender != delimiter
}
@ -212,9 +223,9 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt)
input.clear();
{
std::cout << prompt;
std::getline(std::cin, input);
if (!std::cin.good())
output_stream << prompt;
std::getline(input_stream, input);
if (!input_stream.good())
return ABORT;
}

View File

@ -1,5 +1,7 @@
#pragma once
#include <iostream>
#include <unistd.h>
#include <mutex>
#include <atomic>
#include <vector>
@ -37,7 +39,16 @@ public:
using Patterns = std::vector<const char *>;
LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters);
LineReader(
const String & history_file_path,
bool multiline,
Patterns extenders,
Patterns delimiters,
std::istream & input_stream_ = std::cin,
std::ostream & output_stream_ = std::cout,
int in_fd_ = STDIN_FILENO
);
virtual ~LineReader() = default;
/// Reads the whole line until delimiter (in multiline mode) or until the last line without extender.
@ -56,6 +67,8 @@ public:
virtual void enableBracketedPaste() {}
virtual void disableBracketedPaste() {}
bool hasInputData() const;
protected:
enum InputStatus
{
@ -77,6 +90,10 @@ protected:
virtual InputStatus readOneLine(const String & prompt);
virtual void addToHistory(const String &) {}
std::istream & input_stream;
std::ostream & output_stream;
int in_fd;
};
}
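A short construction sketch for the stream-aware LineReader declared above; the pattern values are illustrative only.

#include <sstream>

void readFromScript()
{
    std::istringstream in("SELECT 1;\n");
    std::ostringstream out;
    LineReader::Patterns extenders = {"\\"};
    LineReader::Patterns delimiters = {";"};
    /// An empty history path disables history; prompts go to the string stream,
    /// while in_fd is still what hasInputData() polls for pending input.
    LineReader reader("", /*multiline=*/ false, extenders, delimiters, in, out, STDIN_FILENO);
}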

View File

@ -16,7 +16,10 @@
#include <Storages/IStorage.h>
#include <Common/ConcurrentBoundedQueue.h>
#include <Common/CurrentThread.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/PRQL/ParserPRQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/Kusto/parseKQLQuery.h>
namespace DB
{
@ -151,12 +154,26 @@ void LocalConnection::sendQuery(
state->block = sample;
String current_format = "Values";
const auto & settings = context->getSettingsRef();
const char * begin = state->query.data();
auto parsed_query = ClientBase::parseQuery(begin, begin + state->query.size(),
context->getSettingsRef(),
/*allow_multi_statements=*/ false,
/*is_interactive=*/ false,
/*ignore_error=*/ false);
const char * end = begin + state->query.size();
const Dialect & dialect = settings.dialect;
std::unique_ptr<IParserBase> parser;
if (dialect == Dialect::kusto)
parser = std::make_unique<ParserKQLStatement>(end, settings.allow_settings_after_format_in_insert);
else if (dialect == Dialect::prql)
parser = std::make_unique<ParserPRQLQuery>(settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks);
else
parser = std::make_unique<ParserQuery>(end, settings.allow_settings_after_format_in_insert);
ASTPtr parsed_query;
if (dialect == Dialect::kusto)
parsed_query = parseKQLQueryAndMovePosition(*parser, begin, end, "", /*allow_multi_statements*/false, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks);
else
parsed_query = parseQueryAndMovePosition(*parser, begin, end, "", /*allow_multi_statements*/false, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks);
if (const auto * insert = parsed_query->as<ASTInsertQuery>())
{
if (!insert->format.empty())

View File

@ -297,8 +297,15 @@ ReplxxLineReader::ReplxxLineReader(
Patterns extenders_,
Patterns delimiters_,
const char word_break_characters_[],
replxx::Replxx::highlighter_callback_t highlighter_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
replxx::Replxx::highlighter_callback_t highlighter_,
[[ maybe_unused ]] std::istream & input_stream_,
[[ maybe_unused ]] std::ostream & output_stream_,
[[ maybe_unused ]] int in_fd_,
[[ maybe_unused ]] int out_fd_,
[[ maybe_unused ]] int err_fd_
)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_)
, highlighter(std::move(highlighter_))
, word_break_characters(word_break_characters_)
, editor(getEditor())
{
@ -471,7 +478,7 @@ ReplxxLineReader::ReplxxLineReader(
ReplxxLineReader::~ReplxxLineReader()
{
if (close(history_file_fd))
if (history_file_fd >= 0 && close(history_file_fd))
rx.print("Close of history file failed: %s\n", errnoToString().c_str());
}
@ -496,7 +503,7 @@ void ReplxxLineReader::addToHistory(const String & line)
// but replxx::Replxx::history_load() does not
// and that is why flock() is added here.
bool locked = false;
if (flock(history_file_fd, LOCK_EX))
if (history_file_fd >= 0 && flock(history_file_fd, LOCK_EX))
rx.print("Lock of history file failed: %s\n", errnoToString().c_str());
else
locked = true;
@ -507,7 +514,7 @@ void ReplxxLineReader::addToHistory(const String & line)
if (!rx.history_save(history_file_path))
rx.print("Saving history failed: %s\n", errnoToString().c_str());
if (locked && 0 != flock(history_file_fd, LOCK_UN))
if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", errnoToString().c_str());
}

View File

@ -1,6 +1,7 @@
#pragma once
#include "LineReader.h"
#include <Client/LineReader.h>
#include <base/strong_typedef.h>
#include <replxx.hxx>
namespace DB
@ -9,14 +10,22 @@ namespace DB
class ReplxxLineReader : public LineReader
{
public:
ReplxxLineReader(
ReplxxLineReader
(
Suggest & suggest,
const String & history_file_path,
bool multiline,
Patterns extenders_,
Patterns delimiters_,
const char word_break_characters_[],
replxx::Replxx::highlighter_callback_t highlighter_);
replxx::Replxx::highlighter_callback_t highlighter_,
std::istream & input_stream_ = std::cin,
std::ostream & output_stream_ = std::cout,
int in_fd_ = STDIN_FILENO,
int out_fd_ = STDOUT_FILENO,
int err_fd_ = STDERR_FILENO
);
~ReplxxLineReader() override;
void enableBracketedPaste() override;

View File

@ -637,11 +637,11 @@ The server successfully detected this situation and will download merged part fr
M(S3QueueSetFileProcessingMicroseconds, "Time spent to set file as processing")\
M(S3QueueSetFileProcessedMicroseconds, "Time spent to set file as processed")\
M(S3QueueSetFileFailedMicroseconds, "Time spent to set file as failed")\
M(S3QueueFailedFiles, "Number of files which failed to be processed")\
M(S3QueueProcessedFiles, "Number of files which were processed")\
M(S3QueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent removing files by max set size or TTL")\
M(S3QueuePullMicroseconds, "Time spent to read file data")\
M(S3QueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\
M(ObjectStorageQueueFailedFiles, "Number of files which failed to be processed")\
M(ObjectStorageQueueProcessedFiles, "Number of files which were processed")\
M(ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent removing files by max set size or TTL")\
M(ObjectStorageQueuePullMicroseconds, "Time spent to read file data")\
M(ObjectStorageQueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\
\
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \

View File

@ -92,19 +92,19 @@ void ProgressIndication::writeFinalProgress()
if (progress.read_rows < 1000)
return;
std::cout << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, "
output_stream << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes);
UInt64 elapsed_ns = getElapsedNanoseconds();
if (elapsed_ns)
std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
output_stream << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
else
std::cout << ". ";
output_stream << ". ";
auto peak_memory_usage = getMemoryUsage().peak;
if (peak_memory_usage >= 0)
std::cout << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
output_stream << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
}
void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
@ -125,7 +125,7 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
const char * indicator = indicators[increment % 8];
size_t terminal_width = getTerminalWidth();
size_t terminal_width = getTerminalWidth(in_fd, err_fd);
if (!written_progress_chars)
{

View File

@ -32,6 +32,19 @@ using HostToTimesMap = std::unordered_map<String, ThreadEventData>;
class ProgressIndication
{
public:
explicit ProgressIndication
(
std::ostream & output_stream_ = std::cout,
int in_fd_ = STDIN_FILENO,
int err_fd_ = STDERR_FILENO
)
: output_stream(output_stream_),
in_fd(in_fd_),
err_fd(err_fd_)
{
}
/// Write progress bar.
void writeProgress(WriteBufferFromFileDescriptor & message);
void clearProgressOutput(WriteBufferFromFileDescriptor & message);
@ -103,6 +116,10 @@ private:
/// - hosts_data/cpu_usage_meter (guarded with profile_events_mutex)
mutable std::mutex profile_events_mutex;
mutable std::mutex progress_mutex;
std::ostream & output_stream;
int in_fd;
int err_fd;
};
}

View File

@ -11,7 +11,7 @@
#include <Interpreters/TextLog.h>
#include <Interpreters/TraceLog.h>
#include <Interpreters/FilesystemCacheLog.h>
#include <Interpreters/S3QueueLog.h>
#include <Interpreters/ObjectStorageQueueLog.h>
#include <Interpreters/FilesystemReadPrefetchesLog.h>
#include <Interpreters/ProcessorsProfileLog.h>
#include <Interpreters/ZooKeeperLog.h>

View File

@ -25,7 +25,7 @@
M(ZooKeeperLogElement) \
M(ProcessorProfileLogElement) \
M(TextLogElement) \
M(S3QueueLogElement) \
M(ObjectStorageQueueLogElement) \
M(FilesystemCacheLogElement) \
M(FilesystemReadPrefetchesLogElement) \
M(AsynchronousInsertLogElement) \

View File

@ -13,17 +13,17 @@ namespace DB::ErrorCodes
extern const int SYSTEM_ERROR;
}
uint16_t getTerminalWidth()
uint16_t getTerminalWidth(int in_fd, int err_fd)
{
struct winsize terminal_size {};
if (isatty(STDIN_FILENO))
if (isatty(in_fd))
{
if (ioctl(STDIN_FILENO, TIOCGWINSZ, &terminal_size))
if (ioctl(in_fd, TIOCGWINSZ, &terminal_size))
throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)");
}
else if (isatty(STDERR_FILENO))
else if (isatty(err_fd))
{
if (ioctl(STDERR_FILENO, TIOCGWINSZ, &terminal_size))
if (ioctl(err_fd, TIOCGWINSZ, &terminal_size))
throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)");
}
/// Default - 0.
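/// With the descriptors parameterized, a caller that owns another terminal
/// could query it directly, e.g. getTerminalWidth(tty_fd, tty_fd) for some
/// hypothetical tty_fd, while the default arguments preserve the old
/// stdin/stderr behavior.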

View File

@ -1,16 +1,16 @@
#pragma once
#include <string>
#include <unistd.h>
#include <boost/program_options.hpp>
namespace po = boost::program_options;
uint16_t getTerminalWidth();
uint16_t getTerminalWidth(int in_fd = STDIN_FILENO, int err_fd = STDERR_FILENO);
/** Creates po::options_description with name and an appropriate size for option displaying
* when program is called with option --help
* */
po::options_description createOptionsDescription(const std::string &caption, unsigned short terminal_width); /// NOLINT

View File

@ -1,15 +1,18 @@
clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp)
target_link_libraries(zkutil_test_commands PRIVATE
clickhouse_common_zookeeper_no_log
clickhouse_functions
dbms)
clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp)
target_link_libraries(zkutil_test_commands_new_lib PRIVATE
clickhouse_common_zookeeper_no_log
clickhouse_compression
clickhouse_functions
dbms)
clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp)
target_link_libraries(zkutil_test_async PRIVATE
clickhouse_common_zookeeper_no_log
clickhouse_functions
dbms)

View File

@ -11,10 +11,10 @@ clickhouse_add_executable (small_table small_table.cpp)
target_link_libraries (small_table PRIVATE clickhouse_common_io)
clickhouse_add_executable (parallel_aggregation parallel_aggregation.cpp)
target_link_libraries (parallel_aggregation PRIVATE dbms)
target_link_libraries (parallel_aggregation PRIVATE dbms clickhouse_functions)
clickhouse_add_executable (parallel_aggregation2 parallel_aggregation2.cpp)
target_link_libraries (parallel_aggregation2 PRIVATE dbms)
target_link_libraries (parallel_aggregation2 PRIVATE dbms clickhouse_functions)
clickhouse_add_executable (int_hashes_perf int_hashes_perf.cpp)
target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io)
@ -85,7 +85,7 @@ target_link_libraries (interval_tree PRIVATE dbms)
if (ENABLE_SSL)
clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp)
target_link_libraries (encrypt_decrypt PRIVATE dbms)
target_link_libraries (encrypt_decrypt PRIVATE dbms clickhouse_functions)
endif()
clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp)

View File

@ -470,7 +470,7 @@ class IColumn;
M(UInt64, max_rows_in_join, 0, "Maximum size of the hash table for JOIN (in number of rows).", 0) \
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key. Can be applied only to hash join and storage join.", IMPORTANT) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::DEFAULT, "Specify join algorithm.", 0) \
M(UInt64, cross_join_min_rows_to_compress, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \
M(UInt64, cross_join_min_bytes_to_compress, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \
@ -1092,6 +1092,7 @@ class IColumn;
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \
M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and do not parse them. With this enabled, JSON strings with invalid format or duplicated fields may not raise exceptions", 0) \
M(Bool, input_format_json_ignore_key_case, false, "Ignore JSON key case while reading JSON fields from string", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
@ -1160,6 +1161,7 @@ class IColumn;
M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \
M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \
M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \
M(Bool, output_format_parquet_write_page_index, true, "Allow writing a page index into Parquet files.", 0) \
M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy', 'zstd'.", 0) \
M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \

View File

@ -86,6 +86,8 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."},
}},
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},
{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
@ -115,6 +117,7 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
{"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
}},
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"},
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},

View File

@ -201,13 +201,13 @@ IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS,
{"zlib", FormatSettings::ORCCompression::ZLIB},
{"lz4", FormatSettings::ORCCompression::LZ4}})
IMPLEMENT_SETTING_ENUM(S3QueueMode, ErrorCodes::BAD_ARGUMENTS,
{{"ordered", S3QueueMode::ORDERED},
{"unordered", S3QueueMode::UNORDERED}})
IMPLEMENT_SETTING_ENUM(ObjectStorageQueueMode, ErrorCodes::BAD_ARGUMENTS,
{{"ordered", ObjectStorageQueueMode::ORDERED},
{"unordered", ObjectStorageQueueMode::UNORDERED}})
IMPLEMENT_SETTING_ENUM(S3QueueAction, ErrorCodes::BAD_ARGUMENTS,
{{"keep", S3QueueAction::KEEP},
{"delete", S3QueueAction::DELETE}})
IMPLEMENT_SETTING_ENUM(ObjectStorageQueueAction, ErrorCodes::BAD_ARGUMENTS,
{{"keep", ObjectStorageQueueAction::KEEP},
{"delete", ObjectStorageQueueAction::DELETE}})
IMPLEMENT_SETTING_ENUM(ExternalCommandStderrReaction, ErrorCodes::BAD_ARGUMENTS,
{{"none", ExternalCommandStderrReaction::NONE},

View File

@ -341,21 +341,21 @@ DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType)
DECLARE_SETTING_ENUM(LocalFSReadMethod)
enum class S3QueueMode : uint8_t
enum class ObjectStorageQueueMode : uint8_t
{
ORDERED,
UNORDERED,
};
DECLARE_SETTING_ENUM(S3QueueMode)
DECLARE_SETTING_ENUM(ObjectStorageQueueMode)
enum class S3QueueAction : uint8_t
enum class ObjectStorageQueueAction : uint8_t
{
KEEP,
DELETE,
};
DECLARE_SETTING_ENUM(S3QueueAction)
DECLARE_SETTING_ENUM(ObjectStorageQueueAction)
DECLARE_SETTING_ENUM(ExternalCommandStderrReaction)

View File

@ -30,8 +30,8 @@ namespace
{
friend void tryVisitNestedSelect(const String & query, DDLDependencyVisitorData & data);
public:
DDLDependencyVisitorData(const ContextPtr & context_, const QualifiedTableName & table_name_, const ASTPtr & ast_)
: create_query(ast_), table_name(table_name_), current_database(context_->getCurrentDatabase()), context(context_)
DDLDependencyVisitorData(const ContextPtr & global_context_, const QualifiedTableName & table_name_, const ASTPtr & ast_, const String & current_database_)
: create_query(ast_), table_name(table_name_), default_database(global_context_->getCurrentDatabase()), current_database(current_database_), global_context(global_context_)
{
}
@ -71,8 +71,9 @@ namespace
ASTPtr create_query;
std::unordered_set<const IAST *> skip_asts;
QualifiedTableName table_name;
String default_database;
String current_database;
ContextPtr context;
ContextPtr global_context;
TableNamesSet dependencies;
/// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
@ -95,6 +96,11 @@ namespace
as_table.database = current_database;
dependencies.emplace(as_table);
}
/// Visit nested select query only for views, for other cases it's not
/// an actual dependency as it will be executed only once to fill the table.
if (create.select && !create.isView())
skip_asts.insert(create.select);
}
/// The definition of a dictionary: SOURCE(CLICKHOUSE(...)) LAYOUT(...) LIFETIME(...)
@ -103,8 +109,8 @@ namespace
if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements)
return;
auto config = getDictionaryConfigurationFromAST(create_query->as<ASTCreateQuery &>(), context);
auto info = getInfoIfClickHouseDictionarySource(config, context);
auto config = getDictionaryConfigurationFromAST(create_query->as<ASTCreateQuery &>(), global_context);
auto info = getInfoIfClickHouseDictionarySource(config, global_context);
/// We consider only dependencies on local tables.
if (!info || !info->is_local)
@ -112,14 +118,21 @@ namespace
if (!info->table_name.table.empty())
{
/// If the database is not specified in the dictionary source, use the database of the dictionary itself, not the current/default database.
if (info->table_name.database.empty())
info->table_name.database = current_database;
info->table_name.database = table_name.database;
dependencies.emplace(std::move(info->table_name));
}
else
{
/// We don't have a table name, we have a select query instead
/// We don't have a table name, we have a select query instead.
/// Tables referenced by the select query in a dictionary definition won't
/// use the current database, as this query is executed with the global context.
/// Use the default database from the global context while visiting the select query.
String current_database_ = current_database;
current_database = default_database;
tryVisitNestedSelect(info->query, *this);
current_database = current_database_;
}
}
@ -176,7 +189,7 @@ namespace
if (auto cluster_name = tryGetClusterNameFromArgument(table_engine, 0))
{
auto cluster = context->tryGetCluster(*cluster_name);
auto cluster = global_context->tryGetCluster(*cluster_name);
if (cluster && cluster->getLocalShardCount())
has_local_replicas = true;
}
@ -231,7 +244,7 @@ namespace
{
if (auto cluster_name = tryGetClusterNameFromArgument(function, 0))
{
if (auto cluster = context->tryGetCluster(*cluster_name))
if (auto cluster = global_context->tryGetCluster(*cluster_name))
{
if (cluster->getLocalShardCount())
has_local_replicas = true;
@ -303,7 +316,10 @@ namespace
try
{
/// We're just searching for dependencies here, it's not safe to execute subqueries now.
auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
/// Use copy of the global_context and set current database, because expressions can contain currentDatabase() function.
ContextMutablePtr global_context_copy = Context::createCopy(global_context);
global_context_copy->setCurrentDatabase(current_database);
auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, global_context_copy);
const auto * literal = evaluated->as<ASTLiteral>();
if (!literal || (literal->value.getType() != Field::Types::String))
return {};
@ -444,7 +460,7 @@ namespace
ParserSelectWithUnionQuery parser;
String description = fmt::format("Query for ClickHouse dictionary {}", data.table_name);
String fixed_query = removeWhereConditionPlaceholder(query);
const Settings & settings = data.context->getSettingsRef();
const Settings & settings = data.global_context->getSettingsRef();
ASTPtr select = parseQuery(parser, fixed_query, description,
settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks);
@ -459,12 +475,19 @@ namespace
}
TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast)
TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database)
{
DDLDependencyVisitor::Data data{context, table_name, ast};
DDLDependencyVisitor::Data data{global_context, table_name, ast, current_database};
DDLDependencyVisitor::Visitor visitor{data};
visitor.visit(ast);
return std::move(data).getDependencies();
}
TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database)
{
DDLDependencyVisitor::Data data{global_context, table_name, ast, current_database};
tryVisitNestedSelect(select_query, data);
return std::move(data).getDependencies();
}
}

View File

@ -13,6 +13,9 @@ using TableNamesSet = std::unordered_set<QualifiedTableName>;
/// Returns a list of all tables explicitly referenced in the create query of a specified table.
/// For example, a column default expression can use dictGet() and thus reference a dictionary.
/// Does not validate the AST; works in a best-effort way.
TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast);
TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database);
/// Returns a list of all tables explicitly referenced in the select query specified as a dictionary source.
TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database);
}

View File

@ -110,19 +110,30 @@ void DDLLoadingDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments &
auto config = getDictionaryConfigurationFromAST(data.create_query->as<ASTCreateQuery &>(), data.global_context);
auto info = getInfoIfClickHouseDictionarySource(config, data.global_context);
if (!info || !info->is_local || info->table_name.table.empty())
if (!info || !info->is_local)
return;
if (info->table_name.database.empty())
info->table_name.database = data.default_database;
data.dependencies.emplace(std::move(info->table_name));
if (!info->table_name.table.empty())
{
/// If database is not specified in dictionary source, use database of the dictionary itself, not the current/default database.
if (info->table_name.database.empty())
info->table_name.database = data.table_name.database;
data.dependencies.emplace(std::move(info->table_name));
}
else
{
/// We don't have a table name, we have a select query instead that will be executed during dictionary loading.
/// We need to find all tables used in this select query and add them to dependencies.
auto select_query_dependencies = getDependenciesFromDictionaryNestedSelectQuery(data.global_context, data.table_name, data.create_query, info->query, data.default_database);
data.dependencies.merge(select_query_dependencies);
}
}
void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data)
{
if (storage.ttl_table)
{
auto ttl_dependencies = getDependenciesFromCreateQuery(data.global_context, data.table_name, storage.ttl_table->ptr());
auto ttl_dependencies = getDependenciesFromCreateQuery(data.global_context, data.table_name, storage.ttl_table->ptr(), data.default_database);
data.dependencies.merge(ttl_dependencies);
}

View File

@ -154,7 +154,7 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl
applyMetadataChangesToCreateQuery(it->second, metadata);
/// The create query of the table has been just changed, we need to update dependencies too.
auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second);
auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second, local_context->getCurrentDatabase());
auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second);
DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies);
}

View File

@ -539,7 +539,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta
}
/// The create query of the table has been just changed, we need to update dependencies too.
auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast);
auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast, local_context->getCurrentDatabase());
auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast);
DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies);

View File

@ -1165,7 +1165,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
/// And QualifiedTableName::parseFromString doesn't handle this.
auto qualified_name = QualifiedTableName{.database = getDatabaseName(), .table = table_name};
auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query);
tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext(), qualified_name, query_ast));
tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ast, getContext()->getCurrentDatabase()));
}
tables_dependencies.checkNoCyclicDependencies();

View File

@ -137,7 +137,7 @@ void TablesLoader::buildDependencyGraph()
{
for (const auto & [table_name, table_metadata] : metadata.parsed_tables)
{
auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast);
auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast, global_context->getCurrentDatabase());
auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast);
if (!new_ref_dependencies.empty())

View File

@ -149,6 +149,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects;
format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence;
format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields;
format_settings.json.ignore_key_case = settings.input_format_json_ignore_key_case;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
@ -171,6 +172,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.parquet.parallel_encoding = settings.output_format_parquet_parallel_encoding;
format_settings.parquet.data_page_size = settings.output_format_parquet_data_page_size;
format_settings.parquet.write_batch_size = settings.output_format_parquet_batch_size;
format_settings.parquet.write_page_index = settings.output_format_parquet_write_page_index;
format_settings.parquet.local_read_min_bytes_for_seek = settings.input_format_parquet_local_file_min_bytes_for_seek;
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.color = settings.output_format_pretty_color;

View File

@ -228,6 +228,7 @@ struct FormatSettings
bool infer_incomplete_types_as_strings = true;
bool throw_on_bad_escape_sequence = true;
bool ignore_unnecessary_fields = true;
bool ignore_key_case = false;
} json{};
struct
@ -275,6 +276,7 @@ struct FormatSettings
bool output_compliant_nested_types = true;
size_t data_page_size = 1024 * 1024;
size_t write_batch_size = 1024;
bool write_page_index = false;
size_t local_read_min_bytes_for_seek = 8192;
} parquet{};

View File

@ -3,35 +3,9 @@ add_subdirectory(divide)
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(clickhouse_functions .)
set(DBMS_FUNCTIONS
IFunction.cpp
FunctionFactory.cpp
FunctionHelpers.cpp
extractTimeZoneFromFunctionArguments.cpp
FunctionsLogical.cpp
if.cpp
multiIf.cpp
multiMatchAny.cpp
checkHyperscanRegexp.cpp
array/has.cpp
CastOverloadResolver.cpp
# Provides dependency for cast - createFunctionBaseCast()
FunctionsConversion.cpp
)
extract_into_parent_list(clickhouse_functions_sources dbms_sources ${DBMS_FUNCTIONS})
extract_into_parent_list(clickhouse_functions_headers dbms_headers
IFunction.h
FunctionFactory.h
FunctionHelpers.h
extractTimeZoneFromFunctionArguments.h
FunctionsLogical.h
CastOverloadResolver.h
)
add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources})
if (OMIT_HEAVY_DEBUG_SYMBOLS)
target_compile_options(clickhouse_functions_obj PRIVATE "-g0")
set_source_files_properties(${DBMS_FUNCTIONS} DIRECTORY .. PROPERTIES COMPILE_FLAGS "-g0")
endif()
list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_obj>)

View File

@ -25,10 +25,10 @@ namespace ErrorCodes
enum class Base64Variant : uint8_t
{
Normal,
Url
URL
};
inline std::string preprocessBase64Url(std::string_view src)
inline std::string preprocessBase64URL(std::string_view src)
{
std::string padded_src;
padded_src.reserve(src.size() + 3);
@ -70,7 +70,7 @@ inline std::string preprocessBase64Url(std::string_view src)
return padded_src;
}
inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len)
inline size_t postprocessBase64URL(UInt8 * dst, size_t out_len)
{
// Do symbol substitution as described in https://datatracker.ietf.org/doc/html/rfc4648#section-5
for (size_t i = 0; i < out_len; ++i)
@ -95,7 +95,7 @@ inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len)
template <Base64Variant variant>
struct Base64Encode
{
static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64UrlEncode";
static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64URLEncode";
static size_t getBufferSize(size_t string_length, size_t string_count)
{
@ -111,8 +111,8 @@ struct Base64Encode
/// Memory sanitizer doesn't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle.
__msan_unpoison(dst, outlen);
if constexpr (variant == Base64Variant::Url)
outlen = postprocessBase64Url(dst, outlen);
if constexpr (variant == Base64Variant::URL)
outlen = postprocessBase64URL(dst, outlen);
return outlen;
}
@ -121,7 +121,7 @@ struct Base64Encode
template <Base64Variant variant>
struct Base64Decode
{
static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64UrlDecode";
static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64URLDecode";
static size_t getBufferSize(size_t string_length, size_t string_count)
{
@ -132,9 +132,9 @@ struct Base64Decode
{
int rc;
size_t outlen = 0;
if constexpr (variant == Base64Variant::Url)
if constexpr (variant == Base64Variant::URL)
{
std::string src_padded = preprocessBase64Url(src);
std::string src_padded = preprocessBase64URL(src);
rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast<char *>(dst), &outlen, 0);
}
else
@ -156,7 +156,7 @@ struct Base64Decode
template <Base64Variant variant>
struct TryBase64Decode
{
static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64UrlDecode";
static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64URLDecode";
static size_t getBufferSize(size_t string_length, size_t string_count)
{
@ -167,9 +167,9 @@ struct TryBase64Decode
{
int rc;
size_t outlen = 0;
if constexpr (variant == Base64Variant::Url)
if constexpr (variant == Base64Variant::URL)
{
std::string src_padded = preprocessBase64Url(src);
std::string src_padded = preprocessBase64URL(src);
rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast<char *>(dst), &outlen, 0);
}
else

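The preprocess/postprocess pair above adapts standard base64 to the URL-safe variant of RFC 4648 section 5: '+' and '/' are swapped with '-' and '_', and the '=' padding that the URL form usually omits is reinstated before decoding. A standalone sketch of that transform (illustrative helper names, not the SIMD-codec buffers the functions above operate on):

#include <iostream>
#include <string>

// After encoding: swap to the URL-safe alphabet and drop '=' padding,
// the substitution direction postprocessBase64URL handles above.
std::string toUrlSafe(std::string s)
{
    for (char & c : s)
    {
        if (c == '+') c = '-';
        else if (c == '/') c = '_';
    }
    while (!s.empty() && s.back() == '=')
        s.pop_back();
    return s;
}

// Before decoding: restore the standard alphabet and re-pad to a multiple
// of four, the direction preprocessBase64URL handles before base64_decode.
std::string fromUrlSafe(std::string s)
{
    for (char & c : s)
    {
        if (c == '-') c = '+';
        else if (c == '_') c = '/';
    }
    while (s.size() % 4 != 0)
        s.push_back('=');
    return s;
}

int main()
{
    std::cout << toUrlSafe("PDw/Pz8+Pg==") << '\n';  // PDw_Pz8-Pg
    std::cout << fromUrlSafe("PDw_Pz8-Pg") << '\n';  // PDw/Pz8+Pg==
}
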
View File

@ -5,16 +5,16 @@
namespace DB
{
REGISTER_FUNCTION(Base64UrlDecode)
REGISTER_FUNCTION(Base64URLDecode)
{
FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)";
FunctionDocumentation::Syntax syntax = "base64UrlDecode(encodedUrl)";
FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}};
FunctionDocumentation::Syntax syntax = "base64URLDecode(encodedURL)";
FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument.";
FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}};
FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "http://clickhouse.com"}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<Base64Decode<Base64Variant::Url>>>({description, syntax, arguments, returned_value, examples, categories});
factory.registerFunction<FunctionBase64Conversion<Base64Decode<Base64Variant::URL>>>({description, syntax, arguments, returned_value, examples, categories});
}
}

View File

@ -5,16 +5,16 @@
namespace DB
{
REGISTER_FUNCTION(Base64UrlEncode)
REGISTER_FUNCTION(Base64URLEncode)
{
FunctionDocumentation::Description description = R"(Encodes a URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)";
FunctionDocumentation::Syntax syntax = "base64UrlEncode(url)";
FunctionDocumentation::Syntax syntax = "base64URLEncode(url)";
FunctionDocumentation::Arguments arguments = {{"url", "String column or constant."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the encoded value of the argument.";
FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}};
FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<Base64Encode<Base64Variant::Url>>>({description, syntax, arguments, returned_value, examples, categories});
factory.registerFunction<FunctionBase64Conversion<Base64Encode<Base64Variant::URL>>>({description, syntax, arguments, returned_value, examples, categories});
}
}

View File

@ -978,8 +978,7 @@ namespace
[[nodiscard]]
static PosOrError mysqlAmericanDate(Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
{
if (auto status = checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment))
return tl::unexpected(status.error());
RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment))
Int32 month;
ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber2<Int32, NeedCheckSpace::No>(cur, end, fragment, month)))
@ -993,7 +992,7 @@ namespace
Int32 year;
ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber2<Int32, NeedCheckSpace::No>(cur, end, fragment, year)))
RETURN_ERROR_IF_FAILED(date.setYear(year))
RETURN_ERROR_IF_FAILED(date.setYear(year + 2000))
return cur;
}
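
Besides the checkSpace cleanup, the hunk above fixes %D's two-digit year: it used to be stored as-is (so '24' meant year 24), and the patched line widens it with a flat +2000. A standalone sketch of the parse-and-widen step (parseAmericanDate and the sscanf use are illustrative, standing in for the readNumber2 helpers):

#include <cstdio>

// Parse an American-style MM/DD/YY date and widen the 2-digit year the way
// the patched mysqlAmericanDate does: a flat +2000 (so "12/31/99" -> 2099).
bool parseAmericanDate(const char * s, int & year, int & month, int & day)
{
    int y2 = 0;
    if (std::sscanf(s, "%2d/%2d/%2d", &month, &day, &y2) != 3)
        return false;
    year = y2 + 2000;
    return true;
}

int main()
{
    int y, m, d;
    if (parseAmericanDate("05/12/24", y, m, d))
        std::printf("%04d-%02d-%02d\n", y, m, d); // 2024-05-12
}
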
@ -1015,8 +1014,7 @@ namespace
[[nodiscard]]
static PosOrError mysqlISO8601Date(Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
{
if (auto status = checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment))
return tl::unexpected(status.error());
RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment))
Int32 year;
Int32 month;
@ -1462,8 +1460,7 @@ namespace
[[nodiscard]]
static PosOrError jodaDayOfWeekText(size_t /*min_represent_digits*/, Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
{
if (auto result= checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment); !result.has_value())
return tl::unexpected(result.error());
RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment))
String text1(cur, 3);
boost::to_lower(text1);
@ -1556,8 +1553,8 @@ namespace
Int32 day_of_month;
ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength(
cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, day_of_month)))
if (auto res = date.setDayOfMonth(day_of_month); !res.has_value())
return tl::unexpected(res.error());
RETURN_ERROR_IF_FAILED(date.setDayOfMonth(day_of_month))
return cur;
}

View File

@ -1,354 +0,0 @@
#include <algorithm>
#include <cstring>
#include <vector>
#include <string>
#include <type_traits>
#include <gtest/gtest.h>
#include <Columns/ColumnNothing.h>
#include <Columns/ColumnsNumber.h>
#include <Functions/FunctionsLogical.h>
// I know that inclusion of .cpp is not good at all
#include <Functions/FunctionsLogical.cpp> // NOLINT
using namespace DB;
using TernaryValues = std::vector<Ternary::ResultType>;
struct LinearCongruentialGenerator
{
/// Constants from `man lrand48_r`.
static constexpr UInt64 a = 0x5DEECE66D;
static constexpr UInt64 c = 0xB;
/// And this is from `head -c8 /dev/urandom | xxd -p`
UInt64 current = 0x09826f4a081cee35ULL;
UInt32 next()
{
current = current * a + c;
return static_cast<UInt32>(current >> 16);
}
};
void generateRandomTernaryValue(LinearCongruentialGenerator & gen, Ternary::ResultType * output, size_t size, double false_ratio, double null_ratio)
{
/// The LinearCongruentialGenerator generates nonnegative integers uniformly distributed over the interval [0, 2^32).
/// See https://linux.die.net/man/3/nrand48
double false_percentile = false_ratio;
double null_percentile = false_ratio + null_ratio;
false_percentile = false_percentile > 1 ? 1 : false_percentile;
null_percentile = null_percentile > 1 ? 1 : null_percentile;
UInt32 false_threshold = static_cast<UInt32>(static_cast<double>(std::numeric_limits<UInt32>::max()) * false_percentile);
UInt32 null_threshold = static_cast<UInt32>(static_cast<double>(std::numeric_limits<UInt32>::max()) * null_percentile);
for (Ternary::ResultType * end = output + size; output != end; ++output)
{
UInt32 val = gen.next();
*output = val < false_threshold ? Ternary::False : (val < null_threshold ? Ternary::Null : Ternary::True);
}
}
template<typename T>
ColumnPtr createColumnNullable(const Ternary::ResultType * ternary_values, size_t size)
{
auto nested_column = ColumnVector<T>::create(size);
auto null_map = ColumnUInt8::create(size);
auto & nested_column_data = nested_column->getData();
auto & null_map_data = null_map->getData();
for (size_t i = 0; i < size; ++i)
{
if (ternary_values[i] == Ternary::Null)
{
null_map_data[i] = 1;
nested_column_data[i] = 0;
}
else if (ternary_values[i] == Ternary::True)
{
null_map_data[i] = 0;
nested_column_data[i] = 100;
}
else
{
null_map_data[i] = 0;
nested_column_data[i] = 0;
}
}
return ColumnNullable::create(std::move(nested_column), std::move(null_map));
}
template<typename T>
ColumnPtr createColumnVector(const Ternary::ResultType * ternary_values, size_t size)
{
auto column = ColumnVector<T>::create(size);
auto & column_data = column->getData();
for (size_t i = 0; i < size; ++i)
{
if (ternary_values[i] == Ternary::True)
{
column_data[i] = 100;
}
else
{
column_data[i] = 0;
}
}
return column;
}
template<typename ColumnType, typename T>
ColumnPtr createRandomColumn(LinearCongruentialGenerator & gen, TernaryValues & ternary_values)
{
size_t size = ternary_values.size();
Ternary::ResultType * ternary_data = ternary_values.data();
if constexpr (std::is_same_v<ColumnType, ColumnNullable>)
{
generateRandomTernaryValue(gen, ternary_data, size, 0.3, 0.7);
return createColumnNullable<T>(ternary_data, size);
}
else if constexpr (std::is_same_v<ColumnType, ColumnVector<UInt8>>)
{
generateRandomTernaryValue(gen, ternary_data, size, 0.5, 0);
return createColumnVector<T>(ternary_data, size);
}
else
{
auto nested_col = ColumnNothing::create(size);
auto null_map = ColumnUInt8::create(size);
memset(ternary_data, Ternary::Null, size);
return ColumnNullable::create(std::move(nested_col), std::move(null_map));
}
}
/* The truth table of ternary And and Or operations:
* +-------+-------+---------+--------+
* | a | b | a And b | a Or b |
* +-------+-------+---------+--------+
* | False | False | False | False |
* | False | Null | False | Null |
* | False | True | False | True |
* | Null | False | False | Null |
* | Null | Null | Null | Null |
* | Null | True | Null | True |
* | True | False | False | True |
* | True | Null | Null | True |
* | True | True | True | True |
* +-------+-------+---------+--------+
*
* https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic
*/
template <typename Op, typename T>
bool testTernaryLogicTruthTable()
{
constexpr size_t size = 9;
Ternary::ResultType col_a_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null, Ternary::Null, Ternary::True, Ternary::True, Ternary::True};
Ternary::ResultType col_b_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::False, Ternary::Null, Ternary::True,Ternary::False, Ternary::Null, Ternary::True};
Ternary::ResultType and_expected_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null,Ternary::False, Ternary::Null, Ternary::True};
Ternary::ResultType or_expected_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::Null, Ternary::Null, Ternary::True,Ternary::True, Ternary::True, Ternary::True};
Ternary::ResultType * expected_ternary;
if constexpr (std::is_same_v<Op, AndImpl>)
{
expected_ternary = and_expected_ternary;
}
else
{
expected_ternary = or_expected_ternary;
}
auto col_a = createColumnNullable<T>(col_a_ternary, size);
auto col_b = createColumnNullable<T>(col_b_ternary, size);
ColumnRawPtrs arguments = {col_a.get(), col_b.get()};
auto col_res = ColumnUInt8::create(size);
auto & col_res_data = col_res->getData();
OperationApplier<Op, AssociativeGenericApplierImpl>::apply(arguments, col_res->getData(), false);
for (size_t i = 0; i < size; ++i)
{
if (col_res_data[i] != expected_ternary[i]) return false;
}
return true;
}
template <typename Op, typename LeftColumn, typename RightColumn>
bool testTernaryLogicOfTwoColumns(size_t size)
{
LinearCongruentialGenerator gen;
TernaryValues left_column_ternary(size);
TernaryValues right_column_ternary(size);
TernaryValues expected_ternary(size);
ColumnPtr left = createRandomColumn<LeftColumn, UInt8>(gen, left_column_ternary);
ColumnPtr right = createRandomColumn<RightColumn, UInt8>(gen, right_column_ternary);
for (size_t i = 0; i < size; ++i)
{
/// Given that False is less than Null and Null is less than True, the And operation can be implemented
/// with std::min, and the Or operation can be implemented with std::max.
if constexpr (std::is_same_v<Op, AndImpl>)
{
expected_ternary[i] = std::min(left_column_ternary[i], right_column_ternary[i]);
}
else
{
expected_ternary[i] = std::max(left_column_ternary[i], right_column_ternary[i]);
}
}
ColumnRawPtrs arguments = {left.get(), right.get()};
auto col_res = ColumnUInt8::create(size);
auto & col_res_data = col_res->getData();
OperationApplier<Op, AssociativeGenericApplierImpl>::apply(arguments, col_res->getData(), false);
for (size_t i = 0; i < size; ++i)
{
if (col_res_data[i] != expected_ternary[i]) return false;
}
return true;
}
TEST(TernaryLogicTruthTable, NestedUInt8)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, UInt8>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, UInt8>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedUInt16)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, UInt16>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, UInt16>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedUInt32)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, UInt32>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, UInt32>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedUInt64)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, UInt64>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, UInt64>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedInt8)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Int8>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Int8>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedInt16)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Int16>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Int16>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedInt32)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Int32>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Int32>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedInt64)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Int64>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Int64>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedFloat32)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Float32>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Float32>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedFloat64)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Float64>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Float64>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, TwoNullable)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnNullable, ColumnNullable>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnNullable, ColumnNullable>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, TwoVector)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnUInt8, ColumnUInt8>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnUInt8, ColumnUInt8>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, TwoNothing)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnNothing, ColumnNothing>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnNothing, ColumnNothing>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, NullableVector)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnNullable, ColumnUInt8>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnNullable, ColumnUInt8>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, NullableNothing)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnNullable, ColumnNothing>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnNullable, ColumnNothing>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, VectorNothing)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnUInt8, ColumnNothing>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnUInt8, ColumnNothing>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
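
The deleted tests relied on the relationship spelled out in the comments above: with the ordering False < Null < True, three-valued AND is the minimum of its arguments and OR is the maximum. A standalone sketch of just that trick (the 0/1/2 encoding is illustrative; only the ordering matters):

#include <algorithm>
#include <cassert>

enum Ternary3 : unsigned char { False3 = 0, Null3 = 1, True3 = 2 }; // ordering is what matters

Ternary3 ternaryAnd(Ternary3 a, Ternary3 b) { return std::min(a, b); }
Ternary3 ternaryOr(Ternary3 a, Ternary3 b) { return std::max(a, b); }

int main()
{
    // Spot-check rows of the truth table above.
    assert(ternaryAnd(True3, Null3) == Null3);   // True AND Null  -> Null
    assert(ternaryAnd(False3, Null3) == False3); // False AND Null -> False
    assert(ternaryOr(False3, Null3) == Null3);   // False OR Null  -> Null
    assert(ternaryOr(True3, Null3) == True3);    // True OR Null   -> True
}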

View File

@ -5,16 +5,16 @@
namespace DB
{
REGISTER_FUNCTION(TryBase64UrlDecode)
REGISTER_FUNCTION(TryBase64URLDecode)
{
FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64UrlDecode but returns an empty string in case of an error.)";
FunctionDocumentation::Syntax syntax = "tryBase64UrlDecode(encodedUrl)";
FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}};
FunctionDocumentation::Description description = R"(Decodes a URL from base64, like base64URLDecode but returns an empty string in case of an error.)";
FunctionDocumentation::Syntax syntax = "tryBase64URLDecode(encodedURL)";
FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}};
FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument.";
FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja')", ""}};
FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja')", ""}};
FunctionDocumentation::Categories categories = {"String encoding"};
factory.registerFunction<FunctionBase64Conversion<TryBase64Decode<Base64Variant::Url>>>({description, syntax, arguments, returned_value, examples, categories});
factory.registerFunction<FunctionBase64Conversion<TryBase64Decode<Base64Variant::URL>>>({description, syntax, arguments, returned_value, examples, categories});
}
}

View File

@ -2116,7 +2116,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
}
StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name)
StoragePtr Context::buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values)
{
if (table_name.empty())
return nullptr;
@ -2129,8 +2129,7 @@ StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression
return nullptr;
auto query = original_view->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone();
NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext());
StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values);
StorageView::replaceQueryParametersIfParametrizedView(query, param_values);
ASTCreateQuery create;
create.select = query->as<ASTSelectWithUnionQuery>();
@ -4136,7 +4135,7 @@ std::shared_ptr<FilesystemCacheLog> Context::getFilesystemCacheLog() const
return shared->system_logs->filesystem_cache_log;
}
std::shared_ptr<S3QueueLog> Context::getS3QueueLog() const
std::shared_ptr<ObjectStorageQueueLog> Context::getS3QueueLog() const
{
SharedLockGuard lock(shared->mutex);
if (!shared->system_logs)
@ -4145,6 +4144,15 @@ std::shared_ptr<S3QueueLog> Context::getS3QueueLog() const
return shared->system_logs->s3_queue_log;
}
std::shared_ptr<ObjectStorageQueueLog> Context::getAzureQueueLog() const
{
SharedLockGuard lock(shared->mutex);
if (!shared->system_logs)
return {};
return shared->system_logs->azure_queue_log;
}
std::shared_ptr<FilesystemReadPrefetchesLog> Context::getFilesystemReadPrefetchesLog() const
{
SharedLockGuard lock(shared->mutex);

View File

@ -107,7 +107,7 @@ class TransactionsInfoLog;
class ProcessorsProfileLog;
class FilesystemCacheLog;
class FilesystemReadPrefetchesLog;
class S3QueueLog;
class ObjectStorageQueueLog;
class AsynchronousInsertLog;
class BackupLog;
class BlobStorageLog;
@ -774,7 +774,7 @@ public:
/// Overload for the new analyzer. Structure inference is performed in QueryAnalysisPass.
StoragePtr executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr);
StoragePtr buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name);
StoragePtr buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values);
void addViewSource(const StoragePtr & storage);
StoragePtr getViewSource() const;
@ -1133,7 +1133,8 @@ public:
std::shared_ptr<TransactionsInfoLog> getTransactionsInfoLog() const;
std::shared_ptr<ProcessorsProfileLog> getProcessorsProfileLog() const;
std::shared_ptr<FilesystemCacheLog> getFilesystemCacheLog() const;
std::shared_ptr<S3QueueLog> getS3QueueLog() const;
std::shared_ptr<ObjectStorageQueueLog> getS3QueueLog() const;
std::shared_ptr<ObjectStorageQueueLog> getAzureQueueLog() const;
std::shared_ptr<FilesystemReadPrefetchesLog> getFilesystemReadPrefetchesLog() const;
std::shared_ptr<AsynchronousInsertLog> getAsynchronousInsertLog() const;
std::shared_ptr<BackupLog> getBackupLog() const;

View File

@ -63,6 +63,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int HAVE_DEPENDENT_OBJECTS;
extern const int UNFINISHED;
extern const int INFINITE_LOOP;
}
class DatabaseNameHints : public IHints<>
@ -1473,6 +1474,114 @@ void DatabaseCatalog::checkTableCanBeRemovedOrRenamedUnlocked(
removing_table, fmt::join(from_other_databases, ", "));
}
void DatabaseCatalog::checkTableCanBeAddedWithNoCyclicDependencies(
const QualifiedTableName & table_name,
const TableNamesSet & new_referential_dependencies,
const TableNamesSet & new_loading_dependencies)
{
std::lock_guard lock{databases_mutex};
StorageID table_id = StorageID{table_name};
auto check = [&](TablesDependencyGraph & dependencies, const TableNamesSet & new_dependencies)
{
auto old_dependencies = dependencies.removeDependencies(table_id);
dependencies.addDependencies(table_name, new_dependencies);
auto restore_dependencies = [&]()
{
dependencies.removeDependencies(table_id);
if (!old_dependencies.empty())
dependencies.addDependencies(table_id, old_dependencies);
};
if (dependencies.hasCyclicDependencies())
{
auto cyclic_dependencies_description = dependencies.describeCyclicDependencies();
restore_dependencies();
throw Exception(
ErrorCodes::INFINITE_LOOP,
"Cannot add dependencies for '{}', because it will lead to cyclic dependencies: {}",
table_name.getFullName(),
cyclic_dependencies_description);
}
restore_dependencies();
};
check(referential_dependencies, new_referential_dependencies);
check(loading_dependencies, new_loading_dependencies);
}
void DatabaseCatalog::checkTableCanBeRenamedWithNoCyclicDependencies(const StorageID & from_table_id, const StorageID & to_table_id)
{
std::lock_guard lock{databases_mutex};
auto check = [&](TablesDependencyGraph & dependencies)
{
auto old_dependencies = dependencies.removeDependencies(from_table_id);
dependencies.addDependencies(to_table_id, old_dependencies);
auto restore_dependencies = [&]()
{
dependencies.removeDependencies(to_table_id);
dependencies.addDependencies(from_table_id, old_dependencies);
};
if (dependencies.hasCyclicDependencies())
{
auto cyclic_dependencies_description = dependencies.describeCyclicDependencies();
restore_dependencies();
throw Exception(
ErrorCodes::INFINITE_LOOP,
"Cannot rename '{}' to '{}', because it will lead to cyclic dependencies: {}",
from_table_id.getFullTableName(),
to_table_id.getFullTableName(),
cyclic_dependencies_description);
}
restore_dependencies();
};
check(referential_dependencies);
check(loading_dependencies);
}
void DatabaseCatalog::checkTablesCanBeExchangedWithNoCyclicDependencies(const StorageID & table_id_1, const StorageID & table_id_2)
{
std::lock_guard lock{databases_mutex};
auto check = [&](TablesDependencyGraph & dependencies)
{
auto old_dependencies_1 = dependencies.removeDependencies(table_id_1);
auto old_dependencies_2 = dependencies.removeDependencies(table_id_2);
dependencies.addDependencies(table_id_1, old_dependencies_2);
dependencies.addDependencies(table_id_2, old_dependencies_1);
auto restore_dependencies = [&]()
{
dependencies.removeDependencies(table_id_1);
dependencies.removeDependencies(table_id_2);
dependencies.addDependencies(table_id_1, old_dependencies_1);
dependencies.addDependencies(table_id_2, old_dependencies_2);
};
if (dependencies.hasCyclicDependencies())
{
auto cyclic_dependencies_description = dependencies.describeCyclicDependencies();
restore_dependencies();
throw Exception(
ErrorCodes::INFINITE_LOOP,
"Cannot exchange '{}' and '{}', because it will lead to cyclic dependencies: {}",
table_id_1.getFullTableName(),
table_id_2.getFullTableName(),
cyclic_dependencies_description);
}
restore_dependencies();
};
check(referential_dependencies);
check(loading_dependencies);
}
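
All three new checks share one pattern: tentatively apply the candidate edges to the shared dependency graph, run cycle detection, then restore the original edges before either returning or throwing INFINITE_LOOP. A self-contained sketch of that tentative-apply/check/rollback flow over a toy adjacency map (not the TablesDependencyGraph API):

#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>

using Graph = std::map<std::string, std::set<std::string>>;

// Depth-first search with a recursion stack to detect a directed cycle.
static bool hasCycleFrom(const Graph & g, const std::string & v,
                         std::set<std::string> & visited, std::set<std::string> & on_stack)
{
    if (on_stack.count(v)) return true;
    if (!visited.insert(v).second) return false;
    on_stack.insert(v);
    if (auto it = g.find(v); it != g.end())
        for (const auto & next : it->second)
            if (hasCycleFrom(g, next, visited, on_stack))
                return true;
    on_stack.erase(v);
    return false;
}

static bool hasCycle(const Graph & g)
{
    std::set<std::string> visited, on_stack;
    for (const auto & [v, edges] : g)
        if (hasCycleFrom(g, v, visited, on_stack))
            return true;
    return false;
}

// Tentatively replace `table`'s dependencies, check, and always roll back,
// mirroring the check lambda in checkTableCanBeAddedWithNoCyclicDependencies.
void checkCanAdd(Graph & g, const std::string & table, std::set<std::string> new_deps)
{
    auto old_deps = std::exchange(g[table], std::move(new_deps));
    bool cyclic = hasCycle(g);
    g[table] = std::move(old_deps); // restore whether we throw or not
    if (cyclic)
        throw std::runtime_error("would create cyclic dependencies for " + table);
}

int main()
{
    Graph g{{"mv", {"src"}}};
    checkCanAdd(g, "src", {});             // fine
    try { checkCanAdd(g, "src", {"mv"}); } // src -> mv -> src: rejected
    catch (const std::exception &) {}
}
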
void DatabaseCatalog::cleanupStoreDirectoryTask()
{
for (const auto & [disk_name, disk] : getContext()->getDisksMap())

View File

@ -245,6 +245,9 @@ public:
void checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool check_referential_dependencies, bool check_loading_dependencies, bool is_drop_database = false) const;
void checkTableCanBeAddedWithNoCyclicDependencies(const QualifiedTableName & table_name, const TableNamesSet & new_referential_dependencies, const TableNamesSet & new_loading_dependencies);
void checkTableCanBeRenamedWithNoCyclicDependencies(const StorageID & from_table_id, const StorageID & to_table_id);
void checkTablesCanBeExchangedWithNoCyclicDependencies(const StorageID & table_id_1, const StorageID & table_id_2);
struct TableMarkedAsDropped
{

View File

@ -195,6 +195,10 @@ static void setLazyExecutionInfo(
}
lazy_execution_info.short_circuit_ancestors_info[parent].insert(indexes.begin(), indexes.end());
/// After checking arguments_with_disabled_lazy_execution, if there is no relation with the parent,
/// disable the current node.
if (indexes.empty())
lazy_execution_info.can_be_lazy_executed = false;
}
else
/// If lazy execution is disabled for one of parents, we should disable it for current node.
@ -292,9 +296,9 @@ static std::unordered_set<const ActionsDAG::Node *> processShortCircuitFunctions
/// Firstly, find all short-circuit functions and get their settings.
std::unordered_map<const ActionsDAG::Node *, IFunctionBase::ShortCircuitSettings> short_circuit_nodes;
IFunctionBase::ShortCircuitSettings short_circuit_settings;
for (const auto & node : nodes)
{
IFunctionBase::ShortCircuitSettings short_circuit_settings;
if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->isShortCircuit(short_circuit_settings, node.children.size()) && !node.children.empty())
short_circuit_nodes[&node] = short_circuit_settings;
}
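
Moving the declaration into the loop body matters because isShortCircuit fills the settings struct as an out-parameter: a single instance hoisted out of the loop can carry fields written for one function into the evaluation of the next. A tiny standalone illustration of that stale-out-parameter bug (hypothetical names):

#include <iostream>

struct Settings { bool enable_lazy = false; int limit = 0; };

// An out-parameter style probe that only fills the struct when it returns
// true, and, like many such functions, does not touch every field.
bool probe(int kind, Settings & s)
{
    if (kind == 1) { s.enable_lazy = true; s.limit = 8; return true; }
    if (kind == 2) { s.limit = 2; return true; } // forgets enable_lazy
    return false;
}

int main()
{
    Settings leaky; // BUG: shared across iterations, keeps stale fields
    for (int kind : {1, 2})
    {
        Settings fresh; // FIX: re-initialized each iteration, as in the patch
        probe(kind, leaky);
        probe(kind, fresh);
        std::cout << "kind=" << kind
                  << " leaky.enable_lazy=" << leaky.enable_lazy
                  << " fresh.enable_lazy=" << fresh.enable_lazy << '\n';
    }
    // kind=2 prints leaky.enable_lazy=1 (stale) but fresh.enable_lazy=0.
}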

View File

@ -898,6 +898,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
assert(as_database_saved.empty() && as_table_saved.empty());
std::swap(create.as_database, as_database_saved);
std::swap(create.as_table, as_table_saved);
if (!as_table_saved.empty())
create.is_create_empty = false;
return properties;
}
@ -1109,6 +1111,27 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
}
namespace
{
void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context)
{
QualifiedTableName qualified_name{create.getDatabase(), create.getTable()};
auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase());
auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr);
DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies);
}
void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context)
{
QualifiedTableName qualified_name{create.getDatabase(), create.getTable()};
auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase());
auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr);
DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies);
}
}
BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
{
/// Temporary tables are created out of databases.
@ -1354,11 +1377,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
return {};
/// If table has dependencies - add them to the graph
QualifiedTableName qualified_name{database_name, create.getTable()};
auto ref_dependencies = getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr);
auto loading_dependencies = getLoadingDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr);
DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies);
addTableDependencies(create, query_ptr, getContext());
return fillTableIfNeeded(create);
}
@ -1510,6 +1529,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find UUID mapping for {}, it's a bug", create.uuid);
}
/// Before actually creating the table, check if it will lead to cyclic dependencies.
checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, getContext());
StoragePtr res;
/// NOTE: CREATE query may be rewritten by Storage creator or table function
if (create.as_table_function)
@ -1621,6 +1643,9 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
ContextMutablePtr create_context = Context::createCopy(current_context);
create_context->setQueryContext(std::const_pointer_cast<Context>(current_context));
/// Before actually creating/replacing the table, check if it will lead to cyclic dependencies.
checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, create_context);
auto make_drop_context = [&]() -> ContextMutablePtr
{
ContextMutablePtr drop_context = Context::createCopy(current_context);
@ -1667,6 +1692,9 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
assert(done);
created = true;
/// If table has dependencies - add them to the graph
addTableDependencies(create, query_ptr, getContext());
/// Try fill temporary table
BlockIO fill_io = fillTableIfNeeded(create);
executeTrivialBlockIO(fill_io, getContext());

View File

@ -127,14 +127,23 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
{
StorageID from_table_id{elem.from_database_name, elem.from_table_name};
StorageID to_table_id{elem.to_database_name, elem.to_table_name};
std::vector<StorageID> ref_dependencies;
std::vector<StorageID> loading_dependencies;
std::vector<StorageID> from_ref_dependencies;
std::vector<StorageID> from_loading_dependencies;
std::vector<StorageID> to_ref_dependencies;
std::vector<StorageID> to_loading_dependencies;
if (!exchange_tables)
if (exchange_tables)
{
DatabaseCatalog::instance().checkTablesCanBeExchangedWithNoCyclicDependencies(from_table_id, to_table_id);
std::tie(from_ref_dependencies, from_loading_dependencies) = database_catalog.removeDependencies(from_table_id, false, false);
std::tie(to_ref_dependencies, to_loading_dependencies) = database_catalog.removeDependencies(to_table_id, false, false);
}
else
{
DatabaseCatalog::instance().checkTableCanBeRenamedWithNoCyclicDependencies(from_table_id, to_table_id);
bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies;
bool check_loading_deps = !check_ref_deps && getContext()->getSettingsRef().check_table_dependencies;
std::tie(ref_dependencies, loading_dependencies) = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps);
std::tie(from_ref_dependencies, from_loading_dependencies) = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps);
}
try
@ -147,12 +156,17 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
exchange_tables,
rename.dictionary);
DatabaseCatalog::instance().addDependencies(to_table_id, ref_dependencies, loading_dependencies);
DatabaseCatalog::instance().addDependencies(to_table_id, from_ref_dependencies, from_loading_dependencies);
if (!to_ref_dependencies.empty() || !to_loading_dependencies.empty())
DatabaseCatalog::instance().addDependencies(from_table_id, to_ref_dependencies, to_loading_dependencies);
}
catch (...)
{
/// Restore dependencies if RENAME fails
DatabaseCatalog::instance().addDependencies(from_table_id, ref_dependencies, loading_dependencies);
DatabaseCatalog::instance().addDependencies(from_table_id, from_ref_dependencies, from_loading_dependencies);
if (!to_ref_dependencies.empty() || !to_loading_dependencies.empty())
DatabaseCatalog::instance().addDependencies(to_table_id, to_ref_dependencies, to_loading_dependencies);
throw;
}
}
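
For EXCHANGE, both tables' dependencies are now removed up front, re-added swapped on success, and restored as they were if the rename throws. A compact standalone sketch of the same swap-with-rollback flow over a plain map (the names and the failing step are illustrative):

#include <iostream>
#include <map>
#include <set>
#include <string>

using Deps = std::map<std::string, std::set<std::string>>;

// Exchange deps of two tables around an operation that may throw,
// restoring the original mapping on failure (as executeToTables does).
void exchangeWithRollback(Deps & deps, const std::string & a, const std::string & b,
                          void (*rename_op)())
{
    auto a_deps = deps[a]; deps.erase(a);
    auto b_deps = deps[b]; deps.erase(b);
    try
    {
        rename_op();      // the actual EXCHANGE TABLES step
        deps[a] = b_deps; // success: dependencies follow the swap
        deps[b] = a_deps;
    }
    catch (...)
    {
        deps[a] = a_deps; // failure: restore the pre-exchange state
        deps[b] = b_deps;
        throw;
    }
}

int main()
{
    Deps deps{{"t1", {"mv1"}}, {"t2", {"mv2"}}};
    exchangeWithRollback(deps, "t1", "t2", +[] {});
    std::cout << *deps["t1"].begin() << '\n'; // mv2
}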

View File

@ -8,19 +8,19 @@
#include <DataTypes/DataTypeMap.h>
#include <Interpreters/ProfileEventsExt.h>
#include <DataTypes/DataTypeEnum.h>
#include <Interpreters/S3QueueLog.h>
#include <Interpreters/ObjectStorageQueueLog.h>
namespace DB
{
ColumnsDescription S3QueueLogElement::getColumnsDescription()
ColumnsDescription ObjectStorageQueueLogElement::getColumnsDescription()
{
auto status_datatype = std::make_shared<DataTypeEnum8>(
DataTypeEnum8::Values
{
{"Processed", static_cast<Int8>(S3QueueLogElement::S3QueueStatus::Processed)},
{"Failed", static_cast<Int8>(S3QueueLogElement::S3QueueStatus::Failed)},
{"Processed", static_cast<Int8>(ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed)},
{"Failed", static_cast<Int8>(ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed)},
});
return ColumnsDescription
@ -41,7 +41,7 @@ ColumnsDescription S3QueueLogElement::getColumnsDescription()
};
}
void S3QueueLogElement::appendToBlock(MutableColumns & columns) const
void ObjectStorageQueueLogElement::appendToBlock(MutableColumns & columns) const
{
size_t i = 0;
columns[i++]->insert(getFQDNOrHostName());

View File

@ -9,7 +9,7 @@
namespace DB
{
struct S3QueueLogElement
struct ObjectStorageQueueLogElement
{
time_t event_time{};
@ -20,18 +20,18 @@ struct S3QueueLogElement
std::string file_name;
size_t rows_processed = 0;
enum class S3QueueStatus : uint8_t
enum class ObjectStorageQueueStatus : uint8_t
{
Processed,
Failed,
};
S3QueueStatus status;
ObjectStorageQueueStatus status;
ProfileEvents::Counters::Snapshot counters_snapshot;
time_t processing_start_time;
time_t processing_end_time;
std::string exception;
static std::string name() { return "S3QueueLog"; }
static std::string name() { return "ObjectStorageQueueLog"; }
static ColumnsDescription getColumnsDescription();
static NamesAndAliases getNamesAndAliases() { return {}; }
@ -39,9 +39,9 @@ struct S3QueueLogElement
void appendToBlock(MutableColumns & columns) const;
};
class S3QueueLog : public SystemLog<S3QueueLogElement>
class ObjectStorageQueueLog : public SystemLog<ObjectStorageQueueLogElement>
{
using SystemLog<S3QueueLogElement>::SystemLog;
using SystemLog<ObjectStorageQueueLogElement>::SystemLog;
};
}

View File

@ -25,7 +25,7 @@
#include <Interpreters/QueryLog.h>
#include <Interpreters/QueryThreadLog.h>
#include <Interpreters/QueryViewsLog.h>
#include <Interpreters/S3QueueLog.h>
#include <Interpreters/ObjectStorageQueueLog.h>
#include <Interpreters/SessionLog.h>
#include <Interpreters/TextLog.h>
#include <Interpreters/TraceLog.h>
@ -306,7 +306,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf
processors_profile_log = createSystemLog<ProcessorsProfileLog>(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information on processors level (building blocks for a pipeline for query execution).");
asynchronous_insert_log = createSystemLog<AsynchronousInsertLog>(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history for all asynchronous inserts executed on current server.");
backup_log = createSystemLog<BackupLog>(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations.");
s3_queue_log = createSystemLog<S3QueueLog>(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by S3Queue engine.");
s3_queue_log = createSystemLog<ObjectStorageQueueLog>(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with information about files processed by the S3Queue engine.");
azure_queue_log = createSystemLog<ObjectStorageQueueLog>(global_context, "system", "azure_queue_log", config, "azure_queue_log", "Contains logging entries with information about files processed by the AzureQueue engine.");
blob_storage_log = createSystemLog<BlobStorageLog>(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes.");
if (query_log)

View File

@ -53,7 +53,7 @@ class FilesystemCacheLog;
class FilesystemReadPrefetchesLog;
class AsynchronousInsertLog;
class BackupLog;
class S3QueueLog;
class ObjectStorageQueueLog;
class BlobStorageLog;
/// System logs should be destroyed in destructor of the last Context and before tables,
@ -76,7 +76,8 @@ struct SystemLogs
std::shared_ptr<ErrorLog> error_log; /// Used to log errors.
std::shared_ptr<FilesystemCacheLog> filesystem_cache_log;
std::shared_ptr<FilesystemReadPrefetchesLog> filesystem_read_prefetches_log;
std::shared_ptr<S3QueueLog> s3_queue_log;
std::shared_ptr<ObjectStorageQueueLog> s3_queue_log;
std::shared_ptr<ObjectStorageQueueLog> azure_queue_log;
/// Metrics from system.asynchronous_metrics.
std::shared_ptr<AsynchronousMetricLog> asynchronous_metric_log;
/// OpenTelemetry trace spans.

View File

@ -321,7 +321,12 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log
bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO);
if (config.getBool("logger.console", false)
|| (!config.hasProperty("logger.console") && !is_daemon && should_log_to_console))
split->setLevel("console", log_level);
{
auto console_log_level_string = config.getString("logger.console_log_level", log_level_string);
auto console_log_level = Poco::Logger::parseLevel(console_log_level_string);
max_log_level = std::max(console_log_level, max_log_level);
split->setLevel("console", console_log_level);
}
else
split->setLevel("console", 0);

View File

@ -404,8 +404,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
<< quoteString(toString(to_inner_uuid));
}
bool should_add_empty = is_create_empty;
auto add_empty_if_needed = [&]
{
if (!should_add_empty)
return;
should_add_empty = false;
settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? hilite_none : "");
};
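
add_empty_if_needed is a flag-guarded emit-once lambda: whichever branch runs it first prints " EMPTY" and clears the flag, which is how EMPTY moves in front of "AS table" or "AS table_function()" while keeping its old position for a plain CREATE ... EMPTY. A standalone sketch of the idiom:

#include <iostream>

int main()
{
    bool should_add_empty = true; // set from is_create_empty
    auto add_empty_if_needed = [&]
    {
        if (!should_add_empty)
            return;
        should_add_empty = false; // emit at most once
        std::cout << " EMPTY";
    };

    std::cout << "CREATE TABLE t";
    add_empty_if_needed(); // first call emits
    std::cout << " AS src";
    add_empty_if_needed(); // later calls are no-ops
    std::cout << '\n';     // CREATE TABLE t EMPTY AS src
}
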
if (!as_table.empty())
{
add_empty_if_needed();
settings.ostr
<< (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "")
<< (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table);
@ -423,6 +433,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
frame.expression_list_always_start_on_new_line = false;
}
add_empty_if_needed();
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "");
as_table_function->formatImpl(settings, state, frame);
}
@ -484,8 +495,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
if (is_populate)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : "");
else if (is_create_empty)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? hilite_none : "");
add_empty_if_needed();
if (sql_security && supportSQLSecurity() && sql_security->as<ASTSQLSecurity &>().type.has_value())
{

View File

@ -82,6 +82,16 @@ private:
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function.name == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function.name == "remote") || (function.name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
@ -169,6 +179,43 @@ private:
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorageCluster('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument at this position is a format name, so this signature carries no account_key to hide: azureBlobStorage(url, container, blobpath, format, ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
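
The net effect is that the positional account_key of azureBlobStorage / azureBlobStorageCluster gets replaced with "[HIDDEN]" in logged query text. A standalone sketch of the masking step (the format-name early exit above is omitted for brevity, and the helper is illustrative, not the real markSecretArgument):

#include <iostream>
#include <string>
#include <vector>

// azureBlobStorage(url, container, blobpath, account_name, account_key, ...)
// -> hide index 4 when enough positional arguments are present.
std::vector<std::string> maskAzureArgs(std::vector<std::string> args)
{
    const size_t url_arg_idx = 0;      // 1 for azureBlobStorageCluster
    if (args.size() > url_arg_idx + 4) // account_key position exists
        args[url_arg_idx + 4] = "[HIDDEN]";
    return args;
}

int main()
{
    auto masked = maskAzureArgs({"https://acc.blob.core.windows.net", "cont",
                                 "data/*.csv", "account", "sEcReT", "CSV"});
    for (const auto & a : masked)
        std::cout << a << ' ';
    std::cout << '\n'; // ... account [HIDDEN] CSV
}
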
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))

View File

@ -802,13 +802,12 @@ static std::shared_ptr<IJoin> tryCreateJoin(JoinAlgorithm algorithm,
algorithm == JoinAlgorithm::PARALLEL_HASH ||
algorithm == JoinAlgorithm::DEFAULT)
{
if (table_join->allowParallelHashJoin())
{
auto query_context = planner_context->getQueryContext();
return std::make_shared<ConcurrentHashJoin>(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header);
}
auto query_context = planner_context->getQueryContext();
return std::make_shared<HashJoin>(table_join, right_table_expression_header);
if (table_join->allowParallelHashJoin())
return std::make_shared<ConcurrentHashJoin>(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header);
return std::make_shared<HashJoin>(table_join, right_table_expression_header, query_context->getSettingsRef().join_any_take_last_row);
}
if (algorithm == JoinAlgorithm::FULL_SORTING_MERGE)

View File

@ -46,6 +46,15 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat(
{
const auto & header = getPort().getHeader();
name_map = header.getNamesToIndexesMap();
if (format_settings_.json.ignore_key_case)
{
for (auto & it : name_map)
{
StringRef key = it.first;
String lower_case_key = transformFieldNameToLowerCase(key);
lower_case_name_map[lower_case_key] = key;
}
}
if (format_settings_.import_nested_json)
{
for (size_t i = 0; i != header.columns(); ++i)
@ -171,7 +180,15 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns)
skipUnknownField(name_ref);
continue;
}
const size_t column_index = columnIndex(name_ref, key_index);
size_t column_index = 0;
if (format_settings.json.ignore_key_case)
{
String lower_case_name = transformFieldNameToLowerCase(name_ref);
StringRef field_name_ref = lower_case_name_map[lower_case_name];
column_index = columnIndex(field_name_ref, key_index);
}
else
column_index = columnIndex(name_ref, key_index);
if (unlikely(ssize_t(column_index) < 0))
{

View File

@ -55,7 +55,13 @@ private:
virtual void readRowStart(MutableColumns &) {}
virtual void skipRowStart() {}
String transformFieldNameToLowerCase(const StringRef & field_name)
{
String field_name_str = field_name.toString();
std::transform(field_name_str.begin(), field_name_str.end(), field_name_str.begin(),
[](unsigned char c) { return std::tolower(c); });
return field_name_str;
}
/// Buffer for the read from the stream field name. Used when you have to copy it.
/// Also, if processing of Nested data is in progress, it holds the common prefix
/// of the nested column names (so that appending the field name to it produces
@ -74,7 +80,8 @@ private:
/// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map.
Block::NameMap name_map;
/// Hash table match `lower_case field name -> field name in the block`.
std::unordered_map<String, StringRef> lower_case_name_map;
/// Cached search results for previous row (keyed as index in JSON object) - used as a hint.
std::vector<Block::NameMap::const_iterator> prev_positions;
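
Together with the reading-path change above, the format builds lower_case_name_map once from the header and consults it per JSON key when ignore_key_case is enabled. A standalone sketch of that case-insensitive lookup (plain std::string stands in for StringRef):

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <unordered_map>

static std::string toLower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return s;
}

int main()
{
    // Built once from the header, as in the JSONEachRowRowInputFormat constructor.
    std::unordered_map<std::string, std::string> lower_case_name_map;
    for (std::string column : {"Id", "UserName"})
        lower_case_name_map[toLower(column)] = column;

    // Per incoming JSON key: case-insensitive match back to the real column.
    for (std::string key : {"id", "USERNAME", "unknown"})
    {
        auto it = lower_case_name_map.find(toLower(key));
        if (it != lower_case_name_map.end())
            std::cout << key << " -> " << it->second << '\n';
        else
            std::cout << key << " -> (skipped as unknown field)\n";
    }
}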

View File

@ -323,6 +323,9 @@ void ParquetBlockOutputFormat::writeUsingArrow(std::vector<Chunk> chunks)
parquet::WriterProperties::Builder builder;
builder.version(getParquetVersion(format_settings));
builder.compression(getParquetCompression(format_settings.parquet.output_compression_method));
// Writing the page index is disabled by default.
if (format_settings.parquet.write_page_index)
builder.enable_write_page_index();
parquet::ArrowWriterProperties::Builder writer_props_builder;
if (format_settings.parquet.output_compliant_nested_types)

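enable_write_page_index() makes the Arrow Parquet writer emit per-page column/offset indexes so readers can skip pages by min/max statistics. A hedged sketch of building equivalent writer properties against the Arrow C++ API directly (assumes an Arrow version that exposes enable_write_page_index, as the code above already requires; the codec choice is illustrative):

#include <memory>
#include <parquet/properties.h>

// Writer properties mirroring the settings above: pick a compression codec
// and opt in to the page index, which parquet-cpp leaves off by default.
std::shared_ptr<parquet::WriterProperties> makeWriterProps(bool write_page_index)
{
    parquet::WriterProperties::Builder builder;
    builder.compression(parquet::Compression::ZSTD);
    if (write_page_index)
        builder.enable_write_page_index();
    return builder.build();
}
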
View File

@ -120,7 +120,7 @@ Chunk PostgreSQLSource<T>::generate()
MutableColumns columns = description.sample_block.cloneEmptyColumns();
size_t num_rows = 0;
while (true)
while (!isCancelled())
{
const std::vector<pqxx::zview> * row{stream->read_row()};

View File

@ -9,7 +9,6 @@
#include <base/defines.h>
#include <base/types.h>
#include <Common/logger_useful.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
@ -19,6 +18,7 @@
#include <Interpreters/FullSortingMergeJoin.h>
#include <Interpreters/TableJoin.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Processors/Chunk.h>
#include <Processors/Transforms/MergeJoinTransform.h>
@ -40,7 +40,7 @@ FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns)
desc.reserve(columns.size());
for (const auto & name : columns)
desc.emplace_back(name);
return std::make_unique<FullMergeJoinCursor>(materializeBlock(block), desc);
return std::make_unique<FullMergeJoinCursor>(block, desc);
}
template <bool has_left_nulls, bool has_right_nulls>
@ -234,9 +234,14 @@ void inline addMany(PaddedPODArray<UInt64> & left_or_right_map, size_t idx, size
for (size_t i = 0; i < num; ++i)
left_or_right_map.push_back(idx);
}
}
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_)
: sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_)
{
}
const Chunk & FullMergeJoinCursor::getCurrent() const
{
return current_chunk;
@ -260,6 +265,10 @@ void FullMergeJoinCursor::setChunk(Chunk && chunk)
return;
}
// should match the structure of sample_block (after materialization)
convertToFullIfConst(chunk);
convertToFullIfSparse(chunk);
current_chunk = std::move(chunk);
cursor = SortCursorImpl(sample_block, current_chunk.getColumns(), desc);
}
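
setChunk now materializes the incoming chunk so it matches sample_block, which the constructor builds with materializeBlock: a constant (or sparse) column stores one value plus a row count and must be expanded to a full column before SortCursorImpl can address rows positionally. A toy sketch of what "convert to full" means (toy column type, not the IColumn API):

#include <cstddef>
#include <iostream>
#include <vector>

// A "const column": one value logically repeated `rows` times.
struct ConstColumn { int value; std::size_t rows; };

// Materialization: expand to a plain vector so row i is addressable directly,
// which is what positional cursors like SortCursorImpl require.
std::vector<int> convertToFull(const ConstColumn & c)
{
    return std::vector<int>(c.rows, c.value);
}

int main()
{
    ConstColumn c{42, 4};
    auto full = convertToFull(c);
    std::cout << full.size() << " rows, row 2 = " << full[2] << '\n'; // 4 rows, row 2 = 42
}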

View File

@ -193,11 +193,7 @@ private:
class FullMergeJoinCursor : boost::noncopyable
{
public:
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_)
: sample_block(sample_block_.cloneEmpty())
, desc(description_)
{
}
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_);
bool fullyCompleted() const;
void setChunk(Chunk && chunk);

View File

@ -387,7 +387,7 @@ void TCPHandler::runImpl()
query_scope.emplace(query_context, /* fatal_error_callback */ [this]
{
std::lock_guard lock(fatal_error_mutex);
std::lock_guard lock(out_mutex);
sendLogs();
});
@ -475,7 +475,7 @@ void TCPHandler::runImpl()
Stopwatch watch;
CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::ReadTaskRequestsSent);
std::lock_guard lock(task_callback_mutex);
std::scoped_lock lock(out_mutex, task_callback_mutex);
if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED)
return {};
@ -491,7 +491,7 @@ void TCPHandler::runImpl()
{
Stopwatch watch;
CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::MergeTreeAllRangesAnnouncementsSent);
std::lock_guard lock(task_callback_mutex);
std::scoped_lock lock(out_mutex, task_callback_mutex);
if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED)
return;
@ -505,7 +505,7 @@ void TCPHandler::runImpl()
{
Stopwatch watch;
CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::MergeTreeReadTaskRequestsSent);
std::lock_guard lock(task_callback_mutex);
std::scoped_lock lock(out_mutex, task_callback_mutex);
if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED)
return std::nullopt;
@ -553,7 +553,7 @@ void TCPHandler::runImpl()
{
auto callback = [this]()
{
std::scoped_lock lock(task_callback_mutex, fatal_error_mutex);
std::scoped_lock lock(out_mutex, task_callback_mutex);
if (getQueryCancellationStatus() == CancellationStatus::FULLY_CANCELLED)
return true;
@ -572,7 +572,7 @@ void TCPHandler::runImpl()
finish_or_cancel();
std::lock_guard lock(task_callback_mutex);
std::lock_guard lock(out_mutex);
/// Send final progress after calling onFinish(), since it will update the progress.
///
@ -595,7 +595,7 @@ void TCPHandler::runImpl()
break;
{
std::lock_guard lock(task_callback_mutex);
std::lock_guard lock(out_mutex);
sendLogs();
sendEndOfStream();
}
@ -1014,7 +1014,7 @@ void TCPHandler::processOrdinaryQuery()
if (query_context->getSettingsRef().allow_experimental_query_deduplication)
{
std::lock_guard lock(task_callback_mutex);
std::lock_guard lock(out_mutex);
sendPartUUIDs();
}
@ -1024,13 +1024,13 @@ void TCPHandler::processOrdinaryQuery()
if (header)
{
std::lock_guard lock(task_callback_mutex);
std::lock_guard lock(out_mutex);
sendData(header);
}
}
/// Defer locking to cover a part of the scope below and everything after it
std::unique_lock progress_lock(task_callback_mutex, std::defer_lock);
std::unique_lock out_lock(out_mutex, std::defer_lock);
{
PullingAsyncPipelineExecutor executor(pipeline);
@ -1056,6 +1056,9 @@ void TCPHandler::processOrdinaryQuery()
executor.cancelReading();
}
lock.unlock();
out_lock.lock();
if (after_send_progress.elapsed() / 1000 >= interactive_delay)
{
/// Some time passed and there is a progress.
@ -1071,12 +1074,14 @@ void TCPHandler::processOrdinaryQuery()
if (!state.io.null_format)
sendData(block);
}
out_lock.unlock();
}
/// This lock wasn't acquired before; the .lock() call happens here
/// so that everything below this line is covered, even together
/// with sendProgress() outside of this scope
progress_lock.lock();
out_lock.lock();
/** If data has run out, we will send the profiling data and total values to
* the last zero block to be able to use

View File

@ -226,8 +226,13 @@ private:
std::optional<UInt64> nonce;
String cluster;
/// `out_mutex` protects `out` (WriteBuffer).
/// So it is used for method sendData(), sendProgress(), sendLogs(), etc.
std::mutex out_mutex;
/// `task_callback_mutex` protects tasks callbacks.
/// Inside these callbacks we might also change cancellation status,
/// so it also protects cancellation status checks.
std::mutex task_callback_mutex;
std::mutex fatal_error_mutex;
/// At the moment, only one ongoing query in the connection is supported at a time.
QueryState state;
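
Several call sites above replace a single-mutex lock_guard with std::scoped_lock over out_mutex plus task_callback_mutex; scoped_lock acquires any number of mutexes with a deadlock-avoidance algorithm, so different paths need not agree on an ordering. A standalone sketch of that property (illustrative functions):

#include <iostream>
#include <mutex>
#include <thread>

std::mutex out_mutex;           // guards the output buffer
std::mutex task_callback_mutex; // guards task callbacks / cancellation state

void sendFromCallback()
{
    // Same pair, listed in one order...
    std::scoped_lock lock(out_mutex, task_callback_mutex);
    std::cout << "callback path\n";
}

void sendFromMainLoop()
{
    // ...and in the other order: scoped_lock still cannot deadlock,
    // unlike two nested std::lock_guard acquisitions.
    std::scoped_lock lock(task_callback_mutex, out_mutex);
    std::cout << "main loop path\n";
}

int main()
{
    std::thread a(sendFromCallback), b(sendFromMainLoop);
    a.join();
    b.join();
}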

View File

@ -1577,7 +1577,7 @@ void IMergeTreeDataPart::loadColumns(bool require)
if (getFileNameForColumn(column))
loaded_columns.push_back(column);
if (columns.empty())
if (loaded_columns.empty())
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name);
if (!is_readonly_storage)

View File

@ -298,6 +298,11 @@ std::optional<time_t> MergeTreeDataPartWide::getColumnModificationTime(const Str
std::optional<String> MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const
{
std::optional<String> filename;
/// Fallback for the case when the serializations were not loaded yet (called from loadColumns())
if (getSerializations().empty())
return getStreamNameForColumn(column, {}, DATA_FILE_EXTENSION, getDataPartStorage());
getSerialization(column.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path)
{
if (!filename.has_value())
@ -309,6 +314,7 @@ std::optional<String> MergeTreeDataPartWide::getFileNameForColumn(const NameAndT
filename = getStreamNameForColumn(column, substream_path, DATA_FILE_EXTENSION, getDataPartStorage());
}
});
return filename;
}

View File

@ -15,7 +15,7 @@ class SchemaCache;
class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext
{
friend class StorageS3QueueSource;
friend class ObjectStorageQueueSource;
public:
using Configuration = StorageObjectStorage::Configuration;
using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;

View File

@ -1,4 +1,4 @@
#include <Storages/S3Queue/S3QueueIFileMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h>
#include <Common/SipHash.h>
#include <Common/CurrentThread.h>
#include <Common/DNSResolver.h>
@ -11,8 +11,8 @@
namespace ProfileEvents
{
extern const Event S3QueueProcessedFiles;
extern const Event S3QueueFailedFiles;
extern const Event ObjectStorageQueueProcessedFiles;
extern const Event ObjectStorageQueueFailedFiles;
};
namespace DB
@ -35,25 +35,25 @@ namespace
}
}
void S3QueueIFileMetadata::FileStatus::setProcessingEndTime()
void ObjectStorageQueueIFileMetadata::FileStatus::setProcessingEndTime()
{
processing_end_time = now();
}
void S3QueueIFileMetadata::FileStatus::onProcessing()
void ObjectStorageQueueIFileMetadata::FileStatus::onProcessing()
{
state = FileStatus::State::Processing;
processing_start_time = now();
}
void S3QueueIFileMetadata::FileStatus::onProcessed()
void ObjectStorageQueueIFileMetadata::FileStatus::onProcessed()
{
state = FileStatus::State::Processed;
if (!processing_end_time)
setProcessingEndTime();
}
void S3QueueIFileMetadata::FileStatus::onFailed(const std::string & exception)
void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & exception)
{
state = FileStatus::State::Failed;
if (!processing_end_time)
@ -62,13 +62,13 @@ void S3QueueIFileMetadata::FileStatus::onFailed(const std::string & exception)
last_exception = exception;
}
std::string S3QueueIFileMetadata::FileStatus::getException() const
std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const
{
std::lock_guard lock(last_exception_mutex);
return last_exception;
}
std::string S3QueueIFileMetadata::NodeMetadata::toString() const
std::string ObjectStorageQueueIFileMetadata::NodeMetadata::toString() const
{
Poco::JSON::Object json;
json.set("file_path", file_path);
@ -83,7 +83,7 @@ std::string S3QueueIFileMetadata::NodeMetadata::toString() const
return oss.str();
}
S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str)
ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str)
{
Poco::JSON::Parser parser;
auto json = parser.parse(metadata_str).extract<Poco::JSON::Object::Ptr>();
@ -98,7 +98,7 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::NodeMetadata::fromStrin
return metadata;
}
S3QueueIFileMetadata::S3QueueIFileMetadata(
ObjectStorageQueueIFileMetadata::ObjectStorageQueueIFileMetadata(
const std::string & path_,
const std::string & processing_node_path_,
const std::string & processed_node_path_,
@ -123,7 +123,7 @@ S3QueueIFileMetadata::S3QueueIFileMetadata(
processed_node_path, processing_node_path, failed_node_path);
}
S3QueueIFileMetadata::~S3QueueIFileMetadata()
ObjectStorageQueueIFileMetadata::~ObjectStorageQueueIFileMetadata()
{
if (processing_id_version.has_value())
{
@ -162,9 +162,9 @@ S3QueueIFileMetadata::~S3QueueIFileMetadata()
}
}
std::string S3QueueIFileMetadata::getNodeName(const std::string & path)
std::string ObjectStorageQueueIFileMetadata::getNodeName(const std::string & path)
{
/// Since with are dealing with paths in s3 which can have "/",
/// Since we are dealing with paths in object storage which can have "/",
/// we cannot create a zookeeper node with a name equal to the path.
/// Therefore we use a hash of the path as a node name.
@ -173,7 +173,7 @@ std::string S3QueueIFileMetadata::getNodeName(const std::string & path)
return toString(path_hash.get64());
}
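
For illustration, a small usage sketch of the hashing above (the path and resulting value are made up):

/// Illustration only: '/' cannot appear in a znode name, so the node is named
/// by the path's hash instead.
std::string path = "data/2024/06/file.parquet";      /// hypothetical object path
std::string node_name = toString(sipHash64(path));   /// e.g. "13940714687798234961" (made-up)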
S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata(
ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::createNodeMetadata(
const std::string & path,
const std::string & exception,
size_t retries)
@ -182,9 +182,9 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata(
/// Since node name is just a hash we want to know to which file it corresponds,
/// so we keep "file_path" in nodes data.
/// "last_processed_timestamp" is needed for TTL metadata nodes enabled by s3queue_tracked_file_ttl_sec.
/// "last_exception" is kept for introspection, should also be visible in system.s3queue_log if it is enabled.
/// "retries" is kept for retrying the processing enabled by s3queue_loading_retries.
/// "last_processed_timestamp" is needed for TTL metadata nodes enabled by tracked_file_ttl_sec.
/// "last_exception" is kept for introspection, should also be visible in system.s3(azure)queue_log if it is enabled.
/// "retries" is kept for retrying the processing enabled by loading_retries.
NodeMetadata metadata;
metadata.file_path = path;
metadata.last_processed_timestamp = now();
@ -193,7 +193,7 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata(
return metadata;
}
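
For reference, an illustrative payload for such a node (values are made up; the field set follows the comments above, and the truncated serializer may include more fields):

/// Illustrative node data (made-up values):
/// {
///   "file_path": "data/2024/06/file.parquet",
///   "last_processed_timestamp": 1719619200,
///   "last_exception": "",
///   "retries": 0
/// }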
std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor_id)
std::string ObjectStorageQueueIFileMetadata::getProcessorInfo(const std::string & processor_id)
{
/// Add information which will be useful for debugging just in case.
Poco::JSON::Object json;
@ -206,7 +206,7 @@ std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor
return oss.str();
}
bool S3QueueIFileMetadata::setProcessing()
bool ObjectStorageQueueIFileMetadata::setProcessing()
{
auto state = file_status->state.load();
if (state == FileStatus::State::Processing
@ -235,11 +235,11 @@ bool S3QueueIFileMetadata::setProcessing()
return success;
}
void S3QueueIFileMetadata::setProcessed()
void ObjectStorageQueueIFileMetadata::setProcessed()
{
LOG_TRACE(log, "Setting file {} as processed (path: {})", path, processed_node_path);
ProfileEvents::increment(ProfileEvents::S3QueueProcessedFiles);
ProfileEvents::increment(ProfileEvents::ObjectStorageQueueProcessedFiles);
file_status->onProcessed();
try
@ -258,12 +258,12 @@ void S3QueueIFileMetadata::setProcessed()
LOG_TRACE(log, "Set file {} as processed (rows: {})", path, file_status->processed_rows);
}
void S3QueueIFileMetadata::setFailed(const std::string & exception_message, bool reduce_retry_count, bool overwrite_status)
void ObjectStorageQueueIFileMetadata::setFailed(const std::string & exception_message, bool reduce_retry_count, bool overwrite_status)
{
LOG_TRACE(log, "Setting file {} as failed (path: {}, reduce retry count: {}, exception: {})",
path, failed_node_path, reduce_retry_count, exception_message);
ProfileEvents::increment(ProfileEvents::S3QueueFailedFiles);
ProfileEvents::increment(ProfileEvents::ObjectStorageQueueFailedFiles);
if (overwrite_status || file_status->state != FileStatus::State::Failed)
file_status->onFailed(exception_message);
@ -295,7 +295,7 @@ void S3QueueIFileMetadata::setFailed(const std::string & exception_message, bool
LOG_TRACE(log, "Set file {} as failed (rows: {})", path, file_status->processed_rows);
}
void S3QueueIFileMetadata::setFailedNonRetriable()
void ObjectStorageQueueIFileMetadata::setFailedNonRetriable()
{
auto zk_client = getZooKeeper();
Coordination::Requests requests;
@ -326,7 +326,7 @@ void S3QueueIFileMetadata::setFailedNonRetriable()
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error while setting file as failed: {}", code);
}
void S3QueueIFileMetadata::setFailedRetriable()
void ObjectStorageQueueIFileMetadata::setFailedRetriable()
{
/// Instead of creating a persistent /failed/node_hash node
/// we create a persistent /failed/node_hash.retriable node.
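
Based on the comment above and the max_loading_retries constructor parameter elsewhere in this diff, the scheme is roughly the following (a sketch, not the truncated implementation):

/// Rough sketch of the retry bookkeeping (the actual body is truncated here):
/// 1. name the bookkeeping node failed_node_path + ".retriable"
/// 2. on each retriable failure, increment the "retries" counter stored in it
/// 3. while retries < max_loading_retries, the file may be picked up again
/// 4. once retries are exhausted, record the file under the persistent
///    /failed/node_hash node instead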

View File

@ -6,7 +6,7 @@
namespace DB
{
class S3QueueIFileMetadata
class ObjectStorageQueueIFileMetadata
{
public:
struct FileStatus
@ -42,7 +42,7 @@ public:
};
using FileStatusPtr = std::shared_ptr<FileStatus>;
explicit S3QueueIFileMetadata(
explicit ObjectStorageQueueIFileMetadata(
const std::string & path_,
const std::string & processing_node_path_,
const std::string & processed_node_path_,
@ -51,7 +51,7 @@ public:
size_t max_loading_retries_,
LoggerPtr log_);
virtual ~S3QueueIFileMetadata();
virtual ~ObjectStorageQueueIFileMetadata();
bool setProcessing();
void setProcessed();
@ -95,7 +95,7 @@ protected:
LoggerPtr log;
/// processing node is ephemeral, so we cannot verify with it if
/// this node was created by a certain processor on a previous s3 queue processing stage,
/// this node was created by a certain processor on a previous processing stage,
/// because we could get a session expired in between the stages
/// and someone else could just create this processing node.
/// Therefore we also create a persistent processing node
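
The race this comment guards against can be sketched as follows (a reading of the comment plus the processing_id_version checks earlier in this diff, not authoritative):

/// Sketch of the takeover race described above:
/// 1. processor A creates the ephemeral processing node and stores an id in the
///    persistent companion node, remembering its version (processing_id_version)
/// 2. A's keeper session expires between stages; the ephemeral node disappears
/// 3. processor B starts processing the same file and bumps the persistent node
/// 4. when A resumes, the remembered version no longer matches, so A knows the
///    processing node is no longer its own and backs off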

View File

@ -4,13 +4,12 @@
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Interpreters/Context.h>
#include <Storages/S3Queue/S3QueueMetadata.h>
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Storages/S3Queue/S3QueueIFileMetadata.h>
#include <Storages/S3Queue/S3QueueOrderedFileMetadata.h>
#include <Storages/S3Queue/S3QueueUnorderedFileMetadata.h>
#include <Storages/S3Queue/S3QueueTableMetadata.h>
#include <IO/S3Settings.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h>
#include <Storages/StorageSnapshot.h>
#include <base/sleep.h>
#include <Common/CurrentThread.h>
@ -22,13 +21,8 @@
namespace ProfileEvents
{
extern const Event S3QueueSetFileProcessingMicroseconds;
extern const Event S3QueueSetFileProcessedMicroseconds;
extern const Event S3QueueSetFileFailedMicroseconds;
extern const Event S3QueueFailedFiles;
extern const Event S3QueueProcessedFiles;
extern const Event S3QueueCleanupMaxSetSizeOrTTLMicroseconds;
extern const Event S3QueueLockLocalFileStatusesMicroseconds;
extern const Event ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds;
extern const Event ObjectStorageQueueLockLocalFileStatusesMicroseconds;
};
namespace DB
@ -63,7 +57,7 @@ namespace
}
}
class S3QueueMetadata::LocalFileStatuses
class ObjectStorageQueueMetadata::LocalFileStatuses
{
public:
LocalFileStatuses() = default;
@ -109,98 +103,89 @@ private:
std::unique_lock<std::mutex> lock() const
{
auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueLockLocalFileStatusesMicroseconds);
auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueueLockLocalFileStatusesMicroseconds);
return std::unique_lock(mutex);
}
};
S3QueueMetadata::S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_)
ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(const fs::path & zookeeper_path_, const ObjectStorageQueueSettings & settings_)
: settings(settings_)
, zookeeper_path(zookeeper_path_)
, buckets_num(getBucketsNum(settings_))
, log(getLogger("StorageS3Queue(" + zookeeper_path_.string() + ")"))
, log(getLogger("StorageObjectStorageQueue(" + zookeeper_path_.string() + ")"))
, local_file_statuses(std::make_shared<LocalFileStatuses>())
{
if (settings.mode == S3QueueMode::UNORDERED
&& (settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec))
if (settings.mode == ObjectStorageQueueMode::UNORDERED
&& (settings.tracked_files_limit || settings.tracked_file_ttl_sec))
{
task = Context::getGlobalContextInstance()->getSchedulePool().createTask(
"S3QueueCleanupFunc",
"ObjectStorageQueueCleanupFunc",
[this] { cleanupThreadFunc(); });
task->activate();
task->scheduleAfter(
generateRescheduleInterval(
settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms));
settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms));
}
LOG_TRACE(log, "Mode: {}, buckets: {}, processing threads: {}, result buckets num: {}",
settings.mode.toString(), settings.s3queue_buckets, settings.s3queue_processing_threads_num, buckets_num);
settings.mode.toString(), settings.buckets, settings.processing_threads_num, buckets_num);
}
S3QueueMetadata::~S3QueueMetadata()
ObjectStorageQueueMetadata::~ObjectStorageQueueMetadata()
{
shutdown();
}
void S3QueueMetadata::shutdown()
void ObjectStorageQueueMetadata::shutdown()
{
shutdown_called = true;
if (task)
task->deactivate();
}
void S3QueueMetadata::checkSettings(const S3QueueSettings & settings_) const
void ObjectStorageQueueMetadata::checkSettings(const ObjectStorageQueueSettings & settings_) const
{
S3QueueTableMetadata::checkEquals(settings, settings_);
ObjectStorageQueueTableMetadata::checkEquals(settings, settings_);
}
S3QueueMetadata::FileStatusPtr S3QueueMetadata::getFileStatus(const std::string & path)
ObjectStorageQueueMetadata::FileStatusPtr ObjectStorageQueueMetadata::getFileStatus(const std::string & path)
{
return local_file_statuses->get(path, /* create */false);
}
S3QueueMetadata::FileStatuses S3QueueMetadata::getFileStatuses() const
ObjectStorageQueueMetadata::FileStatuses ObjectStorageQueueMetadata::getFileStatuses() const
{
return local_file_statuses->getAll();
}
S3QueueMetadata::FileMetadataPtr S3QueueMetadata::getFileMetadata(
ObjectStorageQueueMetadata::FileMetadataPtr ObjectStorageQueueMetadata::getFileMetadata(
const std::string & path,
S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info)
ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info)
{
auto file_status = local_file_statuses->get(path, /* create */true);
switch (settings.mode.value)
{
case S3QueueMode::ORDERED:
return std::make_shared<S3QueueOrderedFileMetadata>(
case ObjectStorageQueueMode::ORDERED:
return std::make_shared<ObjectStorageQueueOrderedFileMetadata>(
zookeeper_path,
path,
file_status,
bucket_info,
buckets_num,
settings.s3queue_loading_retries,
settings.loading_retries,
log);
case S3QueueMode::UNORDERED:
return std::make_shared<S3QueueUnorderedFileMetadata>(
case ObjectStorageQueueMode::UNORDERED:
return std::make_shared<ObjectStorageQueueUnorderedFileMetadata>(
zookeeper_path,
path,
file_status,
settings.s3queue_loading_retries,
settings.loading_retries,
log);
}
}
size_t S3QueueMetadata::getBucketsNum(const S3QueueSettings & settings)
{
if (settings.s3queue_buckets)
return settings.s3queue_buckets;
if (settings.s3queue_processing_threads_num)
return settings.s3queue_processing_threads_num;
return 0;
}
size_t S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings)
size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueSettings & settings)
{
if (settings.buckets)
return settings.buckets;
@ -209,32 +194,41 @@ size_t S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings)
return 0;
}
bool S3QueueMetadata::useBucketsForProcessing() const
size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueTableMetadata & settings)
{
return settings.mode == S3QueueMode::ORDERED && (buckets_num > 1);
if (settings.buckets)
return settings.buckets;
if (settings.processing_threads_num)
return settings.processing_threads_num;
return 0;
}
S3QueueMetadata::Bucket S3QueueMetadata::getBucketForPath(const std::string & path) const
bool ObjectStorageQueueMetadata::useBucketsForProcessing() const
{
return S3QueueOrderedFileMetadata::getBucketForPath(path, buckets_num);
return settings.mode == ObjectStorageQueueMode::ORDERED && (buckets_num > 1);
}
S3QueueOrderedFileMetadata::BucketHolderPtr
S3QueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor)
ObjectStorageQueueMetadata::Bucket ObjectStorageQueueMetadata::getBucketForPath(const std::string & path) const
{
return S3QueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor, log);
return ObjectStorageQueueOrderedFileMetadata::getBucketForPath(path, buckets_num);
}
void S3QueueMetadata::initialize(
ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr
ObjectStorageQueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor)
{
return ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor, log);
}
void ObjectStorageQueueMetadata::initialize(
const ConfigurationPtr & configuration,
const StorageInMemoryMetadata & storage_metadata)
{
const auto metadata_from_table = S3QueueTableMetadata(*configuration, settings, storage_metadata);
const auto metadata_from_table = ObjectStorageQueueTableMetadata(*configuration, settings, storage_metadata);
const auto & columns_from_table = storage_metadata.getColumns();
const auto table_metadata_path = zookeeper_path / "metadata";
const auto metadata_paths = settings.mode == S3QueueMode::ORDERED
? S3QueueOrderedFileMetadata::getMetadataPaths(buckets_num)
: S3QueueUnorderedFileMetadata::getMetadataPaths();
const auto metadata_paths = settings.mode == ObjectStorageQueueMode::ORDERED
? ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(buckets_num)
: ObjectStorageQueueUnorderedFileMetadata::getMetadataPaths();
auto zookeeper = getZooKeeper();
zookeeper->createAncestors(zookeeper_path);
@ -243,7 +237,7 @@ void S3QueueMetadata::initialize(
{
if (zookeeper->exists(table_metadata_path))
{
const auto metadata_from_zk = S3QueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata"));
const auto metadata_from_zk = ObjectStorageQueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata"));
const auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns);
metadata_from_table.checkEquals(metadata_from_zk);
@ -268,8 +262,8 @@ void S3QueueMetadata::initialize(
requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent));
}
if (!settings.s3queue_last_processed_path.value.empty())
getFileMetadata(settings.s3queue_last_processed_path)->setProcessedAtStartRequests(requests, zookeeper);
if (!settings.last_processed_path.value.empty())
getFileMetadata(settings.last_processed_path)->setProcessedAtStartRequests(requests, zookeeper);
Coordination::Responses responses;
auto code = zookeeper->tryMulti(requests, responses);
@ -293,10 +287,10 @@ void S3QueueMetadata::initialize(
"of wrong zookeeper path or because of logical error");
}
void S3QueueMetadata::cleanupThreadFunc()
void ObjectStorageQueueMetadata::cleanupThreadFunc()
{
/// A background task is responsible for maintaining
/// settings.s3queue_tracked_files_limit and max_set_age settings for `unordered` processing mode.
/// settings.tracked_files_limit and settings.tracked_file_ttl_sec for the `unordered` processing mode.
if (shutdown_called)
return;
@ -315,12 +309,12 @@ void S3QueueMetadata::cleanupThreadFunc()
task->scheduleAfter(
generateRescheduleInterval(
settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms));
settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms));
}
void S3QueueMetadata::cleanupThreadFuncImpl()
void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
{
auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueCleanupMaxSetSizeOrTTLMicroseconds);
auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds);
const auto zk_client = getZooKeeper();
const fs::path zookeeper_processed_path = zookeeper_path / "processed";
const fs::path zookeeper_failed_path = zookeeper_path / "failed";
@ -358,11 +352,11 @@ void S3QueueMetadata::cleanupThreadFuncImpl()
return;
}
chassert(settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec);
const bool check_nodes_limit = settings.s3queue_tracked_files_limit > 0;
const bool check_nodes_ttl = settings.s3queue_tracked_file_ttl_sec > 0;
chassert(settings.tracked_files_limit || settings.tracked_file_ttl_sec);
const bool check_nodes_limit = settings.tracked_files_limit > 0;
const bool check_nodes_ttl = settings.tracked_file_ttl_sec > 0;
const bool nodes_limit_exceeded = nodes_num > settings.s3queue_tracked_files_limit;
const bool nodes_limit_exceeded = nodes_num > settings.tracked_files_limit;
if ((!nodes_limit_exceeded || !check_nodes_limit) && !check_nodes_ttl)
{
LOG_TEST(log, "No limit exceeded");
@ -384,7 +378,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl()
struct Node
{
std::string zk_path;
S3QueueIFileMetadata::NodeMetadata metadata;
ObjectStorageQueueIFileMetadata::NodeMetadata metadata;
};
auto node_cmp = [](const Node & a, const Node & b)
{
@ -405,7 +399,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl()
std::string metadata_str;
if (zk_client->tryGet(path, metadata_str))
{
sorted_nodes.emplace(path, S3QueueIFileMetadata::NodeMetadata::fromString(metadata_str));
sorted_nodes.emplace(path, ObjectStorageQueueIFileMetadata::NodeMetadata::fromString(metadata_str));
LOG_TEST(log, "Fetched metadata for node {}", path);
}
else
@ -435,9 +429,9 @@ void S3QueueMetadata::cleanupThreadFuncImpl()
wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp);
return wb.str();
};
LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.s3queue_tracked_files_limit, settings.s3queue_tracked_file_ttl_sec, get_nodes_str());
LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.tracked_files_limit, settings.tracked_file_ttl_sec, get_nodes_str());
size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.s3queue_tracked_files_limit : 0;
size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.tracked_files_limit : 0;
for (const auto & node : sorted_nodes)
{
if (nodes_to_remove)
@ -456,7 +450,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl()
else if (check_nodes_ttl)
{
UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp;
if (node_age >= settings.s3queue_tracked_file_ttl_sec)
if (node_age >= settings.tracked_file_ttl_sec)
{
LOG_TRACE(log, "Removing node at path {} ({}) because file ttl is reached",
node.metadata.file_path, node.zk_path);

View File

@ -7,23 +7,23 @@
#include <Core/BackgroundSchedulePool.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/S3Queue/S3QueueIFileMetadata.h>
#include <Storages/S3Queue/S3QueueOrderedFileMetadata.h>
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h>
namespace fs = std::filesystem;
namespace Poco { class Logger; }
namespace DB
{
struct S3QueueSettings;
class StorageS3Queue;
struct S3QueueTableMetadata;
struct ObjectStorageQueueSettings;
class StorageObjectStorageQueue;
struct ObjectStorageQueueTableMetadata;
struct StorageInMemoryMetadata;
using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
/**
* A class for managing S3Queue metadata in zookeeper, e.g.
* A class for managing ObjectStorageQueue metadata in zookeeper, e.g.
* the following folders:
* - <path_to_metadata>/processed
* - <path_to_metadata>/processing
@ -35,7 +35,7 @@ using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
* - <path_to_metadata>/processing
* - <path_to_metadata>/failed
*
* Depending on S3Queue processing mode (ordered or unordered)
* Depending on ObjectStorageQueue processing mode (ordered or unordered)
* metadata in the /processed node is stored differently.
*
* Implements caching of zookeeper metadata for faster responses.
@ -44,24 +44,24 @@ using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
* In case of Unordered mode - if files TTL is enabled or maximum tracked files limit is set
* starts a background cleanup thread which is responsible for maintaining them.
*/
class S3QueueMetadata
class ObjectStorageQueueMetadata
{
public:
using FileStatus = S3QueueIFileMetadata::FileStatus;
using FileMetadataPtr = std::shared_ptr<S3QueueIFileMetadata>;
using FileStatus = ObjectStorageQueueIFileMetadata::FileStatus;
using FileMetadataPtr = std::shared_ptr<ObjectStorageQueueIFileMetadata>;
using FileStatusPtr = std::shared_ptr<FileStatus>;
using FileStatuses = std::unordered_map<std::string, FileStatusPtr>;
using Bucket = size_t;
using Processor = std::string;
S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_);
~S3QueueMetadata();
ObjectStorageQueueMetadata(const fs::path & zookeeper_path_, const ObjectStorageQueueSettings & settings_);
~ObjectStorageQueueMetadata();
void initialize(const ConfigurationPtr & configuration, const StorageInMemoryMetadata & storage_metadata);
void checkSettings(const S3QueueSettings & settings) const;
void checkSettings(const ObjectStorageQueueSettings & settings) const;
void shutdown();
FileMetadataPtr getFileMetadata(const std::string & path, S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info = {});
FileMetadataPtr getFileMetadata(const std::string & path, ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info = {});
FileStatusPtr getFileStatus(const std::string & path);
FileStatuses getFileStatuses() const;
@ -69,16 +69,16 @@ public:
/// Method of Ordered mode parallel processing.
bool useBucketsForProcessing() const;
Bucket getBucketForPath(const std::string & path) const;
S3QueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor);
ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor);
static size_t getBucketsNum(const S3QueueSettings & settings);
static size_t getBucketsNum(const S3QueueTableMetadata & settings);
static size_t getBucketsNum(const ObjectStorageQueueSettings & settings);
static size_t getBucketsNum(const ObjectStorageQueueTableMetadata & settings);
private:
void cleanupThreadFunc();
void cleanupThreadFuncImpl();
const S3QueueSettings settings;
const ObjectStorageQueueSettings settings;
const fs::path zookeeper_path;
const size_t buckets_num;

View File

@ -1,4 +1,4 @@
#include <Storages/S3Queue/S3QueueMetadataFactory.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h>
#include <Interpreters/Context.h>
namespace DB
@ -8,20 +8,20 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
S3QueueMetadataFactory & S3QueueMetadataFactory::instance()
ObjectStorageQueueMetadataFactory & ObjectStorageQueueMetadataFactory::instance()
{
static S3QueueMetadataFactory ret;
static ObjectStorageQueueMetadataFactory ret;
return ret;
}
S3QueueMetadataFactory::FilesMetadataPtr
S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings)
ObjectStorageQueueMetadataFactory::FilesMetadataPtr
ObjectStorageQueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const ObjectStorageQueueSettings & settings)
{
std::lock_guard lock(mutex);
auto it = metadata_by_path.find(zookeeper_path);
if (it == metadata_by_path.end())
{
auto files_metadata = std::make_shared<S3QueueMetadata>(zookeeper_path, settings);
auto files_metadata = std::make_shared<ObjectStorageQueueMetadata>(zookeeper_path, settings);
it = metadata_by_path.emplace(zookeeper_path, std::move(files_metadata)).first;
}
else
@ -32,7 +32,7 @@ S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3
return it->second.metadata;
}
void S3QueueMetadataFactory::remove(const std::string & zookeeper_path)
void ObjectStorageQueueMetadataFactory::remove(const std::string & zookeeper_path)
{
std::lock_guard lock(mutex);
auto it = metadata_by_path.find(zookeeper_path);
@ -57,9 +57,9 @@ void S3QueueMetadataFactory::remove(const std::string & zookeeper_path)
}
}
std::unordered_map<std::string, S3QueueMetadataFactory::FilesMetadataPtr> S3QueueMetadataFactory::getAll()
std::unordered_map<std::string, ObjectStorageQueueMetadataFactory::FilesMetadataPtr> ObjectStorageQueueMetadataFactory::getAll()
{
std::unordered_map<std::string, S3QueueMetadataFactory::FilesMetadataPtr> result;
std::unordered_map<std::string, ObjectStorageQueueMetadataFactory::FilesMetadataPtr> result;
for (const auto & [zk_path, metadata_and_ref_count] : metadata_by_path)
result.emplace(zk_path, metadata_and_ref_count.metadata);
return result;

View File

@ -0,0 +1,37 @@
#pragma once
#include <boost/noncopyable.hpp>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h>
namespace DB
{
class ObjectStorageQueueMetadataFactory final : private boost::noncopyable
{
public:
using FilesMetadataPtr = std::shared_ptr<ObjectStorageQueueMetadata>;
static ObjectStorageQueueMetadataFactory & instance();
FilesMetadataPtr getOrCreate(const std::string & zookeeper_path, const ObjectStorageQueueSettings & settings);
void remove(const std::string & zookeeper_path);
std::unordered_map<std::string, FilesMetadataPtr> getAll();
private:
struct Metadata
{
explicit Metadata(std::shared_ptr<ObjectStorageQueueMetadata> metadata_) : metadata(metadata_), ref_count(1) {}
std::shared_ptr<ObjectStorageQueueMetadata> metadata;
/// TODO: the ref count should be kept in keeper, because of the case with distributed processing.
size_t ref_count = 0;
};
using MetadataByPath = std::unordered_map<std::string, Metadata>;
MetadataByPath metadata_by_path;
std::mutex mutex;
};
}
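
A hedged usage sketch of the factory (hypothetical keeper path; `settings` assumed to be an ObjectStorageQueueSettings instance in scope):

/// Tables sharing one keeper path share one metadata object:
auto & factory = ObjectStorageQueueMetadataFactory::instance();
auto m1 = factory.getOrCreate("/clickhouse/queue/events", settings);  /// first call creates (ref_count = 1)
auto m2 = factory.getOrCreate("/clickhouse/queue/events", settings);  /// second call reuses the same object
chassert(m1 == m2);
factory.remove("/clickhouse/queue/events");  /// presumably releases one reference; remove() body is truncated above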

View File

@ -1,4 +1,4 @@
#include <Storages/S3Queue/S3QueueOrderedFileMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h>
#include <Common/SipHash.h>
#include <Common/getRandomASCIIString.h>
#include <Common/logger_useful.h>
@ -16,7 +16,7 @@ namespace ErrorCodes
namespace
{
S3QueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num)
ObjectStorageQueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num)
{
return sipHash64(path) % buckets_num;
}
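
A small usage sketch of the mapping above (values are illustrative):

/// Deterministic: the same path always maps to the same bucket.
std::string path = "data/file_42.csv";        /// hypothetical path
size_t buckets_num = 4;                       /// example value
auto bucket = sipHash64(path) % buckets_num;  /// some fixed value in [0, 4)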
@ -40,7 +40,7 @@ namespace
}
}
S3QueueOrderedFileMetadata::BucketHolder::BucketHolder(
ObjectStorageQueueOrderedFileMetadata::BucketHolder::BucketHolder(
const Bucket & bucket_,
int bucket_version_,
const std::string & bucket_lock_path_,
@ -57,7 +57,7 @@ S3QueueOrderedFileMetadata::BucketHolder::BucketHolder(
{
}
void S3QueueOrderedFileMetadata::BucketHolder::release()
void ObjectStorageQueueOrderedFileMetadata::BucketHolder::release()
{
if (released)
return;
@ -89,7 +89,7 @@ void S3QueueOrderedFileMetadata::BucketHolder::release()
zkutil::KeeperMultiException::check(code, requests, responses);
}
S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder()
ObjectStorageQueueOrderedFileMetadata::BucketHolder::~BucketHolder()
{
if (!released)
LOG_TEST(log, "Releasing bucket ({}) holder in destructor", bucket_info->bucket);
@ -104,7 +104,7 @@ S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder()
}
}
S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata(
ObjectStorageQueueOrderedFileMetadata::ObjectStorageQueueOrderedFileMetadata(
const std::filesystem::path & zk_path_,
const std::string & path_,
FileStatusPtr file_status_,
@ -112,7 +112,7 @@ S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata(
size_t buckets_num_,
size_t max_loading_retries_,
LoggerPtr log_)
: S3QueueIFileMetadata(
: ObjectStorageQueueIFileMetadata(
path_,
/* processing_node_path */zk_path_ / "processing" / getNodeName(path_),
/* processed_node_path */getProcessedPath(zk_path_, path_, buckets_num_),
@ -126,7 +126,7 @@ S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata(
{
}
std::vector<std::string> S3QueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num)
std::vector<std::string> ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num)
{
if (buckets_num > 1)
{
@ -139,7 +139,7 @@ std::vector<std::string> S3QueueOrderedFileMetadata::getMetadataPaths(size_t buc
return {"failed", "processing"};
}
bool S3QueueOrderedFileMetadata::getMaxProcessedFile(
bool ObjectStorageQueueOrderedFileMetadata::getMaxProcessedFile(
NodeMetadata & result,
Coordination::Stat * stat,
const zkutil::ZooKeeperPtr & zk_client)
@ -147,7 +147,7 @@ bool S3QueueOrderedFileMetadata::getMaxProcessedFile(
return getMaxProcessedFile(result, stat, processed_node_path, zk_client);
}
bool S3QueueOrderedFileMetadata::getMaxProcessedFile(
bool ObjectStorageQueueOrderedFileMetadata::getMaxProcessedFile(
NodeMetadata & result,
Coordination::Stat * stat,
const std::string & processed_node_path_,
@ -163,12 +163,12 @@ bool S3QueueOrderedFileMetadata::getMaxProcessedFile(
return false;
}
S3QueueOrderedFileMetadata::Bucket S3QueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num)
ObjectStorageQueueOrderedFileMetadata::Bucket ObjectStorageQueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num)
{
return getBucketForPathImpl(path_, buckets_num);
}
S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcquireBucket(
ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket(
const std::filesystem::path & zk_path,
const Bucket & bucket,
const Processor & processor,
@ -190,7 +190,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui
bucket_lock_id_path, processor_info, zkutil::CreateMode::Persistent, /* ignore_if_exists */true));
/// Update bucket lock id path. We use its version as a version of ephemeral bucket lock node.
/// (See comment near S3QueueIFileMetadata::processing_node_version).
/// (See comment near ObjectStorageQueueIFileMetadata::processing_node_version).
requests.push_back(zkutil::makeSetRequest(bucket_lock_id_path, processor_info, -1));
Coordination::Responses responses;
@ -223,7 +223,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error: {}", code);
}
std::pair<bool, S3QueueIFileMetadata::FileStatus::State> S3QueueOrderedFileMetadata::setProcessingImpl()
std::pair<bool, ObjectStorageQueueIFileMetadata::FileStatus::State> ObjectStorageQueueOrderedFileMetadata::setProcessingImpl()
{
/// In one zookeeper transaction do the following:
enum RequestType
@ -319,7 +319,7 @@ std::pair<bool, S3QueueIFileMetadata::FileStatus::State> S3QueueOrderedFileMetad
}
}
void S3QueueOrderedFileMetadata::setProcessedAtStartRequests(
void ObjectStorageQueueOrderedFileMetadata::setProcessedAtStartRequests(
Coordination::Requests & requests,
const zkutil::ZooKeeperPtr & zk_client)
{
@ -337,7 +337,7 @@ void S3QueueOrderedFileMetadata::setProcessedAtStartRequests(
}
}
void S3QueueOrderedFileMetadata::setProcessedRequests(
void ObjectStorageQueueOrderedFileMetadata::setProcessedRequests(
Coordination::Requests & requests,
const zkutil::ZooKeeperPtr & zk_client,
const std::string & processed_node_path_,
@ -378,7 +378,7 @@ void S3QueueOrderedFileMetadata::setProcessedRequests(
}
}
void S3QueueOrderedFileMetadata::setProcessedImpl()
void ObjectStorageQueueOrderedFileMetadata::setProcessedImpl()
{
/// In one zookeeper transaction do the following:
enum RequestType

View File

@ -1,5 +1,5 @@
#pragma once
#include <Storages/S3Queue/S3QueueIFileMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h>
#include <Common/logger_useful.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <filesystem>
@ -7,7 +7,7 @@
namespace DB
{
class S3QueueOrderedFileMetadata : public S3QueueIFileMetadata
class ObjectStorageQueueOrderedFileMetadata : public ObjectStorageQueueIFileMetadata
{
public:
using Processor = std::string;
@ -21,7 +21,7 @@ public:
};
using BucketInfoPtr = std::shared_ptr<const BucketInfo>;
explicit S3QueueOrderedFileMetadata(
explicit ObjectStorageQueueOrderedFileMetadata(
const std::filesystem::path & zk_path_,
const std::string & path_,
FileStatusPtr file_status_,
@ -39,7 +39,7 @@ public:
const Processor & processor,
LoggerPtr log_);
static S3QueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num);
static ObjectStorageQueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num);
static std::vector<std::string> getMetadataPaths(size_t buckets_num);
@ -73,7 +73,7 @@ private:
bool ignore_if_exists);
};
struct S3QueueOrderedFileMetadata::BucketHolder : private boost::noncopyable
struct ObjectStorageQueueOrderedFileMetadata::BucketHolder : private boost::noncopyable
{
BucketHolder(
const Bucket & bucket_,

View File

@ -1,4 +1,4 @@
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h>
#include <Common/Exception.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
@ -13,14 +13,23 @@ namespace ErrorCodes
extern const int UNKNOWN_SETTING;
}
IMPLEMENT_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS)
IMPLEMENT_SETTINGS_TRAITS(ObjectStorageQueueSettingsTraits, LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS)
void S3QueueSettings::loadFromQuery(ASTStorage & storage_def)
void ObjectStorageQueueSettings::loadFromQuery(ASTStorage & storage_def)
{
if (storage_def.settings)
{
try
{
/// We support settings with the legacy "s3queue_" prefix for compatibility.
for (auto & change : storage_def.settings->changes)
{
if (change.name.starts_with("s3queue_"))
change.name = change.name.substr(std::strlen("s3queue_"));
if (change.name == "enable_logging_to_s3queue_log")
change.name = "enable_logging_to_queue_log";
}
applyChanges(storage_def.settings->changes);
}
catch (Exception & e)
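
For illustration, the rewrite above maps legacy names like these (taken from settings that appear elsewhere in this diff):

/// Examples of the compatibility rewrite:
///   s3queue_loading_retries       -> loading_retries
///   s3queue_tracked_files_limit   -> tracked_files_limit
///   enable_logging_to_s3queue_log -> enable_logging_to_queue_log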

View File

@ -0,0 +1,51 @@
#pragma once
#include <Core/BaseSettings.h>
#include <Core/Settings.h>
#include <Core/SettingsEnums.h>
namespace DB
{
class ASTStorage;
#define OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \
M(ObjectStorageQueueMode, \
mode, \
ObjectStorageQueueMode::ORDERED, \
"With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." \
"With ordered mode, only the max name of the successfully consumed file stored.", \
0) \
M(ObjectStorageQueueAction, after_processing, ObjectStorageQueueAction::KEEP, "Delete or keep the file after successful processing", 0) \
M(String, keeper_path, "", "Zookeeper node path", 0) \
M(UInt32, loading_retries, 10, "Retry loading up to specified number of times", 0) \
M(UInt32, processing_threads_num, 1, "Number of processing threads", 0) \
M(UInt32, enable_logging_to_queue_log, 1, "Enable logging to system table system.(s3/azure_)queue_log", 0) \
M(String, last_processed_path, "", "For Ordered mode. Files that have lexicographically smaller file name are considered already processed", 0) \
M(UInt32, tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \
M(UInt32, polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \
M(UInt32, polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \
M(UInt32, polling_backoff_ms, 1000, "Polling backoff", 0) \
M(UInt32, tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \
M(UInt32, cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \
M(UInt32, cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \
M(UInt32, buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \
M(UInt32, max_processed_files_before_commit, 100, "Number of files which can be processed before being committed to keeper", 0) \
M(UInt32, max_processed_rows_before_commit, 0, "Number of rows which can be processed before being committed to keeper", 0) \
M(UInt32, max_processed_bytes_before_commit, 0, "Number of bytes which can be processed before being committed to keeper", 0) \
M(UInt32, max_processing_time_sec_before_commit, 0, "Timeout in seconds after which processed files are committed to keeper", 0) \
#define LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS(M, ALIAS) \
OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \
LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS)
DECLARE_SETTINGS_TRAITS(ObjectStorageQueueSettingsTraits, LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS)
struct ObjectStorageQueueSettings : public BaseSettings<ObjectStorageQueueSettingsTraits>
{
void loadFromQuery(ASTStorage & storage_def);
};
}
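
Conceptually, each M(...) entry above contributes one typed, defaulted member to the settings struct; a simplified sketch (not the actual macro expansion, using ClickHouse type aliases):

/// Simplified view of what the traits macros generate:
struct ObjectStorageQueueSettingsSketch
{
    UInt32 loading_retries = 10;        /// "Retry loading up to specified number of times"
    UInt32 processing_threads_num = 1;  /// "Number of processing threads"
    String keeper_path = "";            /// "Zookeeper node path"
};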

Some files were not shown because too many files have changed in this diff.