Merge remote-tracking branch 'origin/master' into pr-local-plan

This commit is contained in:
Igor Nikonov 2024-07-29 20:00:55 +00:00
commit cf27f2665e
677 changed files with 9424 additions and 5741 deletions

View File

@ -9,19 +9,64 @@ on: # yamllint disable-line rule:truthy
- cron: '0 */6 * * *'
workflow_dispatch:
jobs:
RunConfig:
runs-on: [self-hosted, style-checker-aarch64]
outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: PrepareRunConfig
id: runconfig
run: |
echo "::group::configure CI run"
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --workflow "$GITHUB_WORKFLOW" --outfile ${{ runner.temp }}/ci_run_data.json
echo "::endgroup::"
echo "::group::CI run configure results"
python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
echo "::endgroup::"
{
echo 'CI_DATA<<EOF'
cat ${{ runner.temp }}/ci_run_data.json
echo 'EOF'
} >> "$GITHUB_OUTPUT"
KeeperJepsenRelease:
uses: ./.github/workflows/reusable_simple_job.yml
needs: [RunConfig]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Jepsen keeper check
runner_type: style-checker
report_required: true
test_name: ClickHouse Keeper Jepsen
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 jepsen_check.py keeper
# ServerJepsenRelease:
# uses: ./.github/workflows/reusable_simple_job.yml
# with:
# test_name: Jepsen server check
# runner_type: style-checker
# run_command: |
# cd "$REPO_COPY/tests/ci"
# python3 jepsen_check.py server
ServerJepsenRelease:
if: false # skip for server
needs: [RunConfig]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse Server Jepsen
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 jepsen_check.py server
CheckWorkflow:
if: ${{ !cancelled() }}
needs: [RunConfig, ServerJepsenRelease, KeeperJepsenRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -93,21 +93,21 @@ jobs:
with:
stage: Builds_2
data: ${{ needs.RunConfig.outputs.data }}
Tests_2:
Tests_2_ww:
needs: [RunConfig, Builds_2]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2_ww') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_2_ww
data: ${{ needs.RunConfig.outputs.data }}
Tests_2:
# Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there.
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_2
data: ${{ needs.RunConfig.outputs.data }}
Tests_3:
# Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there.
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_3
data: ${{ needs.RunConfig.outputs.data }}
################################# Reports #################################
# Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3
@ -123,7 +123,7 @@ jobs:
FinishCheck:
if: ${{ !cancelled() }}
needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -133,6 +133,7 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
if: ${{ !cancelled() }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'

View File

@ -123,20 +123,20 @@ jobs:
stage: Builds_2
data: ${{ needs.RunConfig.outputs.data }}
# stage for running non-required checks without being blocked by required checks (Test_1) if corresponding settings is selected
Tests_2:
Tests_2_ww:
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2_ww') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_2_ww
data: ${{ needs.RunConfig.outputs.data }}
Tests_2:
needs: [RunConfig, Builds_1, Tests_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_2
data: ${{ needs.RunConfig.outputs.data }}
Tests_3:
needs: [RunConfig, Builds_1, Tests_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_3
data: ${{ needs.RunConfig.outputs.data }}
################################# Reports #################################
# Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3)
@ -154,7 +154,7 @@ jobs:
if: ${{ !cancelled() }}
# Test_2 or Test_3 do not have the jobs required for Mergeable check,
# however, set them as "needs" to get all checks results before the automatic merge occurs.
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -178,7 +178,7 @@ jobs:
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code

View File

@ -1,4 +1,5 @@
### Table of Contents
**[ClickHouse release v24.7, 2024-07-30](#247)**<br/>
**[ClickHouse release v24.6, 2024-07-01](#246)**<br/>
**[ClickHouse release v24.5, 2024-05-30](#245)**<br/>
**[ClickHouse release v24.4, 2024-04-30](#244)**<br/>
@ -9,6 +10,178 @@
# 2024 Changelog
### <a id="247"></a> ClickHouse release 24.7, 2024-07-30
#### Backward Incompatible Change
* Forbid `CRATE MATERIALIZED VIEW ... ENGINE Replicated*MergeTree POPULATE AS SELECT ...` with Replicated databases. [#63963](https://github.com/ClickHouse/ClickHouse/pull/63963) ([vdimir](https://github.com/vdimir)).
* `clickhouse-keeper-client` will only accept paths in string literals, such as `ls '/hello/world'`, not bare strings such as `ls /hello/world`. [#65494](https://github.com/ClickHouse/ClickHouse/pull/65494) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Metric `KeeperOutstandingRequets` was renamed to `KeeperOutstandingRequests`. [#66206](https://github.com/ClickHouse/ClickHouse/pull/66206) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Function `tuple` will now try to construct named tuples in query (controlled by `enable_named_columns_in_function_tuple`). Introduce function `tupleNames` to extract names from tuples. [#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)).
#### New Feature
* Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)).
* Add new window function `percent_rank`. [#62747](https://github.com/ClickHouse/ClickHouse/pull/62747) ([lgbo](https://github.com/lgbo-ustc)).
* Support JWT authentication in `clickhouse-client` (will be available only in ClickHouse Cloud). [#62829](https://github.com/ClickHouse/ClickHouse/pull/62829) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Add SQL functions `changeYear`, `changeMonth`, `changeDay`, `changeHour`, `changeMinute`, `changeSecond`. For example, `SELECT changeMonth(toDate('2024-06-14'), 7)` returns date `2024-07-14`. [#63186](https://github.com/ClickHouse/ClickHouse/pull/63186) ([cucumber95](https://github.com/cucumber95)).
* Introduce startup scripts, which allow the execution of preconfigured queries at the startup stage. [#64889](https://github.com/ClickHouse/ClickHouse/pull/64889) ([pufit](https://github.com/pufit)).
* Support accept_invalid_certificate in client's config in order to allow for client to connect over secure TCP to a server running with self-signed certificate - can be used as a shorthand for corresponding `openSSL` client settings `verificationMode=none` + `invalidCertificateHandler.name=AcceptCertificateHandler`. [#65238](https://github.com/ClickHouse/ClickHouse/pull/65238) ([peacewalker122](https://github.com/peacewalker122)).
* Add system.error_log which contains history of error values from table system.errors, periodically flushed to disk. [#65381](https://github.com/ClickHouse/ClickHouse/pull/65381) ([Pablo Marcos](https://github.com/pamarcos)).
* Add aggregate function `groupConcat`. About the same as `arrayStringConcat( groupArray(column), ',')` Can receive 2 parameters: a string delimiter and the number of elements to be processed. [#65451](https://github.com/ClickHouse/ClickHouse/pull/65451) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add a new setting to disable/enable writing page index into parquet files. [#65475](https://github.com/ClickHouse/ClickHouse/pull/65475) ([lgbo](https://github.com/lgbo-ustc)).
* Introduce `logger.console_log_level` server config to control the log level to the console (if enabled). [#65559](https://github.com/ClickHouse/ClickHouse/pull/65559) ([Azat Khuzhin](https://github.com/azat)).
* Automatically append a wildcard `*` to the end of a directory path with table function `file`. [#66019](https://github.com/ClickHouse/ClickHouse/pull/66019) ([Zhidong (David) Guo](https://github.com/Gun9niR)).
* Add `--memory-usage` option to client in non interactive mode. [#66393](https://github.com/ClickHouse/ClickHouse/pull/66393) ([vdimir](https://github.com/vdimir)).
* Make an interactive client for clickhouse-disks, add local disk from the local directory. [#64446](https://github.com/ClickHouse/ClickHouse/pull/64446) ([Daniil Ivanik](https://github.com/divanik)).
* When lightweight delete happens on a table with projection(s), users have choices either throw an exception (by default) or drop the projection [#65594](https://github.com/ClickHouse/ClickHouse/pull/65594) ([jsc0218](https://github.com/jsc0218)).
#### Experimental Feature
* Change binary serialization of Variant data type: add `compact` mode to avoid writing the same discriminator multiple times for granules with single variant or with only NULL values. Add MergeTree setting `use_compact_variant_discriminators_serialization` that is enabled by default. Note that Variant type is still experimental and backward-incompatible change in serialization is ok. [#62774](https://github.com/ClickHouse/ClickHouse/pull/62774) ([Kruglov Pavel](https://github.com/Avogar)).
* Support rocksdb as backend storage of keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)).
* Refactor JSONExtract functions, support more types including experimental Dynamic type. [#66046](https://github.com/ClickHouse/ClickHouse/pull/66046) ([Kruglov Pavel](https://github.com/Avogar)).
* Support null map subcolumn for Variant and Dynamic subcolumns. [#66178](https://github.com/ClickHouse/ClickHouse/pull/66178) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix reading dynamic subcolumns from altered Memory table. Previously if `max_types` parameter of a Dynamic type was changed in Memory table via alter, further subcolumns reading can return wrong result. [#66066](https://github.com/ClickHouse/ClickHouse/pull/66066) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for `cluster_for_parallel_replicas` when using custom key parallel replicas. It allows you to use parallel replicas with custom key with MergeTree tables. [#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)).
#### Performance Improvement
* Enable `optimize_functions_to_subcolumns` by default. [#58661](https://github.com/ClickHouse/ClickHouse/pull/58661) ([Anton Popov](https://github.com/CurtizJ)).
* Replace int to string algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)).
* Sizes of hash tables created by join (`parallel_hash` algorithm) is collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. [#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)).
* Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using of buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)).
* Improve performance of loading `plain_rewritable` metadata. [#65634](https://github.com/ClickHouse/ClickHouse/pull/65634) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Attaching tables on read-only disks will use fewer resources by not loading outdated parts. [#65635](https://github.com/ClickHouse/ClickHouse/pull/65635) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support minmax hyperrectangle for Set indices. [#65676](https://github.com/ClickHouse/ClickHouse/pull/65676) ([AntiTopQuark](https://github.com/AntiTopQuark)).
* Unload primary index of outdated parts to reduce total memory usage. [#65852](https://github.com/ClickHouse/ClickHouse/pull/65852) ([Anton Popov](https://github.com/CurtizJ)).
* Functions `replaceRegexpAll` and `replaceRegexpOne` are now significantly faster if the pattern is trivial, i.e. contains no metacharacters, pattern classes, flags, grouping characters etc. (Thanks to Taiyang Li). [#66185](https://github.com/ClickHouse/ClickHouse/pull/66185) ([Robert Schulze](https://github.com/rschu1ze)).
* s3 requests: Reduce retry time for queries, increase retries count for backups. 8.5 minutes and 100 retires for queries, 1.2 hours and 1000 retries for backup restore. [#65232](https://github.com/ClickHouse/ClickHouse/pull/65232) ([Sema Checherinda](https://github.com/CheSema)).
* Support query plan LIMIT optimization. Support LIMIT pushdown for PostgreSQL storage and table function. [#65454](https://github.com/ClickHouse/ClickHouse/pull/65454) ([Maksim Kita](https://github.com/kitaisreal)).
* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)).
* DatabaseCatalog drops tables faster by using up to database_catalog_drop_table_concurrency threads. [#66065](https://github.com/ClickHouse/ClickHouse/pull/66065) ([Sema Checherinda](https://github.com/CheSema)).
#### Improvement
* The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)).
* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)).
* Allow matching column names in a case insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)).
* Support reading partitioned data DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)).
* In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)).
* Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)).
* `StorageS3Queue` related fixes and improvements. Deduce a default value of `s3queue_processing_threads_num` according to the number of physical cpu cores on the server (instead of the previous default value as 1). Set default value of `s3queue_loading_retries` to 10. Fix possible vague "Uncaught exception" in exception column of `system.s3queue`. Do not increment retry count on `MEMORY_LIMIT_EXCEEDED` exception. Move files commit to a stage after insertion into table fully finished to avoid files being commited while not inserted. Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time. [#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support aliases in parametrized view function (only new analyzer). [#65190](https://github.com/ClickHouse/ClickHouse/pull/65190) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Updated to mask account key in logs in azureBlobStorage. [#65273](https://github.com/ClickHouse/ClickHouse/pull/65273) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Partition pruning for `IN` predicates when filter expression is a part of `PARTITION BY` expression. [#65335](https://github.com/ClickHouse/ClickHouse/pull/65335) ([Eduard Karacharov](https://github.com/korowa)).
* Add system tables with main information about all detached tables. [#65400](https://github.com/ClickHouse/ClickHouse/pull/65400) ([Konstantin Morozov](https://github.com/k-morozov)).
* `arrayMin`/`arrayMax` can be applicable to all data types that are comparable. [#65455](https://github.com/ClickHouse/ClickHouse/pull/65455) ([pn](https://github.com/chloro-pn)).
* Improved memory accounting for cgroups v2 to exclude the amount occupied by the page cache. [#65470](https://github.com/ClickHouse/ClickHouse/pull/65470) ([Nikita Taranov](https://github.com/nickitat)).
* Do not create format settings for each row when serializing chunks to insert to EmbeddedRocksDB table. [#65474](https://github.com/ClickHouse/ClickHouse/pull/65474) ([Duc Canh Le](https://github.com/canhld94)).
* Reduce `clickhouse-local` prompt to just `:)`. `getFQDNOrHostName()` takes too long on macOS, and we don't want a hostname in the prompt for `clickhouse-local` anyway. [#65510](https://github.com/ClickHouse/ClickHouse/pull/65510) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Avoid printing a message from jemalloc about per-CPU arenas on low-end virtual machines. [#65532](https://github.com/ClickHouse/ClickHouse/pull/65532) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable filesystem cache background download by default. It will be enabled back when we fix the issue with possible "Memory limit exceeded" because memory deallocation is done outside of query context (while buffer is allocated inside of query context) if we use background download threads. Plus we need to add a separate setting to define max size to download for background workers (currently it is limited by max_file_segment_size, which might be too big). [#65534](https://github.com/ClickHouse/ClickHouse/pull/65534) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add new option to config `<config_reload_interval_ms>` which allow to specify how often clickhouse will reload config. [#65545](https://github.com/ClickHouse/ClickHouse/pull/65545) ([alesapin](https://github.com/alesapin)).
* Implement binary encoding for ClickHouse data types and add its specification in docs. Use it in Dynamic binary serialization, allow to use it in RowBinaryWithNamesAndTypes and Native formats under settings. [#65546](https://github.com/ClickHouse/ClickHouse/pull/65546) ([Kruglov Pavel](https://github.com/Avogar)).
* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Server settings `compiled_expression_cache_size` and `compiled_expression_cache_elements_size` are now shown in `system.server_settings`. [#65584](https://github.com/ClickHouse/ClickHouse/pull/65584) ([Robert Schulze](https://github.com/rschu1ze)).
* Add support for user identification based on x509 SubjectAltName extension. [#65626](https://github.com/ClickHouse/ClickHouse/pull/65626) ([Anton Kozlov](https://github.com/tonickkozlov)).
* `clickhouse-local` will respect the `max_server_memory_usage` and `max_server_memory_usage_to_ram_ratio` from the configuration file. It will also set the max memory usage to 90% of the system memory by default, like `clickhouse-server` does. [#65697](https://github.com/ClickHouse/ClickHouse/pull/65697) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a script to backup your files to ClickHouse. [#65699](https://github.com/ClickHouse/ClickHouse/pull/65699) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* PostgreSQL source support cancel. [#65722](https://github.com/ClickHouse/ClickHouse/pull/65722) ([Maksim Kita](https://github.com/kitaisreal)).
* Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow to use `concat` function with empty arguments ``` sql :) select concat();. [#65887](https://github.com/ClickHouse/ClickHouse/pull/65887) ([李扬](https://github.com/taiyang-li)).
* Allow controlling named collections in clickhouse-local. [#65973](https://github.com/ClickHouse/ClickHouse/pull/65973) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve Azure profile events. [#65999](https://github.com/ClickHouse/ClickHouse/pull/65999) ([alesapin](https://github.com/alesapin)).
* Support ORC file read by writer time zone. [#66025](https://github.com/ClickHouse/ClickHouse/pull/66025) ([kevinyhzou](https://github.com/KevinyhZou)).
* Add settings to control connection to the PostgreSQL. * Setting `postgresql_connection_attempt_timeout` specifies the value passed to `connect_timeout` parameter of connection URL. * Setting `postgresql_connection_pool_retries` specifies the number of retries to establish a connection to the PostgreSQL end-point. [#66232](https://github.com/ClickHouse/ClickHouse/pull/66232) ([Dmitry Novik](https://github.com/novikd)).
* Reduce inaccuracy of input_wait_elapsed_us/input_wait_elapsed_us/elapsed_us. [#66239](https://github.com/ClickHouse/ClickHouse/pull/66239) ([Azat Khuzhin](https://github.com/azat)).
* Improve FilesystemCache ProfileEvents. [#66249](https://github.com/ClickHouse/ClickHouse/pull/66249) ([zhukai](https://github.com/nauu)).
* Add settings to ignore ON CLUSTER clause in queries for named collection management with replicated storage. [#66288](https://github.com/ClickHouse/ClickHouse/pull/66288) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Function `generateSnowflakeID` now allows to specify a machine ID as a parameter to prevent collisions in large clusters. [#66374](https://github.com/ClickHouse/ClickHouse/pull/66374) ([ZAWA_ll](https://github.com/Zawa-ll)).
* Disable suspending on Ctrl+Z in interactive mode. This is a common trap and is not expected behavior for almost all users. I imagine only a few extreme power users could appreciate suspending terminal applications to the background, but I don't know any. [#66511](https://github.com/ClickHouse/ClickHouse/pull/66511) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add option for validating the Primary key type in Dictionaries. Without this option for simple layouts any column type will be implicitly converted to UInt64. ### Documentation entry for user-facing changes. [#66595](https://github.com/ClickHouse/ClickHouse/pull/66595) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency. Previously such cyclic dependencies could lead to a deadlock during server startup. Also fix some bugs in dependencies creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
* Fix the VALID UNTIL clause in the user definition resetting after a restart. [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix SHOW MERGES remaining time. [#66735](https://github.com/ClickHouse/ClickHouse/pull/66735) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `Query was cancelled` might have been printed twice in clickhouse-client. This behaviour is fixed. [#66005](https://github.com/ClickHouse/ClickHouse/pull/66005) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fixed crash while using MaterializedMySQL with TABLE OVERRIDE that maps MySQL NULL field into ClickHouse not NULL field. [#54649](https://github.com/ClickHouse/ClickHouse/pull/54649) ([Filipp Ozinov](https://github.com/bakwc)).
* Fix logical error when PREWHERE expression read no columns and table has no adaptive index granularity (very old table). [#59173](https://github.com/ClickHouse/ClickHouse/pull/59173) ([Alexander Gololobov](https://github.com/davenger)).
* Fix bug with cancellation buffer when canceling a query. [#64478](https://github.com/ClickHouse/ClickHouse/pull/64478) ([Sema Checherinda](https://github.com/CheSema)).
* Fix filling parts columns from metadata (when columns.txt does not exists). [#64757](https://github.com/ClickHouse/ClickHouse/pull/64757) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)).
* Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
* Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)).
* Fix unexpected projection name when query with CTE. [#65267](https://github.com/ClickHouse/ClickHouse/pull/65267) ([wudidapaopao](https://github.com/wudidapaopao)).
* Require `dictGet` privilege when accessing dictionaries via direct query or the `Dictionary` table engine. [#65359](https://github.com/ClickHouse/ClickHouse/pull/65359) ([Joe Lynch](https://github.com/joelynch)).
* Fix user-specific S3 auth with incremental backups. [#65481](https://github.com/ClickHouse/ClickHouse/pull/65481) ([Antonio Andelic](https://github.com/antonio2368)).
* Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix getting exception `Index out of bound for blob metadata` in case all files from list batch were filtered out. [#65523](https://github.com/ClickHouse/ClickHouse/pull/65523) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix NOT_FOUND_COLUMN_IN_BLOCK for deduplicate merge of projection. [#65573](https://github.com/ClickHouse/ClickHouse/pull/65573) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
* Fixed a bug that compatibility level '23.4' was not properly applied. [#65737](https://github.com/ClickHouse/ClickHouse/pull/65737) ([cw5121](https://github.com/cw5121)).
* Fix odbc table with nullable fields. [#65738](https://github.com/ClickHouse/ClickHouse/pull/65738) ([Rodolphe Dugé de Bernonville](https://github.com/RodolpheDuge)).
* Fix data race in `TCPHandler`, which could happen on fatal error. [#65744](https://github.com/ClickHouse/ClickHouse/pull/65744) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)).
* For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
* Fix a bug leads to EmbeddedRocksDB with TTL write corrupted SST files. [#65816](https://github.com/ClickHouse/ClickHouse/pull/65816) ([Duc Canh Le](https://github.com/canhld94)).
* Functions `bitTest`, `bitTestAll`, and `bitTestAny` now return an error if the specified bit index is out-of-bounds [#65818](https://github.com/ClickHouse/ClickHouse/pull/65818) ([Pablo Marcos](https://github.com/pamarcos)).
* Setting `join_any_take_last_row` is supported in any query with hash join. [#65820](https://github.com/ClickHouse/ClickHouse/pull/65820) ([vdimir](https://github.com/vdimir)).
* Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)).
* Fix growing memory usage in S3Queue. [#65839](https://github.com/ClickHouse/ClickHouse/pull/65839) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix tie handling in `arrayAUC` to match sklearn. [#65840](https://github.com/ClickHouse/ClickHouse/pull/65840) ([gabrielmcg44](https://github.com/gabrielmcg44)).
* Fix possible issues with MySQL server protocol TLS connections. [#65917](https://github.com/ClickHouse/ClickHouse/pull/65917) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible issues with MySQL client protocol TLS connections. [#65938](https://github.com/ClickHouse/ClickHouse/pull/65938) ([Azat Khuzhin](https://github.com/azat)).
* Fix handling of `SSL_ERROR_WANT_READ`/`SSL_ERROR_WANT_WRITE` with zero timeout. [#65941](https://github.com/ClickHouse/ClickHouse/pull/65941) ([Azat Khuzhin](https://github.com/azat)).
* Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Column _size in s3 engine and s3 table function denotes the size of a file inside the archive, not a size of the archive itself. [#65993](https://github.com/ClickHouse/ClickHouse/pull/65993) ([Daniil Ivanik](https://github.com/divanik)).
* Fix resolving dynamic subcolumns in analyzer, avoid reading the whole column on dynamic subcolumn reading. [#66004](https://github.com/ClickHouse/ClickHouse/pull/66004) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix config merging for from_env with replace overrides. [#66034](https://github.com/ClickHouse/ClickHouse/pull/66034) ([Azat Khuzhin](https://github.com/azat)).
* Fix a possible hanging in `GRPCServer` during shutdown. [#66061](https://github.com/ClickHouse/ClickHouse/pull/66061) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fixed several cases in function `has` with non-constant `LowCardinality` arguments. [#66088](https://github.com/ClickHouse/ClickHouse/pull/66088) ([Anton Popov](https://github.com/CurtizJ)).
* Fix for `groupArrayIntersect`. It had incorrect behavior in the `merge()` function. Also, fixed behavior in `deserialise()` for numeric and general data. [#66103](https://github.com/ClickHouse/ClickHouse/pull/66103) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fixed the issue when the server failed to parse Avro files with negative block size arrays encoded, which is now allowed by the Avro specification. [#66130](https://github.com/ClickHouse/ClickHouse/pull/66130) ([Serge Klochkov](https://github.com/slvrtrn)).
* Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Fix rare case with missing data in the result of distributed query. [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)).
* Fix order of parsing metadata fields in StorageDeltaLake. [#66211](https://github.com/ClickHouse/ClickHouse/pull/66211) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix error reporting while copying to S3 or AzureBlobStorage. [#66295](https://github.com/ClickHouse/ClickHouse/pull/66295) ([Vitaly Baranov](https://github.com/vitlibar)).
* Prevent watchdog from keeping descriptors of unlinked(rotated) log files. [#66334](https://github.com/ClickHouse/ClickHouse/pull/66334) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Fix the bug that logicalexpressionoptimizerpass lost logical type of constant. [#66344](https://github.com/ClickHouse/ClickHouse/pull/66344) ([pn](https://github.com/chloro-pn)).
* Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible incorrect result for queries joining and filtering table external engine (like PostgreSQL), due to too aggressive filter pushdown. Since now, conditions from where section won't be send to external database in case of outer join with external table. [#66402](https://github.com/ClickHouse/ClickHouse/pull/66402) ([vdimir](https://github.com/vdimir)).
* Added missing column materialization for cross join. [#66413](https://github.com/ClickHouse/ClickHouse/pull/66413) ([lgbo](https://github.com/lgbo-ustc)).
* Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Avoid possible logical error during import from Npy format in case of bad array nesting level, fix testing of other kinds of errors. [#66461](https://github.com/ClickHouse/ClickHouse/pull/66461) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix wrong count() result when there is non-deterministic function in predicate. [#66510](https://github.com/ClickHouse/ClickHouse/pull/66510) ([Duc Canh Le](https://github.com/canhld94)).
* Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix reading of uninitialized memory when hashing empty tuples. [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix `column_length` is not updated in `ColumnTuple::insertManyFrom`. [#66626](https://github.com/ClickHouse/ClickHouse/pull/66626) ([lgbo](https://github.com/lgbo-ustc)).
* Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix rare case of stuck merge after drop column. [#66707](https://github.com/ClickHouse/ClickHouse/pull/66707) ([Raúl Marín](https://github.com/Algunenano)).
* Fix assertion `isUniqTypes` when insert select from remote sources. [#66722](https://github.com/ClickHouse/ClickHouse/pull/66722) ([Sema Checherinda](https://github.com/CheSema)).
* Fix logical error in PrometheusRequestHandler. [#66621](https://github.com/ClickHouse/ClickHouse/pull/66621) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix `indexHint` function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)).
* Fix AST formatting of 'create table b empty as a'. [#64951](https://github.com/ClickHouse/ClickHouse/pull/64951) ([Michael Kolupaev](https://github.com/al13n321)).
#### Build/Testing/Packaging Improvement
* Instantiate template methods ahead in different .cpp files, avoid too large translation units during compiling. [#64818](https://github.com/ClickHouse/ClickHouse/pull/64818) ([lgbo](https://github.com/lgbo-ustc)).
### <a id="246"></a> ClickHouse release 24.6, 2024-07-01
#### Backward Incompatible Change

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit fe854449e24bedfa26e38465b84374312dbd587f
Subproject commit a89d904befea07814628c6ce0b44083c4e149c62

View File

@ -23,15 +23,17 @@ RUN apt-get update \
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
# TSAN will flush shadow memory when reaching this limit.
# It may cause false-negatives, but it's better than OOM.
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
# max_allocation_size_mb is set to 32GB, so we have much bigger chance to run into memory limit than the limitation of the sanitizers
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt max_allocation_size_mb=32768'" >> /etc/environment
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'
ENV UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'
ENV LSAN_OPTIONS='max_allocation_size_mb=32768'
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer

View File

@ -261,9 +261,12 @@ function timeout_with_logging() {
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code

View File

@ -20,7 +20,7 @@
</max_execution_time>
<max_memory_usage>
<max>10G</max>
<max>5G</max>
</max_memory_usage>
<table_function_remote_max_addresses>

View File

@ -208,7 +208,6 @@ handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIGSEGV nostop pass
handle SIG$RTMIN nostop noprint pass
info signals
continue

View File

@ -251,9 +251,12 @@ function timeout_with_logging() {
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code

View File

@ -20,7 +20,6 @@ handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIGSEGV nostop pass
handle SIG$RTMIN nostop noprint pass
info signals
continue

View File

@ -247,12 +247,22 @@ function run_tests()
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
START_TIME=${SECONDS}
set +e
timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
set -e
DURATION=$((START_TIME - SECONDS))
echo "Elapsed ${DURATION} seconds."
if [[ $DURATION -ge $TIMEOUT ]]
then
echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds."
fi
}
export -f run_tests
@ -264,7 +274,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then
# We don't run tests with Ordinary database in PRs, only in master.
# So run new/changed tests with Ordinary at least once in flaky check.
timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
| sed 's/All tests have finished//' | sed 's/No tests were run//' ||:
| sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
fi
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:

View File

@ -45,9 +45,12 @@ function timeout_with_logging() {
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code

View File

@ -12,6 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN "
SKIPPED_SIGN = "[ SKIPPED "
HUNG_SIGN = "Found hung queries in processlist"
SERVER_DIED_SIGN = "Server died, terminating all processes"
SERVER_DIED_SIGN2 = "Server does not respond to health check"
DATABASE_SIGN = "Database: "
SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]
@ -43,7 +44,7 @@ def process_test_log(log_path, broken_tests):
if HUNG_SIGN in line:
hung = True
break
if SERVER_DIED_SIGN in line:
if SERVER_DIED_SIGN in line or SERVER_DIED_SIGN2 in line:
server_died = True
if RETRIES_SIGN in line:
retries = True
@ -111,12 +112,12 @@ def process_test_log(log_path, broken_tests):
# Python does not support TSV, so we have to escape '\t' and '\n' manually
# and hope that complex escape sequences will not break anything
test_results = [
(
[
test[0],
test[1],
test[2],
"".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"),
)
]
for test in test_results
]
@ -170,18 +171,23 @@ def process_result(result_path, broken_tests):
if hung:
description = "Some queries hung, "
state = "failure"
test_results.append(("Some queries hung", "FAIL", "0", ""))
test_results.append(["Some queries hung", "FAIL", "0", ""])
elif server_died:
description = "Server died, "
state = "failure"
test_results.append(("Server died", "FAIL", "0", ""))
# When ClickHouse server crashes, some tests are still running
# and fail because they cannot connect to server
for result in test_results:
if result[1] == "FAIL":
result[1] = "SERVER_DIED"
test_results.append(["Server died", "FAIL", "0", ""])
elif not success_finish:
description = "Tests are not finished, "
state = "failure"
test_results.append(("Tests are not finished", "FAIL", "0", ""))
test_results.append(["Tests are not finished", "FAIL", "0", ""])
elif retries:
description = "Some tests restarted, "
test_results.append(("Some tests restarted", "SKIPPED", "0", ""))
test_results.append(["Some tests restarted", "SKIPPED", "0", ""])
else:
description = ""
@ -233,11 +239,12 @@ if __name__ == "__main__":
# sort by status then by check name
order = {
"FAIL": 0,
"Timeout": 1,
"NOT_FAILED": 2,
"BROKEN": 3,
"OK": 4,
"SKIPPED": 5,
"SERVER_DIED": 1,
"Timeout": 2,
"NOT_FAILED": 3,
"BROKEN": 4,
"OK": 5,
"SKIPPED": 6,
}
return order.get(item[1], 10), str(item[0]), item[1]

View File

@ -54,7 +54,7 @@ CREATE TABLE keeper_map_table
`v2` String,
`v3` Float32
)
ENGINE = KeeperMap(/keeper_map_table, 4)
ENGINE = KeeperMap('/keeper_map_table', 4)
PRIMARY KEY key
```

View File

@ -2102,14 +2102,14 @@ Result:
└─────────────────┘
```
## filesystemFree
## filesystemUnreserved
Returns the total amount of the free space on the filesystem hosting the database persistence. See also `filesystemAvailable`
Returns the total amount of the free space on the filesystem hosting the database persistence. (previously `filesystemFree`). See also [`filesystemAvailable`](#filesystemavailable).
**Syntax**
```sql
filesystemFree()
filesystemUnreserved()
```
**Returned value**
@ -2121,7 +2121,7 @@ filesystemFree()
Query:
```sql
SELECT formatReadableSize(filesystemFree()) AS "Free space";
SELECT formatReadableSize(filesystemUnreserved()) AS "Free space";
```
Result:
@ -2449,11 +2449,11 @@ As you can see, `runningAccumulate` merges states for each group of rows separat
## joinGet
The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md).
Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key.
The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md). Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key.
:::note
Only supports tables created with the `ENGINE = Join(ANY, LEFT, <join_keys>)` statement.
:::
**Syntax**
@ -2463,26 +2463,32 @@ joinGet(join_storage_table_name, `value_column`, join_keys)
**Arguments**
- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example.
- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed.
- `value_column` — name of the column of the table that contains required data.
- `join_keys` — list of keys.
:::note
The identifier is searched for in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example.
:::
**Returned value**
Returns a list of values corresponded to list of keys.
If certain does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting.
- Returns a list of values corresponded to the list of keys.
:::note
If a certain key does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting during table creation.
More info about `join_use_nulls` in [Join operation](../../engines/table-engines/special/join.md).
:::
**Example**
Input table:
```sql
CREATE DATABASE db_test
CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1
INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13)
CREATE DATABASE db_test;
CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id);
INSERT INTO db_test.id_val VALUES (1, 11)(2, 12)(4, 13);
SELECT * FROM db_test.id_val;
```
```text
@ -2496,18 +2502,116 @@ INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13)
Query:
```sql
SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1
SELECT number, joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4);
```
Result:
```text
┌─joinGet(db_test.id_val, 'val', toUInt32(number))─┐
│ 0 │
│ 11 │
│ 12 │
│ 0 │
└──────────────────────────────────────────────────┘
┌─number─┬─joinGet('db_test.id_val', 'val', toUInt32(number))─┐
1. │ 0 │ 0 │
2. │ 1 │ 11 │
3. │ 2 │ 12 │
4. │ 3 │ 0 │
└────────┴────────────────────────────────────────────────────┘
```
Setting `join_use_nulls` can be used during table creation to change the behaviour of what gets returned if no key exists in the source table.
```sql
CREATE DATABASE db_test;
CREATE TABLE db_test.id_val_nulls(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls=1;
INSERT INTO db_test.id_val_nulls VALUES (1, 11)(2, 12)(4, 13);
SELECT * FROM db_test.id_val_nulls;
```
```text
┌─id─┬─val─┐
│ 4 │ 13 │
│ 2 │ 12 │
│ 1 │ 11 │
└────┴─────┘
```
Query:
```sql
SELECT number, joinGet(db_test.id_val_nulls, 'val', toUInt32(number)) from numbers(4);
```
Result:
```text
┌─number─┬─joinGet('db_test.id_val_nulls', 'val', toUInt32(number))─┐
1. │ 0 │ ᴺᵁᴸᴸ │
2. │ 1 │ 11 │
3. │ 2 │ 12 │
4. │ 3 │ ᴺᵁᴸᴸ │
└────────┴──────────────────────────────────────────────────────────┘
```
## joinGetOrNull
Like [joinGet](#joinget) but returns `NULL` when the key is missing instead of returning the default value.
**Syntax**
```sql
joinGetOrNull(join_storage_table_name, `value_column`, join_keys)
```
**Arguments**
- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed.
- `value_column` — name of the column of the table that contains required data.
- `join_keys` — list of keys.
:::note
The identifier is searched for in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example.
:::
**Returned value**
- Returns a list of values corresponded to the list of keys.
:::note
If a certain key does not exist in source table then `NULL` is returned for that key.
:::
**Example**
Input table:
```sql
CREATE DATABASE db_test;
CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id);
INSERT INTO db_test.id_val VALUES (1, 11)(2, 12)(4, 13);
SELECT * FROM db_test.id_val;
```
```text
┌─id─┬─val─┐
│ 4 │ 13 │
│ 2 │ 12 │
│ 1 │ 11 │
└────┴─────┘
```
Query:
```sql
SELECT number, joinGetOrNull(db_test.id_val, 'val', toUInt32(number)) from numbers(4);
```
Result:
```text
┌─number─┬─joinGetOrNull('db_test.id_val', 'val', toUInt32(number))─┐
1. │ 0 │ ᴺᵁᴸᴸ │
2. │ 1 │ 11 │
3. │ 2 │ 12 │
4. │ 3 │ ᴺᵁᴸᴸ │
└────────┴──────────────────────────────────────────────────────────┘
```
## catboostEvaluate

File diff suppressed because it is too large Load Diff

View File

@ -36,7 +36,7 @@ These actions are described in detail below.
ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST]
```
Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md/#codecs) and `default_expr` (see the section [Default expressions](/docs/en/sql-reference/statements/create/table.md/#create-default-values)).
Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md/#column_compression_codec) and `default_expr` (see the section [Default expressions](/docs/en/sql-reference/statements/create/table.md/#create-default-values)).
If the `IF NOT EXISTS` clause is included, the query wont return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. If you want to add a column to the beginning of the table use the `FIRST` clause. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions.
@ -155,7 +155,7 @@ This query changes the `name` column properties:
- Column-level Settings
For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#codecs).
For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#column_compression_codec).
For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl).

View File

@ -21,7 +21,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [COMMENT 'comment for column'] [compression_codec] [TTL expr2],
...
) ENGINE = engine
COMMENT 'comment for table'
[COMMENT 'comment for table']
```
Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
@ -626,11 +626,6 @@ SELECT * FROM base.t1;
You can add a comment to the table when you creating it.
:::note
The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
:::
**Syntax**
``` sql

View File

@ -16,6 +16,7 @@ Syntax:
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name]
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
AS SELECT ...
[COMMENT 'comment']
```
Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
@ -57,6 +58,7 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE]
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
AS SELECT ...
[COMMENT 'comment']
```
:::tip
@ -161,6 +163,7 @@ RANDOMIZE FOR interval
DEPENDS ON [db.]name [, [db.]name [, ...]]
[TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY]
AS SELECT ...
[COMMENT 'comment']
```
where `interval` is a sequence of simple intervals:
```sql
@ -267,7 +270,10 @@ This is an experimental feature that may change in backwards-incompatible ways i
:::
``` sql
CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE] AS SELECT ... GROUP BY time_window_function
CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE]
AS SELECT ...
GROUP BY time_window_function
[COMMENT 'comment']
```
Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table to reduce latency and can push the processing result to a specified table or push notifications using the WATCH query.

View File

@ -18,10 +18,21 @@ Reloads all dictionaries that have been successfully loaded before.
By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED).
Always returns `Ok.` regardless of the result of the dictionary update.
**Syntax**
```sql
SYSTEM RELOAD DICTIONARIES [ON CLUSTER cluster_name]
```
## RELOAD DICTIONARY
Completely reloads a dictionary `dictionary_name`, regardless of the state of the dictionary (LOADED / NOT_LOADED / FAILED).
Always returns `Ok.` regardless of the result of updating the dictionary.
``` sql
SYSTEM RELOAD DICTIONARY [ON CLUSTER cluster_name] dictionary_name
```
The status of the dictionary can be checked by querying the `system.dictionaries` table.
``` sql

View File

@ -1,36 +0,0 @@
---
slug: /en/sql-reference/table-functions/fuzzQuery
sidebar_position: 75
sidebar_label: fuzzQuery
---
# fuzzQuery
Perturbs the given query string with random variations.
``` sql
fuzzQuery(query[, max_query_length[, random_seed]])
```
**Arguments**
- `query` (String) - The source query to perform the fuzzing on.
- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process.
- `random_seed` (UInt64) - A random seed for producing stable results.
**Returned Value**
A table object with a single column containing perturbed query strings.
## Usage Example
``` sql
SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2;
```
```
┌─query──────────────────────────────────────────────────────────┐
1. │ SELECT 'a' AS key GROUP BY key │
2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │
└────────────────────────────────────────────────────────────────┘
```

View File

@ -209,8 +209,8 @@ std::vector<String> Client::loadWarningMessages()
{} /* query_parameters */,
"" /* query_id */,
QueryProcessingStage::Complete,
&global_context->getSettingsRef(),
&global_context->getClientInfo(), false, {});
&client_context->getSettingsRef(),
&client_context->getClientInfo(), false, {});
while (true)
{
Packet packet = connection->receivePacket();
@ -306,9 +306,6 @@ void Client::initialize(Poco::Util::Application & self)
if (env_password && !config().has("password"))
config().setString("password", env_password);
// global_context->setApplicationType(Context::ApplicationType::CLIENT);
global_context->setQueryParameters(query_parameters);
/// settings and limits could be specified in config file, but passed settings has higher priority
for (const auto & setting : global_context->getSettingsRef().allUnchanged())
{
@ -382,7 +379,7 @@ try
showWarnings();
/// Set user password complexity rules
auto & access_control = global_context->getAccessControl();
auto & access_control = client_context->getAccessControl();
access_control.setPasswordComplexityRules(connection->getPasswordComplexityRules());
if (is_interactive && !delayed_interactive)
@ -459,7 +456,7 @@ void Client::connect()
<< connection_parameters.host << ":" << connection_parameters.port
<< (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl;
connection = Connection::createConnection(connection_parameters, global_context);
connection = Connection::createConnection(connection_parameters, client_context);
if (max_client_network_bandwidth)
{
@ -528,7 +525,7 @@ void Client::connect()
}
}
if (!global_context->getSettingsRef().use_client_time_zone)
if (!client_context->getSettingsRef().use_client_time_zone)
{
const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts);
if (!time_zone.empty())
@ -611,7 +608,7 @@ void Client::printChangedSettings() const
}
};
print_changes(global_context->getSettingsRef().changes(), "settings");
print_changes(client_context->getSettingsRef().changes(), "settings");
print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings");
}
@ -709,7 +706,7 @@ bool Client::processWithFuzzing(const String & full_query)
{
const char * begin = full_query.data();
orig_ast = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
@ -733,7 +730,7 @@ bool Client::processWithFuzzing(const String & full_query)
}
// Kusto is not a subject for fuzzing (yet)
if (global_context->getSettingsRef().dialect == DB::Dialect::kusto)
if (client_context->getSettingsRef().dialect == DB::Dialect::kusto)
{
return true;
}
@ -1138,8 +1135,6 @@ void Client::processOptions(const OptionsDescription & options_description,
if ((query_fuzzer_runs = options["query-fuzzer-runs"].as<int>()))
{
// Fuzzer implies multiquery.
config().setBool("multiquery", true);
// Ignore errors in parsing queries.
config().setBool("ignore-error", true);
ignore_error = true;
@ -1147,8 +1142,6 @@ void Client::processOptions(const OptionsDescription & options_description,
if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as<int>()))
{
// Fuzzer implies multiquery.
config().setBool("multiquery", true);
// Ignore errors in parsing queries.
config().setBool("ignore-error", true);
@ -1166,6 +1159,11 @@ void Client::processOptions(const OptionsDescription & options_description,
if (options.count("opentelemetry-tracestate"))
global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as<std::string>();
/// In case of clickhouse-client the `client_context` can be just an alias for the `global_context`.
/// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.)
client_context = global_context;
initClientContext();
}
@ -1199,17 +1197,9 @@ void Client::processConfig()
}
print_stack_trace = config().getBool("stacktrace", false);
if (config().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
setDefaultFormatsAndCompressionFromConfiguration();
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);
}
@ -1362,13 +1352,6 @@ void Client::readArguments(
allow_repeated_settings = true;
else if (arg == "--allow_merge_tree_settings")
allow_merge_tree_settings = true;
else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
{
/// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
++arg_num;
arg = argv[arg_num];
addMultiquery(arg, common_arguments);
}
else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-')))
{
common_arguments.emplace_back(arg);

View File

@ -9,17 +9,13 @@ namespace DB
class Client : public ClientBase
{
public:
Client()
{
fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr);
}
Client() = default;
void initialize(Poco::Util::Application & self) override;
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
bool processWithFuzzing(const String & full_query) override;

View File

@ -295,6 +295,8 @@ void LocalServer::cleanup()
if (suggest)
suggest.reset();
client_context.reset();
if (global_context)
{
global_context->shutdown();
@ -436,7 +438,7 @@ void LocalServer::connect()
in = input.get();
}
connection = LocalConnection::createConnection(
connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name);
connection_parameters, client_context, in, need_render_progress, need_render_profile_events, server_display_name);
}
@ -497,8 +499,6 @@ try
initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default")));
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
applyCmdSettings(global_context);
/// try to load user defined executable functions, throw on error and die
try
{
@ -510,6 +510,11 @@ try
throw;
}
/// Must be called after we stopped initializing the global context and changing its settings.
/// After this point the global context must be stayed almost unchanged till shutdown,
/// and all necessary changes must be made to the client context instead.
createClientContext();
if (is_interactive)
{
clearTerminal();
@ -564,9 +569,6 @@ void LocalServer::processConfig()
if (!queries.empty() && getClientConfiguration().has("queries-file"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
if (getClientConfiguration().has("multiquery"))
is_multiquery = true;
pager = getClientConfiguration().getString("pager", "");
delayed_interactive = getClientConfiguration().has("interactive") && (!queries.empty() || getClientConfiguration().has("queries-file"));
@ -735,6 +737,9 @@ void LocalServer::processConfig()
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(getClientConfiguration());
/// Command-line parameters can override settings from the default profile.
applyCmdSettings(global_context);
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
@ -778,10 +783,6 @@ void LocalServer::processConfig()
server_display_name = getClientConfiguration().getString("display_name", "");
prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) ");
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
global_context->setQueryParameters(query_parameters);
}
@ -860,6 +861,16 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context)
}
void LocalServer::createClientContext()
{
/// In case of clickhouse-local it's necessary to use a separate context for client-related purposes.
/// We can't just change the global context because it is used in background tasks (for example, in merges)
/// which don't expect that the global context can suddenly change.
client_context = Context::createCopy(global_context);
initClientContext();
}
void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector<Arguments> &, const std::vector<Arguments> &)
{
if (options.count("table"))
@ -922,13 +933,6 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
{
/// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
++arg_num;
arg = argv[arg_num];
addMultiquery(arg, common_arguments);
}
else
{
common_arguments.emplace_back(arg);

View File

@ -31,7 +31,6 @@ public:
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
void connect() override;
@ -50,7 +49,6 @@ protected:
void processConfig() override;
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
void updateLoggerLevel(const String & logs_level) override;
private:
@ -67,6 +65,8 @@ private:
void applyCmdOptions(ContextMutablePtr context);
void applyCmdSettings(ContextMutablePtr context);
void createClientContext();
ServerSettings server_settings;
std::optional<StatusFile> status;

View File

@ -1,2 +1,2 @@
clickhouse_add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp)
target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io)
target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io clickhouse_common_config)

View File

@ -0,0 +1,13 @@
<clickhouse>
<storage_configuration>
<disks>
<backups>
<type>local</type>
<path>/tmp/backups/</path>
</backups>
</disks>
</storage_configuration>
<backups>
<allowed_disk>backups</allowed_disk>
</backups>
</clickhouse>

View File

@ -0,0 +1 @@
../../../tests/config/config.d/enable_keeper_map.xml

View File

@ -0,0 +1 @@
../../../tests/config/config.d/session_log.xml

View File

@ -1,12 +1,12 @@
#include <cassert>
#include <memory>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadHelpersArena.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnArray.h>
@ -15,18 +15,14 @@
#include <Common/HashTable/HashTableKeyHolder.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/KeyHolderHelpers.h>
#include <Core/Field.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <memory>
namespace DB
@ -51,7 +47,7 @@ struct AggregateFunctionGroupArrayIntersectData
};
/// Puts all values to the hash set. Returns an array of unique values. Implemented for numeric types.
/// Puts all values to the hash set. Returns an array of unique values present in all inputs. Implemented for numeric types.
template <typename T>
class AggregateFunctionGroupArrayIntersect
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>, AggregateFunctionGroupArrayIntersect<T>>
@ -69,7 +65,7 @@ public:
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>,
AggregateFunctionGroupArrayIntersect<T>>({argument_type}, parameters_, result_type_) {}
String getName() const override { return "GroupArrayIntersect"; }
String getName() const override { return "groupArrayIntersect"; }
bool allocatesMemoryInArena() const override { return false; }
@ -158,7 +154,7 @@ public:
set.reserve(size);
for (size_t i = 0; i < size; ++i)
{
int key;
T key;
readIntBinary(key, buf);
set.insert(key);
}
@ -213,7 +209,7 @@ public:
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData, AggregateFunctionGroupArrayIntersectGeneric<is_plain_column>>({input_data_type_}, parameters_, result_type_)
, input_data_type(result_type_) {}
String getName() const override { return "GroupArrayIntersect"; }
String getName() const override { return "groupArrayIntersect"; }
bool allocatesMemoryInArena() const override { return true; }
@ -240,7 +236,7 @@ public:
{
const char * begin = nullptr;
StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin);
assert(serialized.data != nullptr);
chassert(serialized.data != nullptr);
set.emplace(SerializedKeyHolder{serialized, *arena}, it, inserted);
}
}
@ -260,7 +256,7 @@ public:
{
const char * begin = nullptr;
StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin);
assert(serialized.data != nullptr);
chassert(serialized.data != nullptr);
it = set.find(serialized);
if (it != nullptr)

View File

@ -195,7 +195,7 @@ bool SingleValueDataFixed<T>::isEqualTo(const IColumn & column, size_t index) co
template <typename T>
bool SingleValueDataFixed<T>::isEqualTo(const SingleValueDataFixed<T> & to) const
{
return has() && to.value == value;
return has() && to.has() && to.value == value;
}
template <typename T>
@ -904,6 +904,7 @@ bool SingleValueDataNumeric<T>::isEqualTo(const DB::IColumn & column, size_t ind
template <typename T>
bool SingleValueDataNumeric<T>::isEqualTo(const DB::SingleValueDataBase & to) const
{
/// to.has() is checked in memory.get().isEqualTo
auto const & other = assert_cast<const Self &>(to);
return memory.get().isEqualTo(other.memory.get());
}
@ -917,6 +918,7 @@ void SingleValueDataNumeric<T>::set(const DB::IColumn & column, size_t row_num,
template <typename T>
void SingleValueDataNumeric<T>::set(const DB::SingleValueDataBase & to, DB::Arena * arena)
{
/// to.has() is checked in memory.get().set
auto const & other = assert_cast<const Self &>(to);
return memory.get().set(other.memory.get(), arena);
}
@ -924,6 +926,7 @@ void SingleValueDataNumeric<T>::set(const DB::SingleValueDataBase & to, DB::Aren
template <typename T>
bool SingleValueDataNumeric<T>::setIfSmaller(const DB::SingleValueDataBase & to, DB::Arena * arena)
{
/// to.has() is checked in memory.get().setIfSmaller
auto const & other = assert_cast<const Self &>(to);
return memory.get().setIfSmaller(other.memory.get(), arena);
}
@ -931,6 +934,7 @@ bool SingleValueDataNumeric<T>::setIfSmaller(const DB::SingleValueDataBase & to,
template <typename T>
bool SingleValueDataNumeric<T>::setIfGreater(const DB::SingleValueDataBase & to, DB::Arena * arena)
{
/// to.has() is checked in memory.get().setIfGreater
auto const & other = assert_cast<const Self &>(to);
return memory.get().setIfGreater(other.memory.get(), arena);
}
@ -1191,7 +1195,7 @@ bool SingleValueDataString::isEqualTo(const DB::IColumn & column, size_t row_num
bool SingleValueDataString::isEqualTo(const SingleValueDataBase & other) const
{
auto const & to = assert_cast<const Self &>(other);
return has() && to.getStringRef() == getStringRef();
return has() && to.has() && to.getStringRef() == getStringRef();
}
void SingleValueDataString::set(const IColumn & column, size_t row_num, Arena * arena)
@ -1291,7 +1295,7 @@ bool SingleValueDataGeneric::isEqualTo(const IColumn & column, size_t row_num) c
bool SingleValueDataGeneric::isEqualTo(const DB::SingleValueDataBase & other) const
{
auto const & to = assert_cast<const Self &>(other);
return has() && to.value == value;
return has() && to.has() && to.value == value;
}
void SingleValueDataGeneric::set(const IColumn & column, size_t row_num, Arena *)

View File

@ -68,10 +68,13 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
return nullptr;
}
/// Checks if the node is combination of isNull and notEquals functions of two the same arguments
/// Checks if the node is combination of isNull and notEquals functions of two the same arguments:
/// [ (a <> b AND) ] (a IS NULL) AND (b IS NULL)
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
{
QueryTreeNodePtrWithHashSet all_arguments;
QueryTreeNodePtrWithHashSet is_null_arguments;
for (const auto & node : nodes)
{
const auto * func_node = node->as<FunctionNode>();
@ -80,7 +83,11 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs,
const auto & arguments = func_node->getArguments().getNodes();
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
{
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
is_null_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
}
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
{
if (arguments[0]->isEqual(*arguments[1]))
@ -95,7 +102,7 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs,
return false;
}
if (all_arguments.size() != 2)
if (all_arguments.size() != 2 || is_null_arguments.size() != 2)
return false;
lhs = all_arguments.begin()->node;

View File

@ -268,6 +268,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
}
}
const auto enable_order_by_all = updated_context->getSettingsRef().enable_order_by_all;
auto current_query_tree = std::make_shared<QueryNode>(std::move(updated_context), std::move(settings_changes));
current_query_tree->setIsSubquery(is_subquery);
@ -281,7 +283,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup);
current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets);
current_query_tree->setIsGroupByAll(select_query_typed.group_by_all);
current_query_tree->setIsOrderByAll(select_query_typed.order_by_all);
/// order_by_all flag in AST is set w/o consideration of `enable_order_by_all` setting
/// since SETTINGS section has not been parsed yet, - so, check the setting here
if (enable_order_by_all)
current_query_tree->setIsOrderByAll(select_query_typed.order_by_all);
current_query_tree->setOriginalAST(select_query);
auto current_context = current_query_tree->getContext();

View File

@ -1740,7 +1740,7 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
if (!tuple_data_type)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Qualified matcher {} find non compound expression {} with type {}. Expected tuple or array of tuples. In scope {}",
"Qualified matcher {} found a non-compound expression {} with type {}. Expected a tuple or an array of tuples. In scope {}",
matcher_node->formatASTForErrorMessage(),
expression_query_tree_node->formatASTForErrorMessage(),
expression_query_tree_node->getResultType()->getName(),
@ -4124,9 +4124,7 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
auto * column_to_interpolate = interpolate_node_typed.getExpression()->as<IdentifierNode>();
if (!column_to_interpolate)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"INTERPOLATE can work only for identifiers, but {} is found",
throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found",
interpolate_node_typed.getExpression()->formatASTForErrorMessage());
auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName();

View File

@ -226,6 +226,9 @@ add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue)
add_object_library(clickhouse_storages_materializedview Storages/MaterializedView)
add_object_library(clickhouse_client Client)
# Always compile this file with the highest possible level of optimizations, even in Debug builds.
# https://github.com/ClickHouse/ClickHouse/issues/65745
set_source_files_properties(Client/ClientBaseOptimizedParts.cpp PROPERTIES COMPILE_FLAGS "-O3")
add_object_library(clickhouse_bridge BridgeHelper)
add_object_library(clickhouse_server Server)
add_object_library(clickhouse_server_http Server/HTTP)

View File

@ -70,7 +70,6 @@
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/split.hpp>
#include <iostream>
#include <filesystem>
#include <limits>
@ -80,6 +79,8 @@
#include <Common/config_version.h>
#include "config.h"
#include <IO/ReadHelpers.h>
#include <Processors/Formats/Impl/ValuesBlockInputFormat.h>
#if USE_GWP_ASAN
# include <Common/GWPAsan.h>
@ -108,7 +109,6 @@ namespace ErrorCodes
extern const int UNEXPECTED_PACKET_FROM_SERVER;
extern const int INVALID_USAGE_OF_INPUT;
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_OPEN_FILE;
extern const int FILE_ALREADY_EXISTS;
@ -309,9 +309,16 @@ public:
ClientBase::~ClientBase()
{
writeSignalIDtoSignalPipe(SignalListener::StopThread);
signal_listener_thread.join();
HandledSignals::instance().reset();
try
{
writeSignalIDtoSignalPipe(SignalListener::StopThread);
signal_listener_thread.join();
HandledSignals::instance().reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
ClientBase::ClientBase(
@ -471,7 +478,7 @@ void ClientBase::sendExternalTables(ASTPtr parsed_query)
std::vector<ExternalTableDataPtr> data;
for (auto & table : external_tables)
data.emplace_back(table.getData(global_context));
data.emplace_back(table.getData(client_context));
connection->sendExternalTablesData(data);
}
@ -684,10 +691,10 @@ try
/// intermixed with data with parallel formatting.
/// It may increase code complexity significantly.
if (!extras_into_stdout || select_only_into_file)
output_format = global_context->getOutputFormatParallelIfPossible(
output_format = client_context->getOutputFormatParallelIfPossible(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else
output_format = global_context->getOutputFormat(
output_format = client_context->getOutputFormat(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format->setAutoFlush();
@ -739,14 +746,6 @@ void ClientBase::adjustSettings()
/// NOTE: Do not forget to set changed=false to avoid sending it to the server (to avoid breakage read only profiles)
/// In case of multi-query we allow data after semicolon since it will be
/// parsed by the client and interpreted as new query
if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed)
{
settings.input_format_values_allow_data_after_semicolon = true;
settings.input_format_values_allow_data_after_semicolon.changed = false;
}
/// Do not limit pretty format output in case of --pager specified or in case of stdout is not a tty.
if (!pager.empty() || !stdout_is_a_tty)
{
@ -766,6 +765,15 @@ void ClientBase::adjustSettings()
global_context->setSettings(settings);
}
void ClientBase::initClientContext()
{
client_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
client_context->setQuotaClientKey(getClientConfiguration().getString("quota_key", ""));
client_context->setQueryKindInitial();
client_context->setQueryKind(query_kind);
client_context->setQueryParameters(query_parameters);
}
bool ClientBase::isRegularFile(int fd)
{
struct stat file_stat;
@ -956,7 +964,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
/// client-side. Thus we need to parse the query.
const char * begin = full_query.data();
auto parsed_query = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ false);
if (!parsed_query)
@ -979,7 +987,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
/// But for asynchronous inserts we don't extract data, because it's needed
/// to be done on server side in that case (for coalescing the data from multiple inserts on server side).
const auto * insert = parsed_query->as<ASTInsertQuery>();
if (insert && isSyncInsertWithData(*insert, global_context))
if (insert && isSyncInsertWithData(*insert, client_context))
query_to_execute = full_query.substr(0, insert->data - full_query.data());
else
query_to_execute = full_query;
@ -1097,7 +1105,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
}
}
const auto & settings = global_context->getSettingsRef();
const auto & settings = client_context->getSettingsRef();
const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1;
int retries_left = 10;
@ -1112,10 +1120,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
connection_parameters.timeouts,
query,
query_parameters,
global_context->getCurrentQueryId(),
client_context->getCurrentQueryId(),
query_processing_stage,
&global_context->getSettingsRef(),
&global_context->getClientInfo(),
&client_context->getSettingsRef(),
&client_context->getClientInfo(),
true,
[&](const Progress & progress) { onProgress(progress); });
@ -1302,7 +1310,7 @@ void ClientBase::onProgress(const Progress & value)
void ClientBase::onTimezoneUpdate(const String & tz)
{
global_context->setSetting("session_timezone", tz);
client_context->setSetting("session_timezone", tz);
}
@ -1498,25 +1506,18 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de
void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query)
{
if (!global_context->hasInsertionTable() && insert_query.table)
if (!client_context->hasInsertionTable() && insert_query.table)
{
String table = insert_query.table->as<ASTIdentifier &>().shortName();
if (!table.empty())
{
String database = insert_query.database ? insert_query.database->as<ASTIdentifier &>().shortName() : "";
global_context->setInsertionTable(StorageID(database, table));
client_context->setInsertionTable(StorageID(database, table));
}
}
}
void ClientBase::addMultiquery(std::string_view query, Arguments & common_arguments) const
{
common_arguments.emplace_back("--multiquery");
common_arguments.emplace_back("-q");
common_arguments.emplace_back(query);
}
namespace
{
bool isStdinNotEmptyAndValid(ReadBufferFromFileDescriptor & std_in)
@ -1555,7 +1556,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
const auto & parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in))))
{
const auto & settings = global_context->getSettingsRef();
const auto & settings = client_context->getSettingsRef();
if (settings.throw_if_no_data_to_insert)
throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert");
else
@ -1569,10 +1570,10 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
connection_parameters.timeouts,
query,
query_parameters,
global_context->getCurrentQueryId(),
client_context->getCurrentQueryId(),
query_processing_stage,
&global_context->getSettingsRef(),
&global_context->getClientInfo(),
&client_context->getSettingsRef(),
&client_context->getClientInfo(),
true,
[&](const Progress & progress) { onProgress(progress); });
@ -1620,7 +1621,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
/// Set callback to be called on file progress.
if (tty_buf)
progress_indication.setFileProgressCallback(global_context, *tty_buf);
progress_indication.setFileProgressCallback(client_context, *tty_buf);
}
/// If data fetched from file (maybe compressed file)
@ -1654,10 +1655,10 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
}
StorageFile::CommonArguments args{
WithContext(global_context),
WithContext(client_context),
parsed_insert_query->table_id,
current_format,
getFormatSettings(global_context),
getFormatSettings(client_context),
compression_method,
columns_for_storage_file,
ConstraintsDescription{},
@ -1665,7 +1666,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
{},
String{},
};
StoragePtr storage = std::make_shared<StorageFile>(in_file, global_context->getUserFilesPath(), args);
StoragePtr storage = std::make_shared<StorageFile>(in_file, client_context->getUserFilesPath(), args);
storage->startup();
SelectQueryInfo query_info;
@ -1676,16 +1677,16 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
storage->read(
plan,
sample.getNames(),
storage->getStorageSnapshot(metadata, global_context),
storage->getStorageSnapshot(metadata, client_context),
query_info,
global_context,
client_context,
{},
global_context->getSettingsRef().max_block_size,
client_context->getSettingsRef().max_block_size,
getNumberOfPhysicalCPUCores());
auto builder = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(global_context),
BuildQueryPipelineSettings::fromContext(global_context));
QueryPlanOptimizationSettings::fromContext(client_context),
BuildQueryPipelineSettings::fromContext(client_context));
QueryPlanResourceHolder resources;
auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources);
@ -1746,14 +1747,14 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes
current_format = insert->format;
}
auto source = global_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size);
auto source = client_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size);
Pipe pipe(source);
if (columns_description.hasDefaults())
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *source, global_context);
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *source, client_context);
});
}
@ -1915,12 +1916,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (is_interactive)
{
global_context->setCurrentQueryId("");
client_context->setCurrentQueryId("");
// Generate a new query_id
for (const auto & query_id_format : query_id_formats)
{
writeString(query_id_format.first, std_out);
writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out);
writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", client_context->getCurrentQueryId())), std_out);
writeChar('\n', std_out);
std_out.next();
}
@ -1947,7 +1948,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
auto password = auth_data->getPassword();
if (password)
global_context->getAccessControl().checkPasswordComplexityRules(*password);
client_context->getAccessControl().checkPasswordComplexityRules(*password);
}
}
}
@ -1962,15 +1963,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
std::optional<Settings> old_settings;
SCOPE_EXIT_SAFE({
if (old_settings)
global_context->setSettings(*old_settings);
client_context->setSettings(*old_settings);
});
auto apply_query_settings = [&](const IAST & settings_ast)
{
if (!old_settings)
old_settings.emplace(global_context->getSettingsRef());
global_context->applySettingsChanges(settings_ast.as<ASTSetQuery>()->changes);
global_context->resetSettingsToDefaultValue(settings_ast.as<ASTSetQuery>()->default_settings);
old_settings.emplace(client_context->getSettingsRef());
client_context->applySettingsChanges(settings_ast.as<ASTSetQuery>()->changes);
client_context->resetSettingsToDefaultValue(settings_ast.as<ASTSetQuery>()->default_settings);
};
const auto * insert = parsed_query->as<ASTInsertQuery>();
@ -2003,7 +2004,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (insert && insert->select)
insert->tryFindInputFunction(input_function);
bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
bool is_async_insert_with_inlined_data = client_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
if (is_async_insert_with_inlined_data)
{
@ -2038,9 +2039,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (change.name == "profile")
current_profile = change.value.safeGet<String>();
else
global_context->applySettingChange(change);
client_context->applySettingChange(change);
}
global_context->resetSettingsToDefaultValue(set_query->default_settings);
client_context->resetSettingsToDefaultValue(set_query->default_settings);
/// Query parameters inside SET queries should be also saved on the client side
/// to override their previous definitions set with --param_* arguments
@ -2048,7 +2049,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
for (const auto & [name, value] : set_query->query_parameters)
query_parameters.insert_or_assign(name, value);
global_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()});
client_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()});
}
if (const auto * use_query = parsed_query->as<ASTUseQuery>())
{
@ -2125,8 +2126,8 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
if (this_query_begin >= all_queries_end)
return MultiQueryProcessingStage::QUERIES_END;
unsigned max_parser_depth = static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth);
unsigned max_parser_backtracks = static_cast<unsigned>(global_context->getSettingsRef().max_parser_backtracks);
unsigned max_parser_depth = static_cast<unsigned>(client_context->getSettingsRef().max_parser_depth);
unsigned max_parser_backtracks = static_cast<unsigned>(client_context->getSettingsRef().max_parser_backtracks);
// If there are only comments left until the end of file, we just
// stop. The parser can't handle this situation because it always
@ -2146,7 +2147,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
try
{
parsed_query = parseQuery(this_query_end, all_queries_end,
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
@ -2171,25 +2172,50 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
return MultiQueryProcessingStage::PARSING_FAILED;
}
// INSERT queries may have the inserted data in the query text
// that follow the query itself, e.g. "insert into t format CSV 1;2".
// They need special handling. First of all, here we find where the
// inserted data ends. In multi-query mode, it is delimited by a
// newline.
// The VALUES format needs even more handling - we also allow the
// data to be delimited by semicolon. This case is handled later by
// the format parser itself.
// We can't do multiline INSERTs with inline data, because most
// row input formats (e.g. TSV) can't tell when the input stops,
// unlike VALUES.
// INSERT queries may have the inserted data in the query text that follow the query itself, e.g. "insert into t format CSV 1,2". They
// need special handling.
// - If the INSERT statement FORMAT is VALUES, we use the VALUES format parser to skip the inserted data until we reach the trailing single semicolon.
// - Other formats (e.g. FORMAT CSV) are arbitrarily more complex and tricky to parse. For example, we may be unable to distinguish if the semicolon
// is part of the data or ends the statement. In this case, we simply assume that the end of the INSERT statement is determined by \n\n (two newlines).
auto * insert_ast = parsed_query->as<ASTInsertQuery>();
const char * query_to_execute_end = this_query_end;
if (insert_ast && insert_ast->data)
{
this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end);
if (insert_ast->format == "Values")
{
// Invoke the VALUES format parser to skip the inserted data
ReadBufferFromMemory data_in(insert_ast->data, all_queries_end - insert_ast->data);
skipBOMIfExists(data_in);
do
{
skipWhitespaceIfAny(data_in);
if (data_in.eof() || *data_in.position() == ';')
break;
}
while (ValuesBlockInputFormat::skipToNextRow(&data_in, 1, 0));
// Handle the case of a comment followed by a semicolon
// Example: INSERT INTO tab VALUES xx; -- {serverError xx}
// If we use this error hint, the next query should not be placed on the same line
this_query_end = insert_ast->data + data_in.count();
const auto * pos_newline = find_first_symbols<'\n'>(this_query_end, all_queries_end);
if (pos_newline != this_query_end)
{
TestHint hint(String(this_query_end, pos_newline - this_query_end));
if (hint.hasClientErrors() || hint.hasServerErrors())
this_query_end = pos_newline;
}
}
else
{
// Handling of generic formats
auto pos_newline = String(insert_ast->data, all_queries_end).find("\n\n");
if (pos_newline != std::string::npos)
this_query_end = insert_ast->data + pos_newline;
else
this_query_end = all_queries_end;
}
insert_ast->end = this_query_end;
query_to_execute_end = isSyncInsertWithData(*insert_ast, global_context) ? insert_ast->data : this_query_end;
query_to_execute_end = isSyncInsertWithData(*insert_ast, client_context) ? insert_ast->data : this_query_end;
}
query_to_execute = all_queries_text.substr(this_query_begin - all_queries_text.data(), query_to_execute_end - this_query_begin);
@ -2222,7 +2248,10 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
size_t test_tags_length = getTestTagsLength(all_queries_text);
/// Several queries separated by ';'.
/// INSERT data is ended by the end of line, not ';'.
/// INSERT data is ended by the empty line (\n\n), not ';'.
/// Unnecessary semicolons may cause data to be parsed containing ';'
/// e.g. 'insert into xx format csv val;' will insert "val;" instead of "val"
/// 'insert into xx format csv val\n;' will insert "val" and ";"
/// An exception is VALUES format where we also support semicolon in
/// addition to end of line.
const char * this_query_begin = all_queries_text.data() + test_tags_length;
@ -2233,6 +2262,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
String query_to_execute;
ASTPtr parsed_query;
std::unique_ptr<Exception> current_exception;
size_t retries_count = 0;
bool is_first = true;
while (true)
{
@ -2241,16 +2272,24 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
switch (stage)
{
case MultiQueryProcessingStage::QUERIES_END:
{
/// Compatible with old version when run interactive, e.g. "", "\ld"
if (is_first && is_interactive)
processTextAsSingleQuery(all_queries_text);
return true;
}
case MultiQueryProcessingStage::PARSING_FAILED:
{
return true;
}
case MultiQueryProcessingStage::CONTINUE_PARSING:
{
is_first = false;
continue;
}
case MultiQueryProcessingStage::PARSING_EXCEPTION:
{
is_first = false;
this_query_end = find_first_symbols<'\n'>(this_query_end, all_queries_end);
// Try to find test hint for syntax error. We don't know where
@ -2280,6 +2319,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
}
case MultiQueryProcessingStage::EXECUTE_QUERY:
{
is_first = false;
full_query = all_queries_text.substr(this_query_begin - all_queries_text.data(), this_query_end - this_query_begin);
if (query_fuzzer_runs)
{
@ -2289,6 +2329,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
this_query_begin = this_query_end;
continue;
}
if (suggest)
updateSuggest(parsed_query);
// Now we know for sure where the query ends.
// Look for the hint in the text of query + insert data + trailing
@ -2314,7 +2356,12 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// Check whether the error (or its absence) matches the test hints
// (or their absence).
bool error_matches_hint = true;
if (have_error)
bool need_retry = test_hint.needRetry(server_exception, &retries_count);
if (need_retry)
{
std::this_thread::sleep_for(std::chrono::seconds(1));
}
else if (have_error)
{
if (test_hint.hasServerErrors())
{
@ -2391,13 +2438,13 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// , where the inline data is delimited by semicolon and not by a
// newline.
auto * insert_ast = parsed_query->as<ASTInsertQuery>();
if (insert_ast && isSyncInsertWithData(*insert_ast, global_context))
if (insert_ast && isSyncInsertWithData(*insert_ast, client_context))
{
this_query_end = insert_ast->end;
adjustQueryEnd(
this_query_end, all_queries_end,
static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth),
static_cast<unsigned>(global_context->getSettingsRef().max_parser_backtracks));
static_cast<unsigned>(client_context->getSettingsRef().max_parser_depth),
static_cast<unsigned>(client_context->getSettingsRef().max_parser_backtracks));
}
// Report error.
@ -2408,7 +2455,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
if (have_error && !ignore_error)
return is_interactive;
this_query_begin = this_query_end;
if (!need_retry)
this_query_begin = this_query_end;
break;
}
}
@ -2433,14 +2481,6 @@ bool ClientBase::processQueryText(const String & text)
return processMultiQueryFromFile(file_name);
}
if (!is_multiquery)
{
assert(!query_fuzzer_runs);
processTextAsSingleQuery(text);
return true;
}
if (query_fuzzer_runs)
{
processWithFuzzing(text);
@ -2527,10 +2567,10 @@ void ClientBase::runInteractive()
if (load_suggestions)
{
/// Load suggestion data from the server.
if (global_context->getApplicationType() == Context::ApplicationType::CLIENT)
suggest->load<Connection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL)
suggest->load<LocalConnection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
if (client_context->getApplicationType() == Context::ApplicationType::CLIENT)
suggest->load<Connection>(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
else if (client_context->getApplicationType() == Context::ApplicationType::LOCAL)
suggest->load<LocalConnection>(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
}
if (home_path.empty())
@ -2668,7 +2708,7 @@ void ClientBase::runInteractive()
{
// If a separate connection loading suggestions failed to open a new session,
// use the main session to receive them.
suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo());
suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), client_context->getClientInfo());
}
try
@ -2717,10 +2757,10 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
if (!getClientConfiguration().has("log_comment"))
{
Settings settings = global_context->getSettings();
Settings settings = client_context->getSettings();
/// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
settings.log_comment = fs::absolute(fs::path(file_name));
global_context->setSettings(settings);
client_context->setSettings(settings);
}
return executeMultiQuery(queries_from_file);
@ -2833,168 +2873,6 @@ void ClientBase::showClientVersion()
output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
}
namespace
{
/// Define transparent hash to we can use
/// std::string_view with the containers
struct TransparentStringHash
{
using is_transparent = void;
size_t operator()(std::string_view txt) const
{
return std::hash<std::string_view>{}(txt);
}
};
/*
* This functor is used to parse command line arguments and replace dashes with underscores,
* allowing options to be specified using either dashes or underscores.
*/
class OptionsAliasParser
{
public:
explicit OptionsAliasParser(const boost::program_options::options_description& options)
{
options_names.reserve(options.options().size());
for (const auto& option : options.options())
options_names.insert(option->long_name());
}
/*
* Parses arguments by replacing dashes with underscores, and matches the resulting name with known options
* Implements boost::program_options::ext_parser logic
*/
std::pair<std::string, std::string> operator()(const std::string & token) const
{
if (!token.starts_with("--"))
return {};
std::string arg = token.substr(2);
// divide token by '=' to separate key and value if options style=long_allow_adjacent
auto pos_eq = arg.find('=');
std::string key = arg.substr(0, pos_eq);
if (options_names.contains(key))
// option does not require any changes, because it is already correct
return {};
std::replace(key.begin(), key.end(), '-', '_');
if (!options_names.contains(key))
// after replacing '-' with '_' argument is still unknown
return {};
std::string value;
if (pos_eq != std::string::npos && pos_eq < arg.size())
value = arg.substr(pos_eq + 1);
return {key, value};
}
private:
std::unordered_set<std::string> options_names;
};
}
/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests
#if defined(__clang__)
#pragma clang optimize on
#endif
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)
addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value());
else
addProgramOptions(cmd_settings, options_description.main_description.value());
if (allow_merge_tree_settings)
{
/// Add merge tree settings manually, because names of some settings
/// may clash. Query settings have higher priority and we just
/// skip ambiguous merge tree settings.
auto & main_options = options_description.main_description.value();
std::unordered_set<std::string, TransparentStringHash, std::equal_to<>> main_option_names;
for (const auto & option : main_options.options())
main_option_names.insert(option->long_name());
for (const auto & setting : cmd_merge_tree_settings.all())
{
const auto add_setting = [&](const std::string_view name)
{
if (auto it = main_option_names.find(name); it != main_option_names.end())
return;
if (allow_repeated_settings)
addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting);
else
addProgramOption(cmd_merge_tree_settings, main_options, name, setting);
};
const auto & setting_name = setting.getName();
add_setting(setting_name);
const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases();
if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end())
{
for (const auto alias : it->second)
{
add_setting(alias);
}
}
}
}
/// Parse main commandline options.
auto parser = po::command_line_parser(arguments)
.options(options_description.main_description.value())
.extra_parser(OptionsAliasParser(options_description.main_description.value()))
.allow_unregistered();
po::parsed_options parsed = parser.run();
/// Check unrecognized options without positional options.
auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
if (!unrecognized_options.empty())
{
auto hints = this->getHints(unrecognized_options[0]);
if (!hints.empty())
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}",
unrecognized_options[0], toString(hints));
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]);
}
/// Check positional options.
for (const auto & op : parsed.options)
{
if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--")
&& !op.original_tokens[0].empty() && !op.value.empty())
{
/// Two special cases for better usability:
/// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1"
/// These are relevant for interactive usage - user-friendly, but questionable in general.
/// In case of ambiguity or for scripts, prefer using proper options.
const auto & token = op.original_tokens[0];
po::variable_value value(boost::any(op.value), false);
const char * option;
if (token.contains(' '))
option = "query";
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
if (!options.emplace(option, value).second)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
}
}
po::store(parsed, options);
}
void ClientBase::init(int argc, char ** argv)
{
namespace po = boost::program_options;
@ -3040,9 +2918,9 @@ void ClientBase::init(int argc, char ** argv)
("config-file,C", po::value<std::string>(), "config-file path")
("query,q", po::value<std::vector<std::string>>()->multitoken(), R"(query; can be specified multiple times (--query "SELECT 1" --query "SELECT 2"...))")
("query,q", po::value<std::vector<std::string>>()->multitoken(), R"(Query. Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)")
("queries-file", po::value<std::vector<std::string>>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
("multiquery,n", "If specified, multiple queries separated by semicolons can be listed after --query. For convenience, it is also possible to omit --query and pass the queries directly after --multiquery.")
("multiquery,n", "Obsolete, does nothing")
("multiline,m", "If specified, allow multiline queries (do not send the query on Enter)")
("database,d", po::value<std::string>(), "database")
("query_kind", po::value<std::string>()->default_value("initial_query"), "One of initial_query/secondary_query/no_query")
@ -3071,7 +2949,7 @@ void ClientBase::init(int argc, char ** argv)
("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command")
("highlight", po::value<bool>()->default_value(true), "enable or disable basic syntax highlight in interactive command line")
("ignore-error", "do not stop processing in multiquery mode")
("ignore-error", "do not stop processing when an error occurs")
("stacktrace", "print stack traces of exceptions")
("hardware-utilization", "print hardware utilization information in progress bar")
("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets")
@ -3164,8 +3042,6 @@ void ClientBase::init(int argc, char ** argv)
queries_files = options["queries-file"].as<std::vector<std::string>>();
if (options.count("multiline"))
getClientConfiguration().setBool("multiline", true);
if (options.count("multiquery"))
getClientConfiguration().setBool("multiquery", true);
if (options.count("ignore-error"))
getClientConfiguration().setBool("ignore-error", true);
if (options.count("format"))

View File

@ -6,7 +6,6 @@
#include <Common/ProgressIndication.h>
#include <Common/InterruptListener.h>
#include <Common/ShellCommand.h>
#include <Common/QueryFuzzer.h>
#include <Common/Stopwatch.h>
#include <Common/DNSResolver.h>
#include <Core/ExternalTable.h>
@ -17,6 +16,7 @@
#include <Poco/SplitterChannel.h>
#include <Interpreters/Context.h>
#include <Client/Suggest.h>
#include <Client/QueryFuzzer.h>
#include <boost/program_options.hpp>
#include <Storages/StorageFile.h>
#include <Storages/SelectQueryInfo.h>
@ -156,8 +156,6 @@ protected:
void setInsertionTable(const ASTInsertQuery & insert_query);
void addMultiquery(std::string_view query, Arguments & common_arguments) const;
private:
void receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool partial_result_on_first_cancel);
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
@ -206,6 +204,9 @@ protected:
/// Adjust some settings after command line options and config had been processed.
void adjustSettings();
/// Initializes the client context.
void initClientContext();
void setDefaultFormatsAndCompressionFromConfiguration();
void initTTYBuffer(ProgressOption progress);
@ -215,6 +216,9 @@ protected:
SharedContextHolder shared_context;
ContextMutablePtr global_context;
/// Client context is a context used only by the client to parse queries, process query parameters and to connect to clickhouse-server.
ContextMutablePtr client_context;
LoggerPtr fatal_log;
Poco::AutoPtr<Poco::SplitterChannel> fatal_channel_ptr;
Poco::AutoPtr<Poco::Channel> fatal_console_channel_ptr;
@ -223,7 +227,6 @@ protected:
std::unique_ptr<Poco::Runnable> signal_listener;
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
bool is_multiquery = false;
bool delayed_interactive = false;
bool echo_queries = false; /// Print queries before execution in batch mode.

View File

@ -0,0 +1,176 @@
#include <Client/ClientBase.h>
#include <Core/BaseSettingsProgramOptions.h>
namespace DB
{
/**
* Program options parsing is very slow in debug builds and it affects .sh tests
* causing them to timeout sporadically.
* It seems impossible to enable optimizations for a single function (only to disable them), so
* instead we extract the code to a separate source file and compile it with different options.
*/
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int UNRECOGNIZED_ARGUMENTS;
}
namespace
{
/// Define transparent hash to we can use
/// std::string_view with the containers
struct TransparentStringHash
{
using is_transparent = void;
size_t operator()(std::string_view txt) const
{
return std::hash<std::string_view>{}(txt);
}
};
/*
* This functor is used to parse command line arguments and replace dashes with underscores,
* allowing options to be specified using either dashes or underscores.
*/
class OptionsAliasParser
{
public:
explicit OptionsAliasParser(const boost::program_options::options_description& options)
{
options_names.reserve(options.options().size());
for (const auto& option : options.options())
options_names.insert(option->long_name());
}
/*
* Parses arguments by replacing dashes with underscores, and matches the resulting name with known options
* Implements boost::program_options::ext_parser logic
*/
std::pair<std::string, std::string> operator()(const std::string & token) const
{
if (!token.starts_with("--"))
return {};
std::string arg = token.substr(2);
// divide token by '=' to separate key and value if options style=long_allow_adjacent
auto pos_eq = arg.find('=');
std::string key = arg.substr(0, pos_eq);
if (options_names.contains(key))
// option does not require any changes, because it is already correct
return {};
std::replace(key.begin(), key.end(), '-', '_');
if (!options_names.contains(key))
// after replacing '-' with '_' argument is still unknown
return {};
std::string value;
if (pos_eq != std::string::npos && pos_eq < arg.size())
value = arg.substr(pos_eq + 1);
return {key, value};
}
private:
std::unordered_set<std::string> options_names;
};
}
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)
addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value());
else
addProgramOptions(cmd_settings, options_description.main_description.value());
if (allow_merge_tree_settings)
{
/// Add merge tree settings manually, because names of some settings
/// may clash. Query settings have higher priority and we just
/// skip ambiguous merge tree settings.
auto & main_options = options_description.main_description.value();
std::unordered_set<std::string, TransparentStringHash, std::equal_to<>> main_option_names;
for (const auto & option : main_options.options())
main_option_names.insert(option->long_name());
for (const auto & setting : cmd_merge_tree_settings.all())
{
const auto add_setting = [&](const std::string_view name)
{
if (auto it = main_option_names.find(name); it != main_option_names.end())
return;
if (allow_repeated_settings)
addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting);
else
addProgramOption(cmd_merge_tree_settings, main_options, name, setting);
};
const auto & setting_name = setting.getName();
add_setting(setting_name);
const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases();
if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end())
{
for (const auto alias : it->second)
{
add_setting(alias);
}
}
}
}
/// Parse main commandline options.
auto parser = po::command_line_parser(arguments)
.options(options_description.main_description.value())
.extra_parser(OptionsAliasParser(options_description.main_description.value()))
.allow_unregistered();
po::parsed_options parsed = parser.run();
/// Check unrecognized options without positional options.
auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
if (!unrecognized_options.empty())
{
auto hints = this->getHints(unrecognized_options[0]);
if (!hints.empty())
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}",
unrecognized_options[0], toString(hints));
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]);
}
/// Check positional options.
for (const auto & op : parsed.options)
{
if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--")
&& !op.original_tokens[0].empty() && !op.value.empty())
{
/// Two special cases for better usability:
/// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1"
/// These are relevant for interactive usage - user-friendly, but questionable in general.
/// In case of ambiguity or for scripts, prefer using proper options.
const auto & token = op.original_tokens[0];
po::variable_value value(boost::any(op.value), false);
const char * option;
if (token.contains(' '))
option = "query";
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
if (!options.emplace(option, value).second)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
}
}
po::store(parsed, options);
}
}

View File

@ -68,21 +68,22 @@ Field QueryFuzzer::getRandomField(int type)
{
case 0:
{
return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)];
return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values)
/ sizeof(*bad_int64_values))];
}
case 1:
{
static constexpr double values[]
= {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999,
1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20,
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)];
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))];
}
case 2:
{
static constexpr UInt64 scales[] = {0, 1, 2, 10};
return DecimalField<Decimal64>(
bad_int64_values[fuzz_rand() % std::size(bad_int64_values)],
static_cast<UInt32>(scales[fuzz_rand() % std::size(scales)])
bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))],
static_cast<UInt32>(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))])
);
}
default:
@ -164,8 +165,7 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.erase(arr.begin() + pos);
if (debug_stream)
*debug_stream << "erased\n";
std::cerr << "erased\n";
}
if (fuzz_rand() % 5 == 0)
@ -174,14 +174,12 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
if (debug_stream)
*debug_stream << fmt::format("inserted (pos {})\n", pos);
std::cerr << fmt::format("inserted (pos {})\n", pos);
}
else
{
arr.insert(arr.begin(), getRandomField(0));
if (debug_stream)
*debug_stream << "inserted (0)\n";
std::cerr << "inserted (0)\n";
}
}
@ -199,9 +197,7 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.erase(arr.begin() + pos);
if (debug_stream)
*debug_stream << "erased\n";
std::cerr << "erased\n";
}
if (fuzz_rand() % 5 == 0)
@ -210,16 +206,12 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
if (debug_stream)
*debug_stream << fmt::format("inserted (pos {})\n", pos);
std::cerr << fmt::format("inserted (pos {})\n", pos);
}
else
{
arr.insert(arr.begin(), getRandomField(0));
if (debug_stream)
*debug_stream << "inserted (0)\n";
std::cerr << "inserted (0)\n";
}
}
@ -352,8 +344,7 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast)
}
else
{
if (debug_stream)
*debug_stream << "No random column.\n";
std::cerr << "No random column.\n";
}
}
@ -387,8 +378,7 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
if (col)
impl->children.insert(pos, col);
else
if (debug_stream)
*debug_stream << "No random column.\n";
std::cerr << "No random column.\n";
}
// We don't have to recurse here to fuzz the children, this is handled by
@ -1371,15 +1361,11 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast)
collectFuzzInfoMain(ast);
fuzz(ast);
if (out_stream)
{
*out_stream << std::endl;
WriteBufferFromOStream ast_buf(*out_stream, 4096);
formatAST(*ast, ast_buf, false /*highlight*/);
ast_buf.finalize();
*out_stream << std::endl << std::endl;
}
std::cout << std::endl;
WriteBufferFromOStream ast_buf(std::cout, 4096);
formatAST(*ast, ast_buf, false /*highlight*/);
ast_buf.finalize();
std::cout << std::endl << std::endl;
}
}

View File

@ -35,31 +35,9 @@ struct ASTWindowDefinition;
* queries, so you want to feed it a lot of queries to get some interesting mix
* of them. Normally we feed SQL regression tests to it.
*/
class QueryFuzzer
struct QueryFuzzer
{
public:
explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr)
: fuzz_rand(fuzz_rand_)
, out_stream(out_stream_)
, debug_stream(debug_stream_)
{
}
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes.
void fuzzMain(ASTPtr & ast);
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
void notifyQueryFailed(ASTPtr ast);
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
private:
pcg64 fuzz_rand;
std::ostream * out_stream = nullptr;
std::ostream * debug_stream = nullptr;
pcg64 fuzz_rand{randomSeed()};
// We add elements to expression lists with fixed probability. Some elements
// are so large, that the expected number of elements we add to them is
@ -88,6 +66,10 @@ private:
std::unordered_map<std::string, size_t> index_of_fuzzed_table;
std::set<IAST::Hash> created_tables_hashes;
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes.
void fuzzMain(ASTPtr & ast);
// Various helper functions follow, normally you shouldn't have to call them.
Field getRandomField(int type);
Field fuzzField(Field field);
@ -95,6 +77,9 @@ private:
ASTPtr getRandomExpressionList();
DataTypePtr fuzzDataType(DataTypePtr type);
DataTypePtr getRandomType();
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
void notifyQueryFailed(ASTPtr ast);
void replaceWithColumnLike(ASTPtr & ast);
void replaceWithTableLike(ASTPtr & ast);
void fuzzOrderByElement(ASTOrderByElement * elem);
@ -117,6 +102,8 @@ private:
void addTableLike(ASTPtr ast);
void addColumnLike(ASTPtr ast);
void collectFuzzInfoRecurse(ASTPtr ast);
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
};
}

View File

@ -10,6 +10,7 @@
namespace DB::ErrorCodes
{
extern const int CANNOT_PARSE_TEXT;
extern const int OK;
}
namespace DB
@ -62,9 +63,28 @@ bool TestHint::hasExpectedServerError(int error)
return std::find(server_errors.begin(), server_errors.end(), error) != server_errors.end();
}
bool TestHint::needRetry(const std::unique_ptr<Exception> & server_exception, size_t * retries_counter)
{
chassert(retries_counter);
if (max_retries <= *retries_counter)
return false;
++*retries_counter;
int error = ErrorCodes::OK;
if (server_exception)
error = server_exception->code();
if (retry_until)
return !hasExpectedServerError(error); /// retry until we get the expected error
else
return hasExpectedServerError(error); /// retry while we have the expected error
}
void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
{
std::unordered_set<std::string_view> commands{"echo", "echoOn", "echoOff"};
std::unordered_set<std::string_view> commands{"echo", "echoOn", "echoOff", "retry"};
std::unordered_set<std::string_view> command_errors{
"serverError",
@ -73,6 +93,9 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
for (Token token = comment_lexer.nextToken(); !token.isEnd(); token = comment_lexer.nextToken())
{
if (token.type == TokenType::Whitespace)
continue;
String item = String(token.begin, token.end);
if (token.type == TokenType::BareWord && commands.contains(item))
{
@ -82,6 +105,30 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
echo.emplace(true);
if (item == "echoOff")
echo.emplace(false);
if (item == "retry")
{
token = comment_lexer.nextToken();
while (token.type == TokenType::Whitespace)
token = comment_lexer.nextToken();
if (token.type != TokenType::Number)
throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Could not parse the number of retries: {}",
std::string_view(token.begin, token.end));
max_retries = std::stoul(std::string(token.begin, token.end));
token = comment_lexer.nextToken();
while (token.type == TokenType::Whitespace)
token = comment_lexer.nextToken();
if (token.type != TokenType::BareWord ||
(std::string_view(token.begin, token.end) != "until" &&
std::string_view(token.begin, token.end) != "while"))
throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected 'until' or 'while' after the number of retries, got: {}",
std::string_view(token.begin, token.end));
retry_until = std::string_view(token.begin, token.end) == "until";
}
}
else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item))
{
@ -133,6 +180,9 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
break;
}
}
if (max_retries && server_errors.size() != 1)
throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected one serverError after the 'retry N while|until' command");
}
}

View File

@ -6,6 +6,7 @@
#include <fmt/format.h>
#include <Core/Types.h>
#include <Common/Exception.h>
namespace DB
@ -65,12 +66,17 @@ public:
bool hasExpectedClientError(int error);
bool hasExpectedServerError(int error);
bool needRetry(const std::unique_ptr<Exception> & server_exception, size_t * retries_counter);
private:
const String & query;
ErrorVector server_errors{};
ErrorVector client_errors{};
std::optional<bool> echo;
size_t max_retries = 0;
bool retry_until = false;
void parse(Lexer & comment_lexer, bool is_leading_hint);
bool allErrorsExpected(int actual_server_error, int actual_client_error) const

View File

@ -8,6 +8,7 @@
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
#include <Common/LockMemoryExceptionInThread.h>
#include <Common/Logger.h>
#include <Common/MemorySanitizer.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/config_version.h>
@ -100,7 +101,7 @@ Exception::Exception(const MessageMasked & msg_masked, int code, bool remote_)
{
if (terminate_on_any_exception)
std::_Exit(terminate_status_code);
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
handle_error_code(msg_masked.msg, code, remote, getStackFramePointers());
}
@ -110,7 +111,7 @@ Exception::Exception(MessageMasked && msg_masked, int code, bool remote_)
{
if (terminate_on_any_exception)
std::_Exit(terminate_status_code);
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
handle_error_code(message(), code, remote, getStackFramePointers());
}
@ -119,7 +120,7 @@ Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
{
if (terminate_on_any_exception)
std::_Exit(terminate_status_code);
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
auto * stack_trace_frames = exc.get_stack_trace_frames();
auto stack_trace_size = exc.get_stack_trace_size();
@ -133,7 +134,7 @@ Exception::Exception(CreateFromSTDTag, const std::exception & exc)
{
if (terminate_on_any_exception)
std::_Exit(terminate_status_code);
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
auto * stack_trace_frames = exc.get_stack_trace_frames();
auto stack_trace_size = exc.get_stack_trace_size();
@ -223,10 +224,38 @@ Exception::FramePointers Exception::getStackFramePointers() const
}
thread_local bool Exception::enable_job_stack_trace = false;
thread_local std::vector<StackTrace::FramePointers> Exception::thread_frame_pointers = {};
thread_local bool Exception::can_use_thread_frame_pointers = false;
thread_local Exception::ThreadFramePointers Exception::thread_frame_pointers;
Exception::ThreadFramePointers::ThreadFramePointers()
{
can_use_thread_frame_pointers = true;
}
Exception::ThreadFramePointers::~ThreadFramePointers()
{
can_use_thread_frame_pointers = false;
}
Exception::ThreadFramePointersBase Exception::getThreadFramePointers()
{
if (can_use_thread_frame_pointers)
return thread_frame_pointers.frame_pointers;
return {};
}
void Exception::setThreadFramePointers(ThreadFramePointersBase frame_pointers)
{
if (can_use_thread_frame_pointers)
thread_frame_pointers.frame_pointers = std::move(frame_pointers);
}
static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string & start_of_message)
{
if (!isLoggingEnabled())
return;
try
{
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
@ -242,6 +271,9 @@ static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string
void tryLogCurrentException(const char * log_name, const std::string & start_of_message)
{
if (!isLoggingEnabled())
return;
/// Under high memory pressure, new allocations throw a
/// MEMORY_LIMIT_EXCEEDED exception.
///

View File

@ -10,7 +10,6 @@
#include <cerrno>
#include <exception>
#include <memory>
#include <vector>
#include <fmt/core.h>
@ -49,14 +48,14 @@ public:
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
}
Exception(const PreformattedMessage & msg, int code): Exception(msg.text, code)
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
message_format_string = msg.format_string;
message_format_string_args = msg.format_string_args;
}
@ -65,18 +64,36 @@ public:
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
message_format_string = msg.format_string;
message_format_string_args = msg.format_string_args;
}
/// Collect call stacks of all previous jobs' schedulings leading to this thread job's execution
static thread_local bool enable_job_stack_trace;
static thread_local std::vector<StackTrace::FramePointers> thread_frame_pointers;
static thread_local bool can_use_thread_frame_pointers;
/// Because of unknown order of static destructor calls,
/// thread_frame_pointers can already be uninitialized when a different destructor generates an exception.
/// To prevent such scenarios, a wrapper class is created and a function that will return empty vector
/// if its destructor is already called
using ThreadFramePointersBase = std::vector<StackTrace::FramePointers>;
struct ThreadFramePointers
{
ThreadFramePointers();
~ThreadFramePointers();
ThreadFramePointersBase frame_pointers;
};
static ThreadFramePointersBase getThreadFramePointers();
static void setThreadFramePointers(ThreadFramePointersBase frame_pointers);
/// Callback for any exception
static std::function<void(const std::string & msg, int code, bool remote, const Exception::FramePointers & trace)> callback;
protected:
static thread_local ThreadFramePointers thread_frame_pointers;
// used to remove the sensitive information from exceptions if query_masking_rules is configured
struct MessageMasked
{
@ -178,7 +195,7 @@ class ErrnoException : public Exception
public:
ErrnoException(std::string && msg, int code, int with_errno) : Exception(msg, code), saved_errno(with_errno)
{
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
addMessage(", {}", errnoToString(saved_errno));
}
@ -187,7 +204,7 @@ public:
requires std::is_convertible_v<T, String>
ErrnoException(int code, T && message) : Exception(message, code), saved_errno(errno)
{
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
addMessage(", {}", errnoToString(saved_errno));
}

View File

@ -59,6 +59,7 @@ static struct InitFiu
ONCE(execute_query_calling_empty_set_result_func_on_exception) \
ONCE(receive_timeout_on_table_status_response) \
REGULAR(keepermap_fail_drop_data) \
REGULAR(lazy_pipe_fds_fail_close) \
namespace FailPoints

View File

@ -25,3 +25,15 @@ bool hasLogger(const std::string & name)
{
return Poco::Logger::has(name);
}
static constinit std::atomic<bool> allow_logging{true};
bool isLoggingEnabled()
{
return allow_logging;
}
void disableLogging()
{
allow_logging = false;
}

View File

@ -64,3 +64,7 @@ LoggerRawPtr createRawLogger(const std::string & name, Poco::Channel * channel,
* Otherwise, returns false.
*/
bool hasLogger(const std::string & name);
void disableLogging();
bool isLoggingEnabled();

View File

@ -1,19 +1,23 @@
#include <Common/PipeFDs.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
#include <Common/FailPoint.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <unistd.h>
#include <fcntl.h>
#include <string>
#include <algorithm>
namespace DB
{
namespace FailPoints
{
extern const char lazy_pipe_fds_fail_close[];
}
namespace ErrorCodes
{
extern const int CANNOT_PIPE;
@ -42,6 +46,11 @@ void LazyPipeFDs::open()
void LazyPipeFDs::close()
{
fiu_do_on(FailPoints::lazy_pipe_fds_fail_close,
{
throw Exception(ErrorCodes::CANNOT_PIPE, "Manually triggered exception on close");
});
for (int & fd : fds_rw)
{
if (fd < 0)

View File

@ -1,8 +1,5 @@
#pragma once
#include <cstddef>
namespace DB
{

View File

@ -89,7 +89,7 @@ void signalHandler(int sig, siginfo_t * info, void * context)
writePODBinary(*info, out);
writePODBinary(signal_context, out);
writePODBinary(stack_trace, out);
writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector<StackTrace::FramePointers>{}, out);
writeVectorBinary(Exception::enable_job_stack_trace ? Exception::getThreadFramePointers() : std::vector<StackTrace::FramePointers>{}, out);
writeBinary(static_cast<UInt32>(getThreadId()), out);
writePODBinary(current_thread, out);
@ -605,7 +605,14 @@ void HandledSignals::reset()
HandledSignals::~HandledSignals()
{
reset();
try
{
reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
};
HandledSignals & HandledSignals::instance()

View File

@ -489,13 +489,25 @@ struct CacheEntry
using CacheEntryPtr = std::shared_ptr<CacheEntry>;
using StackTraceCache = std::map<StackTraceTriple, CacheEntryPtr, std::less<>>;
static constinit bool can_use_cache = false;
static StackTraceCache & cacheInstance()
using StackTraceCacheBase = std::map<StackTraceTriple, CacheEntryPtr, std::less<>>;
struct StackTraceCache : public StackTraceCacheBase
{
static StackTraceCache cache;
return cache;
}
StackTraceCache()
: StackTraceCacheBase()
{
can_use_cache = true;
}
~StackTraceCache()
{
can_use_cache = false;
}
};
static StackTraceCache cache;
static DB::SharedMutex stacktrace_cache_mutex;
@ -503,10 +515,16 @@ String toStringCached(const StackTrace::FramePointers & pointers, size_t offset,
{
const StackTraceRefTriple key{pointers, offset, size};
if (!can_use_cache)
{
DB::WriteBufferFromOwnString out;
toStringEveryLineImpl(false, key, [&](std::string_view str) { out << str << '\n'; });
return out.str();
}
/// Calculation of stack trace text is extremely slow.
/// We use cache because otherwise the server could be overloaded by trash queries.
/// Note that this cache can grow unconditionally, but practically it should be small.
StackTraceCache & cache = cacheInstance();
CacheEntryPtr cache_entry;
// Optimistic try for cache hit to avoid any contention whatsoever, should be the main hot code route
@ -558,7 +576,7 @@ std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offse
void StackTrace::dropCache()
{
std::lock_guard lock{stacktrace_cache_mutex};
cacheInstance().clear();
cache.clear();
}

View File

@ -51,7 +51,7 @@ public:
if (!capture_frame_pointers)
return;
/// Save all previous jobs call stacks and append with current
frame_pointers = DB::Exception::thread_frame_pointers;
frame_pointers = DB::Exception::getThreadFramePointers();
frame_pointers.push_back(StackTrace().getFramePointers());
}
@ -455,7 +455,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
try
{
if (DB::Exception::enable_job_stack_trace)
DB::Exception::thread_frame_pointers = std::move(job_data->frame_pointers);
DB::Exception::setThreadFramePointers(std::move(job_data->frame_pointers));
CurrentMetrics::Increment metric_active_pool_threads(metric_active_threads);

View File

@ -1,11 +1,12 @@
#if defined(OS_LINUX)
#include <Common/TimerDescriptor.h>
#include <Common/Exception.h>
#include <sys/timerfd.h>
#include <fcntl.h>
#include <unistd.h>
namespace DB
{
@ -13,21 +14,18 @@ namespace ErrorCodes
{
extern const int CANNOT_CREATE_TIMER;
extern const int CANNOT_SET_TIMER_PERIOD;
extern const int CANNOT_FCNTL;
extern const int CANNOT_READ_FROM_SOCKET;
}
TimerDescriptor::TimerDescriptor(int clockid, int flags)
TimerDescriptor::TimerDescriptor()
{
timer_fd = timerfd_create(clockid, flags);
timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
if (timer_fd == -1)
throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Cannot create timer_fd descriptor");
if (-1 == fcntl(timer_fd, F_SETFL, O_NONBLOCK))
throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set O_NONBLOCK for timer_fd");
throw ErrnoException(ErrorCodes::CANNOT_CREATE_TIMER, "Cannot create timer_fd descriptor");
}
TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept : timer_fd(other.timer_fd)
TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept
: timer_fd(other.timer_fd)
{
other.timer_fd = -1;
}
@ -40,21 +38,19 @@ TimerDescriptor & TimerDescriptor::operator=(DB::TimerDescriptor && other) noexc
TimerDescriptor::~TimerDescriptor()
{
/// Do not check for result cause cannot throw exception.
if (timer_fd != -1)
{
int err = close(timer_fd);
chassert(!err || errno == EINTR);
if (0 != ::close(timer_fd))
std::terminate();
}
}
void TimerDescriptor::reset() const
{
itimerspec spec;
spec.it_interval.tv_nsec = 0;
spec.it_interval.tv_sec = 0;
spec.it_value.tv_sec = 0;
spec.it_value.tv_nsec = 0;
if (timer_fd == -1)
return;
itimerspec spec{};
if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr))
throw ErrnoException(ErrorCodes::CANNOT_SET_TIMER_PERIOD, "Cannot reset timer_fd");
@ -66,25 +62,46 @@ void TimerDescriptor::reset() const
void TimerDescriptor::drain() const
{
if (timer_fd == -1)
return;
/// It is expected that socket returns 8 bytes when readable.
/// Read in loop anyway cause signal may interrupt read call.
/// man timerfd_create:
/// If the timer has already expired one or more times since its settings were last modified using timerfd_settime(),
/// or since the last successful read(2), then the buffer given to read(2) returns an unsigned 8-byte integer (uint64_t)
/// containing the number of expirations that have occurred.
/// (The returned value is in host byte order—that is, the native byte order for integers on the host machine.)
uint64_t buf;
while (true)
{
ssize_t res = ::read(timer_fd, &buf, sizeof(buf));
if (res < 0)
{
/// man timerfd_create:
/// If no timer expirations have occurred at the time of the read(2),
/// then the call either blocks until the next timer expiration, or fails with the error EAGAIN
/// if the file descriptor has been made nonblocking
/// (via the use of the fcntl(2) F_SETFL operation to set the O_NONBLOCK flag).
if (errno == EAGAIN)
break;
if (errno != EINTR)
throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd");
/// A signal happened, need to retry.
if (errno == EINTR)
continue;
throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd");
}
chassert(res == sizeof(buf));
}
}
void TimerDescriptor::setRelative(uint64_t usec) const
{
chassert(timer_fd >= 0);
static constexpr uint32_t TIMER_PRECISION = 1e6;
itimerspec spec;
@ -103,4 +120,5 @@ void TimerDescriptor::setRelative(Poco::Timespan timespan) const
}
}
#endif

View File

@ -12,7 +12,7 @@ private:
int timer_fd;
public:
explicit TimerDescriptor(int clockid = CLOCK_MONOTONIC, int flags = 0);
TimerDescriptor();
~TimerDescriptor();
TimerDescriptor(const TimerDescriptor &) = delete;

View File

@ -548,7 +548,7 @@ public:
virtual bool isExpired() const = 0;
/// Get the current connected node idx.
virtual Int8 getConnectedNodeIdx() const = 0;
virtual std::optional<int8_t> getConnectedNodeIdx() const = 0;
/// Get the current connected host and port.
virtual String getConnectedHostPort() const = 0;

View File

@ -39,7 +39,7 @@ public:
~TestKeeper() override;
bool isExpired() const override { return expired; }
Int8 getConnectedNodeIdx() const override { return 0; }
std::optional<int8_t> getConnectedNodeIdx() const override { return 0; }
String getConnectedHostPort() const override { return "TestKeeper:0000"; }
int32_t getConnectionXid() const override { return 0; }
int64_t getSessionID() const override { return 0; }

View File

@ -128,16 +128,15 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper>
ShuffleHosts shuffled_hosts = shuffleHosts();
impl = std::make_unique<Coordination::ZooKeeper>(shuffled_hosts, args, zk_log);
Int8 node_idx = impl->getConnectedNodeIdx();
auto node_idx = impl->getConnectedNodeIdx();
if (args.chroot.empty())
LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ","));
else
LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot);
/// If the balancing strategy has an optimal node then it will be the first in the list
bool connected_to_suboptimal_node = node_idx != shuffled_hosts[0].original_index;
bool connected_to_suboptimal_node = node_idx && static_cast<UInt8>(*node_idx) != shuffled_hosts[0].original_index;
bool respect_az = args.prefer_local_availability_zone && !args.client_availability_zone.empty();
bool may_benefit_from_reconnecting = respect_az || args.get_priority_load_balancing.hasOptimalNode();
if (connected_to_suboptimal_node && may_benefit_from_reconnecting)
@ -145,7 +144,7 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper>
auto reconnect_timeout_sec = getSecondsUntilReconnect(args);
LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})."
" To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds",
impl->getConnectedHostPort(), node_idx, reconnect_timeout_sec);
impl->getConnectedHostPort(), *node_idx, reconnect_timeout_sec);
auto reconnect_task_holder = DB::Context::getGlobalContextInstance()->getSchedulePool().createTask("ZKReconnect", [this, optimal_host = shuffled_hosts[0]]()
{
@ -154,13 +153,15 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper>
LOG_DEBUG(log, "Trying to connect to a more optimal node {}", optimal_host.host);
ShuffleHosts node{optimal_host};
std::unique_ptr<Coordination::IKeeper> new_impl = std::make_unique<Coordination::ZooKeeper>(node, args, zk_log);
Int8 new_node_idx = new_impl->getConnectedNodeIdx();
auto new_node_idx = new_impl->getConnectedNodeIdx();
chassert(new_node_idx.has_value());
/// Maybe the node was unavailable when getting AZs first time, update just in case
if (args.availability_zone_autodetect && availability_zones[new_node_idx].empty())
if (args.availability_zone_autodetect && availability_zones[*new_node_idx].empty())
{
availability_zones[new_node_idx] = new_impl->tryGetAvailabilityZone();
LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[new_node_idx]);
availability_zones[*new_node_idx] = new_impl->tryGetAvailabilityZone();
LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[*new_node_idx]);
}
optimal_impl = std::move(new_impl);
@ -1525,7 +1526,7 @@ void ZooKeeper::setServerCompletelyStarted()
zk->setServerCompletelyStarted();
}
Int8 ZooKeeper::getConnectedHostIdx() const
std::optional<int8_t> ZooKeeper::getConnectedHostIdx() const
{
return impl->getConnectedNodeIdx();
}
@ -1544,10 +1545,10 @@ String ZooKeeper::getConnectedHostAvailabilityZone() const
{
if (args.implementation != "zookeeper" || !impl)
return "";
Int8 idx = impl->getConnectedNodeIdx();
if (idx < 0)
std::optional<int8_t> idx = impl->getConnectedNodeIdx();
if (!idx)
return ""; /// session expired
return availability_zones.at(idx);
return availability_zones.at(*idx);
}
size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses)

View File

@ -620,7 +620,7 @@ public:
void setServerCompletelyStarted();
Int8 getConnectedHostIdx() const;
std::optional<int8_t> getConnectedHostIdx() const;
String getConnectedHostPort() const;
int32_t getConnectionXid() const;

View File

@ -536,7 +536,7 @@ void ZooKeeper::connect(
compressed_out.emplace(*out, CompressionCodecFactory::instance().get("LZ4", {}));
}
original_index = static_cast<Int8>(node.original_index);
original_index.store(node.original_index);
break;
}
catch (...)
@ -1014,9 +1014,6 @@ void ZooKeeper::finalize(bool error_send, bool error_receive, const String & rea
LOG_INFO(log, "Finalizing session {}. finalization_started: {}, queue_finished: {}, reason: '{}'",
session_id, already_started, requests_queue.isFinished(), reason);
/// Reset the original index.
original_index = -1;
auto expire_session_if_not_expired = [&]
{
/// No new requests will appear in queue after finish()
@ -1534,6 +1531,30 @@ void ZooKeeper::close()
}
std::optional<int8_t> ZooKeeper::getConnectedNodeIdx() const
{
int8_t res = original_index.load();
if (res == -1)
return std::nullopt;
else
return res;
}
String ZooKeeper::getConnectedHostPort() const
{
auto idx = getConnectedNodeIdx();
if (idx)
return args.hosts[*idx];
else
return "";
}
int32_t ZooKeeper::getConnectionXid() const
{
return next_xid.load();
}
void ZooKeeper::setZooKeeperLog(std::shared_ptr<DB::ZooKeeperLog> zk_log_)
{
/// logOperationIfNeeded(...) uses zk_log and can be called from different threads, so we have to use atomic shared_ptr

View File

@ -114,13 +114,12 @@ public:
~ZooKeeper() override;
/// If expired, you can only destroy the object. All other methods will throw exception.
bool isExpired() const override { return requests_queue.isFinished(); }
Int8 getConnectedNodeIdx() const override { return original_index; }
String getConnectedHostPort() const override { return (original_index == -1) ? "" : args.hosts[original_index]; }
int32_t getConnectionXid() const override { return next_xid.load(); }
std::optional<int8_t> getConnectedNodeIdx() const override;
String getConnectedHostPort() const override;
int32_t getConnectionXid() const override;
String tryGetAvailabilityZone() override;
@ -219,7 +218,7 @@ private:
ACLs default_acls;
zkutil::ZooKeeperArgs args;
Int8 original_index = -1;
std::atomic<int8_t> original_index{-1};
/// Fault injection
void maybeInjectSendFault();

View File

@ -1,14 +1,14 @@
clickhouse_add_executable (hashes_test hashes_test.cpp)
target_link_libraries (hashes_test PRIVATE clickhouse_common_io ch_contrib::cityhash)
target_link_libraries (hashes_test PRIVATE clickhouse_common_io clickhouse_common_config ch_contrib::cityhash)
if (TARGET OpenSSL::Crypto)
target_link_libraries (hashes_test PRIVATE OpenSSL::Crypto)
endif()
clickhouse_add_executable (sip_hash_perf sip_hash_perf.cpp)
target_link_libraries (sip_hash_perf PRIVATE clickhouse_common_io)
target_link_libraries (sip_hash_perf PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (small_table small_table.cpp)
target_link_libraries (small_table PRIVATE clickhouse_common_io)
target_link_libraries (small_table PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (parallel_aggregation parallel_aggregation.cpp)
target_link_libraries (parallel_aggregation PRIVATE dbms clickhouse_functions)
@ -17,13 +17,13 @@ clickhouse_add_executable (parallel_aggregation2 parallel_aggregation2.cpp)
target_link_libraries (parallel_aggregation2 PRIVATE dbms clickhouse_functions)
clickhouse_add_executable (int_hashes_perf int_hashes_perf.cpp)
target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io)
target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (compact_array compact_array.cpp)
target_link_libraries (compact_array PRIVATE clickhouse_common_io)
target_link_libraries (compact_array PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (radix_sort radix_sort.cpp)
target_link_libraries (radix_sort PRIVATE clickhouse_common_io ch_contrib::pdqsort)
target_link_libraries (radix_sort PRIVATE clickhouse_common_io clickhouse_common_config ch_contrib::pdqsort)
clickhouse_add_executable (arena_with_free_lists arena_with_free_lists.cpp)
target_link_libraries (arena_with_free_lists PRIVATE dbms)
@ -33,46 +33,46 @@ target_link_libraries (lru_hash_map_perf PRIVATE dbms)
if (OS_LINUX)
clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io clickhouse_common_config)
endif()
clickhouse_add_executable (array_cache array_cache.cpp)
target_link_libraries (array_cache PRIVATE clickhouse_common_io)
target_link_libraries (array_cache PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (space_saving space_saving.cpp)
target_link_libraries (space_saving PRIVATE clickhouse_common_io)
target_link_libraries (space_saving PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp)
target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash)
clickhouse_add_executable (cow_columns cow_columns.cpp)
target_link_libraries (cow_columns PRIVATE clickhouse_common_io)
target_link_libraries (cow_columns PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (cow_compositions cow_compositions.cpp)
target_link_libraries (cow_compositions PRIVATE clickhouse_common_io)
target_link_libraries (cow_compositions PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (stopwatch stopwatch.cpp)
target_link_libraries (stopwatch PRIVATE clickhouse_common_io)
target_link_libraries (stopwatch PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (symbol_index symbol_index.cpp)
target_link_libraries (symbol_index PRIVATE clickhouse_common_io)
target_link_libraries (symbol_index PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (chaos_sanitizer chaos_sanitizer.cpp)
target_link_libraries (chaos_sanitizer PRIVATE clickhouse_common_io)
target_link_libraries (chaos_sanitizer PRIVATE clickhouse_common_io clickhouse_common_config)
if (OS_LINUX)
clickhouse_add_executable (memory_statistics_os_perf memory_statistics_os_perf.cpp)
target_link_libraries (memory_statistics_os_perf PRIVATE clickhouse_common_io)
target_link_libraries (memory_statistics_os_perf PRIVATE clickhouse_common_io clickhouse_common_config)
endif()
clickhouse_add_executable (procfs_metrics_provider_perf procfs_metrics_provider_perf.cpp)
target_link_libraries (procfs_metrics_provider_perf PRIVATE clickhouse_common_io)
target_link_libraries (procfs_metrics_provider_perf PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (average average.cpp)
target_link_libraries (average PRIVATE clickhouse_common_io)
target_link_libraries (average PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (shell_command_inout shell_command_inout.cpp)
target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io)
target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io clickhouse_common_config)
clickhouse_add_executable (executable_udf executable_udf.cpp)
target_link_libraries (executable_udf PRIVATE dbms)
@ -91,4 +91,4 @@ if (ENABLE_SSL)
endif()
clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp)
target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io)
target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config)

View File

@ -1,2 +1,2 @@
clickhouse_add_executable (mysqlxx_pool_test mysqlxx_pool_test.cpp)
target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx)
target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx clickhouse_common_config loggers_no_text_log)

View File

@ -1,2 +1,2 @@
clickhouse_add_executable (compressed_buffer compressed_buffer.cpp)
target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io clickhouse_compression)
target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression)

View File

@ -13,10 +13,10 @@
namespace DB
{
InterpolateDescription::InterpolateDescription(ActionsDAGPtr actions_, const Aliases & aliases)
: actions(actions_)
InterpolateDescription::InterpolateDescription(ActionsDAG actions_, const Aliases & aliases)
: actions(std::move(actions_))
{
for (const auto & name_type : actions->getRequiredColumns())
for (const auto & name_type : actions.getRequiredColumns())
{
if (const auto & p = aliases.find(name_type.name); p != aliases.end())
required_columns_map[p->second->getColumnName()] = name_type;
@ -24,7 +24,7 @@ namespace DB
required_columns_map[name_type.name] = name_type;
}
for (const ColumnWithTypeAndName & column : actions->getResultColumns())
for (const ColumnWithTypeAndName & column : actions.getResultColumns())
{
std::string name = column.name;
if (const auto & p = aliases.find(name); p != aliases.end())

View File

@ -5,21 +5,20 @@
#include <string>
#include <Core/NamesAndTypes.h>
#include <Parsers/IAST_fwd.h>
#include <Interpreters/ActionsDAG.h>
namespace DB
{
class ActionsDAG;
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
using Aliases = std::unordered_map<String, ASTPtr>;
/// Interpolate description
struct InterpolateDescription
{
explicit InterpolateDescription(ActionsDAGPtr actions, const Aliases & aliases);
explicit InterpolateDescription(ActionsDAG actions, const Aliases & aliases);
ActionsDAGPtr actions;
ActionsDAG actions;
std::unordered_map<std::string, NameAndTypePair> required_columns_map; /// input column name -> {alias, type}
std::unordered_set<std::string> result_columns_set; /// result block columns

View File

@ -1,5 +1,8 @@
#pragma once
/// CLion freezes for a minute on every keypress in any file including this.
#if !defined(__CLION_IDE__)
#include <Common/NamePrompter.h>
#include <Core/BaseSettings.h>
#include <Core/SettingsEnums.h>
@ -183,7 +186,7 @@ class IColumn;
M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
M(Bool, allow_suspicious_variant_types, false, "In CREATE TABLE statement allows specifying Variant type with similar variant types (for example, with different numeric or date types). Enabling this setting may introduce some ambiguity when working with values with similar types.", 0) \
M(Bool, allow_suspicious_primary_key, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code. Due to a bug in the LLVM compiler infrastructure, on AArch64 machines, it is known to lead to a nullptr dereference and, consequently, server crash. Do not enable this setting.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
@ -602,7 +605,7 @@ class IColumn;
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \
@ -763,7 +766,7 @@ class IColumn;
M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \
M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \
M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 2 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) ALIAS(filesystem_prefetch_min_bytes_for_single_read_task) \
M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \
M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \
M(UInt64, merge_tree_compact_parts_min_granules_to_multibuffer_read, 16, "Only available in ClickHouse Cloud", 0) \
@ -805,7 +808,6 @@ class IColumn;
M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the prefetch buffer to read from the filesystem.", 0) \
M(UInt64, filesystem_prefetch_step_bytes, 0, "Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \
M(UInt64, filesystem_prefetch_step_marks, 0, "Prefetch step in marks. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \
M(UInt64, filesystem_prefetch_min_bytes_for_single_read_task, "2Mi", "Do not parallelize within one file read less than this amount of bytes. E.g. one reader will not receive a read task of size less than this amount. This setting is recommended to avoid spikes of time for aws getObject requests to aws", 0) \
M(UInt64, filesystem_prefetch_max_memory_usage, "1Gi", "Maximum memory usage for prefetches.", 0) \
M(UInt64, filesystem_prefetches_limit, 200, "Maximum number of prefetches. Zero means unlimited. A setting `filesystem_prefetches_max_memory_usage` is more recommended if you want to limit the number of prefetches", 0) \
\
@ -1124,7 +1126,7 @@ class IColumn;
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \
M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \
M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \
M(Bool, input_format_json_case_insensitive_column_matching, false, "Ignore case when matching JSON keys with CH columns", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
@ -1155,7 +1157,6 @@ class IColumn;
M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
M(Bool, input_format_values_allow_data_after_semicolon, false, "For Values format: allow extra data after semicolon (used by client to interpret comments).", 0) \
M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \
/** This setting is obsolete and do nothing, left for compatibility reasons. */ \
M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \
@ -1349,3 +1350,5 @@ struct FormatFactorySettings : public BaseSettings<FormatFactorySettingsTraits>
};
}
#endif

View File

@ -64,9 +64,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"},
{"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"},
{"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"},
{"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"},
{"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
{"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64."},
{"collect_hash_table_stats_during_joins", false, true, "New setting."},
@ -78,6 +77,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"},
{"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"},
{"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"},
{"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"},
{"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."},
{"backup_restore_s3_retry_attempts", 1000,1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore."},
{"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."},
@ -148,7 +148,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"},
{"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"},
{"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."},
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."},
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized view cannot work together with async inserts."},
{"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"},
{"log_processors_profiles", false, true, "Enable by default"},
{"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."},

View File

@ -1,8 +1,8 @@
clickhouse_add_executable (string_pool string_pool.cpp)
target_link_libraries (string_pool PRIVATE clickhouse_common_io ch_contrib::sparsehash)
target_link_libraries (string_pool PRIVATE clickhouse_common_io clickhouse_common_config ch_contrib::sparsehash)
clickhouse_add_executable (field field.cpp)
target_link_libraries (field PRIVATE dbms)
clickhouse_add_executable (string_ref_hash string_ref_hash.cpp)
target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io)
target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io clickhouse_common_config)

View File

@ -146,10 +146,19 @@ BaseDaemon::BaseDaemon() = default;
BaseDaemon::~BaseDaemon()
{
writeSignalIDtoSignalPipe(SignalListener::StopThread);
signal_listener_thread.join();
HandledSignals::instance().reset();
SentryWriter::resetInstance();
try
{
writeSignalIDtoSignalPipe(SignalListener::StopThread);
signal_listener_thread.join();
HandledSignals::instance().reset();
SentryWriter::resetInstance();
}
catch (...)
{
tryLogCurrentException(&logger());
}
disableLogging();
}

View File

@ -257,8 +257,8 @@ static DataTypePtr create(const ASTPtr & arguments)
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Unexpected AST element passed as aggregate function name for data type AggregateFunction. "
"Must be identifier or function.");
"Unexpected AST element {} passed as aggregate function name for data type AggregateFunction. "
"Must be identifier or function", data_type_ast->getID());
for (size_t i = argument_types_start_idx; i < arguments->children.size(); ++i)
argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));

View File

@ -2,7 +2,7 @@
#include <DataTypes/DataTypeCustom.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTDataType.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Common/typeid_cast.h>
@ -22,7 +22,6 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_TYPE;
extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
}
@ -83,15 +82,9 @@ DataTypePtr DataTypeFactory::tryGet(const ASTPtr & ast) const
template <bool nullptr_on_error>
DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const
{
if (const auto * func = ast->as<ASTFunction>())
if (const auto * type = ast->as<ASTDataType>())
{
if (func->parameters)
{
if constexpr (nullptr_on_error)
return nullptr;
throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Data type cannot have multiple parenthesized parameters.");
}
return getImpl<nullptr_on_error>(func->name, func->arguments);
return getImpl<nullptr_on_error>(type->name, type->arguments);
}
if (const auto * ident = ast->as<ASTIdentifier>())
@ -107,7 +100,7 @@ DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const
if constexpr (nullptr_on_error)
return nullptr;
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type.");
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type: {}.", ast->getID());
}
DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr & parameters) const

View File

@ -4,9 +4,10 @@
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTDataType.h>
#include <IO/Operators.h>
namespace DB
{
@ -53,13 +54,13 @@ static DataTypePtr create(const ASTPtr & arguments)
ASTPtr schema_argument = arguments->children[0];
bool is_nullable = false;
if (const auto * func = schema_argument->as<ASTFunction>())
if (const auto * type = schema_argument->as<ASTDataType>())
{
if (func->name != "Nullable" || func->arguments->children.size() != 1)
if (type->name != "Nullable" || type->arguments->children.size() != 1)
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
"Expected 'Nullable(<schema_name>)' as parameter for type Object (function: {})", func->name);
"Expected 'Nullable(<schema_name>)' as parameter for type Object (function: {})", type->name);
schema_argument = func->arguments->children[0];
schema_argument = type->arguments->children[0];
is_nullable = true;
}

View File

@ -559,8 +559,11 @@ void DatabaseReplicated::createEmptyLogEntry(const ZooKeeperPtr & current_zookee
bool DatabaseReplicated::waitForReplicaToProcessAllEntries(UInt64 timeout_ms)
{
if (!ddl_worker || is_probably_dropped)
return false;
{
std::lock_guard lock{ddl_worker_mutex};
if (!ddl_worker || is_probably_dropped)
return false;
}
return ddl_worker->waitForReplicaToProcessAllEntries(timeout_ms);
}
@ -641,12 +644,16 @@ LoadTaskPtr DatabaseReplicated::startupDatabaseAsync(AsyncLoader & async_loader,
if (is_probably_dropped)
return;
ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, getContext());
ddl_worker->startup();
ddl_worker_initialized = true;
{
std::lock_guard lock{ddl_worker_mutex};
ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, getContext());
ddl_worker->startup();
ddl_worker_initialized = true;
}
});
std::scoped_lock lock(mutex);
return startup_replicated_database_task = makeLoadTask(async_loader, {job});
startup_replicated_database_task = makeLoadTask(async_loader, {job});
return startup_replicated_database_task;
}
void DatabaseReplicated::waitDatabaseStarted() const
@ -671,92 +678,96 @@ void DatabaseReplicated::stopLoading()
DatabaseAtomic::stopLoading();
}
bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const
void DatabaseReplicated::dumpLocalTablesForDebugOnly(const ContextPtr & local_context) const
{
if (debug_check)
auto table_names = getAllTableNames(context.lock());
for (const auto & table_name : table_names)
{
/// Reduce number of debug checks
if (thread_local_rng() % 16)
return true;
auto ast_ptr = tryGetCreateTableQuery(table_name, local_context);
if (ast_ptr)
LOG_DEBUG(log, "[local] Table {} create query is {}", table_name, queryToString(ast_ptr));
else
LOG_DEBUG(log, "[local] Table {} has no create query", table_name);
}
LOG_TEST(log, "Current in-memory metadata digest: {}", tables_metadata_digest);
/// Database is probably being dropped
if (!local_context->getZooKeeperMetadataTransaction() && (!ddl_worker || !ddl_worker->isCurrentlyActive()))
return true;
UInt64 local_digest = 0;
{
std::lock_guard lock{mutex};
for (const auto & table : TSA_SUPPRESS_WARNING_FOR_READ(tables))
local_digest += getMetadataHash(table.first);
}
if (local_digest != tables_metadata_digest)
{
LOG_ERROR(log, "Digest of local metadata ({}) is not equal to in-memory digest ({})", local_digest, tables_metadata_digest);
return false;
}
/// Do not check digest in Keeper after internal subquery, it's probably not committed yet
if (local_context->isInternalSubquery())
return true;
/// Check does not make sense to check digest in Keeper during recovering
if (is_recovering)
return true;
String zk_digest = getZooKeeper()->get(replica_path + "/digest");
String local_digest_str = toString(local_digest);
if (zk_digest != local_digest_str)
{
LOG_ERROR(log, "Digest of local metadata ({}) is not equal to digest in Keeper ({})", local_digest_str, zk_digest);
return false;
}
return true;
}
void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_context) const
void DatabaseReplicated::dumpTablesInZooKeeperForDebugOnly() const
{
/// Replicas will set correct name of current database in query context (database name can be different on replicas)
if (auto * ddl_query = dynamic_cast<ASTQueryWithTableAndOutput *>(query.get()))
UInt32 max_log_ptr;
auto table_name_to_metadata = tryGetConsistentMetadataSnapshot(getZooKeeper(), max_log_ptr);
for (const auto & [table_name, create_table_query] : table_name_to_metadata)
{
if (ddl_query->getDatabase() != getDatabaseName())
throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed");
ddl_query->database.reset();
if (auto * create = query->as<ASTCreateQuery>())
auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query);
if (query_ast)
{
if (create->storage)
checkTableEngine(*create, *create->storage, query_context);
LOG_DEBUG(log, "[zookeeper] Table {} create query is {}", table_name, queryToString(query_ast));
}
else
{
LOG_DEBUG(log, "[zookeeper] Table {} has no create query", table_name);
}
}
}
if (create->targets)
void DatabaseReplicated::tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(const ContextPtr & local_context) const
{
UInt32 max_log_ptr;
auto table_name_to_metadata_in_zk = tryGetConsistentMetadataSnapshot(getZooKeeper(), max_log_ptr);
auto table_names_local = getAllTableNames(local_context);
if (table_name_to_metadata_in_zk.size() != table_names_local.size())
LOG_DEBUG(log, "Amount of tables in zk {} locally {}", table_name_to_metadata_in_zk.size(), table_names_local.size());
std::unordered_set<std::string> checked_tables;
for (const auto & table_name : table_names_local)
{
auto local_ast_ptr = tryGetCreateTableQuery(table_name, local_context);
if (table_name_to_metadata_in_zk.contains(table_name))
{
checked_tables.insert(table_name);
auto create_table_query_in_zk = table_name_to_metadata_in_zk[table_name];
auto zk_ast_ptr = parseQueryFromMetadataInZooKeeper(table_name, create_table_query_in_zk);
if (local_ast_ptr == nullptr && zk_ast_ptr == nullptr)
{
for (const auto & inner_table_engine : create->targets->getInnerEngines())
checkTableEngine(*create, *inner_table_engine, query_context);
LOG_DEBUG(log, "AST for table {} is the same (nullptr) in local and ZK", table_name);
}
else if (local_ast_ptr != nullptr && zk_ast_ptr != nullptr && queryToString(local_ast_ptr) != queryToString(zk_ast_ptr))
{
LOG_DEBUG(log, "AST differs for table {}, local {}, in zookeeper {}", table_name, queryToString(local_ast_ptr), queryToString(zk_ast_ptr));
}
else if (local_ast_ptr == nullptr)
{
LOG_DEBUG(log, "AST differs for table {}, local nullptr, in zookeeper {}", table_name, queryToString(zk_ast_ptr));
}
else if (zk_ast_ptr == nullptr)
{
LOG_DEBUG(log, "AST differs for table {}, local {}, in zookeeper nullptr", table_name, queryToString(local_ast_ptr));
}
else
{
LOG_DEBUG(log, "AST for table {} is the same in local and ZK", table_name);
}
}
}
if (const auto * query_alter = query->as<ASTAlterQuery>())
{
for (const auto & command : query_alter->command_list->children)
else
{
if (!isSupportedAlterTypeForOnClusterDDLQuery(command->as<ASTAlterCommand&>().type))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query");
if (local_ast_ptr == nullptr)
LOG_DEBUG(log, "Table {} exists locally, but missing in ZK", table_name);
else
LOG_DEBUG(log, "Table {} exists locally with AST {}, but missing in ZK", table_name, queryToString(local_ast_ptr));
}
}
if (auto * query_drop = query->as<ASTDropQuery>())
for (const auto & [table_name, table_metadata] : table_name_to_metadata_in_zk)
{
if (query_drop->kind == ASTDropQuery::Kind::Detach && query_context->getSettingsRef().database_replicated_always_detach_permanently)
query_drop->permanently = true;
if (query_drop->kind == ASTDropQuery::Kind::Detach && !query_drop->permanently)
throw Exception(ErrorCodes::INCORRECT_QUERY, "DETACH TABLE is not allowed for Replicated databases. "
"Use DETACH TABLE PERMANENTLY or SYSTEM RESTART REPLICA or set "
"database_replicated_always_detach_permanently to 1");
if (!checked_tables.contains(table_name))
{
auto zk_ast_ptr = parseQueryFromMetadataInZooKeeper(table_name, table_metadata);
if (zk_ast_ptr == nullptr)
LOG_DEBUG(log, "Table {} exists in ZK with AST {}, but missing locally", table_name, queryToString(zk_ast_ptr));
else
LOG_DEBUG(log, "Table {} exists in ZK, but missing locally", table_name);
}
}
}
@ -839,6 +850,107 @@ void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStora
"to distinguish different shards and replicas");
}
bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const
{
if (debug_check)
{
/// Reduce number of debug checks
if (thread_local_rng() % 16)
return true;
}
LOG_TEST(log, "Current in-memory metadata digest: {}", tables_metadata_digest);
/// Database is probably being dropped
if (!local_context->getZooKeeperMetadataTransaction() && (!ddl_worker || !ddl_worker->isCurrentlyActive()))
return true;
UInt64 local_digest = 0;
{
std::lock_guard lock{mutex};
for (const auto & table : TSA_SUPPRESS_WARNING_FOR_READ(tables))
local_digest += getMetadataHash(table.first);
}
if (local_digest != tables_metadata_digest)
{
LOG_ERROR(log, "Digest of local metadata ({}) is not equal to in-memory digest ({})", local_digest, tables_metadata_digest);
#ifndef NDEBUG
dumpLocalTablesForDebugOnly(local_context);
dumpTablesInZooKeeperForDebugOnly();
tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(local_context);
#endif
return false;
}
/// Do not check digest in Keeper after internal subquery, it's probably not committed yet
if (local_context->isInternalSubquery())
return true;
/// Check does not make sense to check digest in Keeper during recovering
if (is_recovering)
return true;
String zk_digest = getZooKeeper()->get(replica_path + "/digest");
String local_digest_str = toString(local_digest);
if (zk_digest != local_digest_str)
{
LOG_ERROR(log, "Digest of local metadata ({}) is not equal to digest in Keeper ({})", local_digest_str, zk_digest);
#ifndef NDEBUG
dumpLocalTablesForDebugOnly(local_context);
dumpTablesInZooKeeperForDebugOnly();
tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(local_context);
#endif
return false;
}
return true;
}
void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_context) const
{
/// Replicas will set correct name of current database in query context (database name can be different on replicas)
if (auto * ddl_query = dynamic_cast<ASTQueryWithTableAndOutput *>(query.get()))
{
if (ddl_query->getDatabase() != getDatabaseName())
throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed");
ddl_query->database.reset();
if (auto * create = query->as<ASTCreateQuery>())
{
if (create->storage)
checkTableEngine(*create, *create->storage, query_context);
if (create->targets)
{
for (const auto & inner_table_engine : create->targets->getInnerEngines())
checkTableEngine(*create, *inner_table_engine, query_context);
}
}
}
if (const auto * query_alter = query->as<ASTAlterQuery>())
{
for (const auto & command : query_alter->command_list->children)
{
if (!isSupportedAlterTypeForOnClusterDDLQuery(command->as<ASTAlterCommand&>().type))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query");
}
}
if (auto * query_drop = query->as<ASTDropQuery>())
{
if (query_drop->kind == ASTDropQuery::Kind::Detach && query_context->getSettingsRef().database_replicated_always_detach_permanently)
query_drop->permanently = true;
if (query_drop->kind == ASTDropQuery::Kind::Detach && !query_drop->permanently)
throw Exception(ErrorCodes::INCORRECT_QUERY, "DETACH TABLE is not allowed for Replicated databases. "
"Use DETACH TABLE PERMANENTLY or SYSTEM RESTART REPLICA or set "
"database_replicated_always_detach_permanently to 1");
}
}
BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags)
{
waitDatabaseStarted();
@ -1253,7 +1365,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
current_zookeeper->set(replica_path + "/digest", toString(tables_metadata_digest));
}
std::map<String, String> DatabaseReplicated::tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr)
std::map<String, String> DatabaseReplicated::tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr) const
{
return getConsistentMetadataSnapshotImpl(zookeeper, {}, /* max_retries= */ 10, max_log_ptr);
}
@ -1314,7 +1426,7 @@ std::map<String, String> DatabaseReplicated::getConsistentMetadataSnapshotImpl(
return table_name_to_metadata;
}
ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query)
ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query) const
{
ParserCreateQuery parser;
String description = "in ZooKeeper " + zookeeper_path + "/metadata/" + node_name;
@ -1411,6 +1523,7 @@ void DatabaseReplicated::renameDatabase(ContextPtr query_context, const String &
void DatabaseReplicated::stopReplication()
{
std::lock_guard lock{ddl_worker_mutex};
if (ddl_worker)
ddl_worker->shutdown();
}
@ -1418,8 +1531,11 @@ void DatabaseReplicated::stopReplication()
void DatabaseReplicated::shutdown()
{
stopReplication();
ddl_worker_initialized = false;
ddl_worker = nullptr;
{
std::lock_guard lock{ddl_worker_mutex};
ddl_worker_initialized = false;
ddl_worker = nullptr;
}
DatabaseAtomic::shutdown();
}
@ -1567,6 +1683,7 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const
/// It may update the metadata digest (both locally and in ZooKeeper)
/// before DatabaseReplicatedDDLWorker::initializeReplication() has finished.
/// We should not update metadata until the database is initialized.
std::lock_guard lock{ddl_worker_mutex};
return ddl_worker_initialized && ddl_worker->isCurrentlyActive();
}

View File

@ -109,14 +109,15 @@ private:
void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;
void checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const;
void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr);
std::map<String, String> tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr);
std::map<String, String> tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr) const;
std::map<String, String> getConsistentMetadataSnapshotImpl(const ZooKeeperPtr & zookeeper, const FilterByNameFunction & filter_by_table_name,
size_t max_retries, UInt32 & max_log_ptr) const;
ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query);
ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query) const;
String readMetadataFile(const String & table_name) const;
ClusterPtr getClusterImpl(bool all_groups = false) const;
@ -132,6 +133,11 @@ private:
UInt64 getMetadataHash(const String & table_name) const;
bool checkDigestValid(const ContextPtr & local_context, bool debug_check = true) const TSA_REQUIRES(metadata_mutex);
/// For debug purposes only, don't use in production code
void dumpLocalTablesForDebugOnly(const ContextPtr & local_context) const;
void dumpTablesInZooKeeperForDebugOnly() const;
void tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(const ContextPtr & local_context) const;
void waitDatabaseStarted() const override;
void stopLoading() override;
@ -149,6 +155,7 @@ private:
std::atomic_bool is_recovering = false;
std::atomic_bool ddl_worker_initialized = false;
std::unique_ptr<DatabaseReplicatedDDLWorker> ddl_worker;
mutable std::mutex ddl_worker_mutex;
UInt32 max_log_ptr_at_creation = 0;
/// Usually operation with metadata are single-threaded because of the way replication works,

View File

@ -149,7 +149,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_
columns = metadata_ptr->columns.getAll();
for (const auto & column_name_and_type: columns)
{
const auto & ast_column_declaration = std::make_shared<ASTColumnDeclaration>();
const auto ast_column_declaration = std::make_shared<ASTColumnDeclaration>();
ast_column_declaration->name = column_name_and_type.name;
/// parser typename
{
@ -164,7 +164,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_
if (!parser.parse(pos, ast_type, expected))
{
if (throw_on_error)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parser metadata of {}.{}",
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse metadata of {}.{}",
backQuote(table_id.database_name), backQuote(table_id.table_name));
else
return nullptr;
@ -289,8 +289,11 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
tables.erase(it);
table_storage->is_detached = true;
if (!table_storage->isSystemStorage() && database_name != DatabaseCatalog::SYSTEM_DATABASE)
if (!table_storage->isSystemStorage() && !DatabaseCatalog::isPredefinedDatabase(database_name))
{
LOG_TEST(log, "Counting detached table {} to database {}", table_name, database_name);
CurrentMetrics::sub(getAttachedCounterForStorage(table_storage));
}
auto table_id = table_storage->getStorageID();
if (table_id.hasUUID())
@ -334,8 +337,11 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c
/// non-Atomic database the is_detached is set to true before RENAME.
table->is_detached = false;
if (!table->isSystemStorage() && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE)
if (!table->isSystemStorage() && !DatabaseCatalog::isPredefinedDatabase(database_name))
{
LOG_TEST(log, "Counting attached table {} to database {}", table_name, database_name);
CurrentMetrics::add(getAttachedCounterForStorage(table));
}
}
void DatabaseWithOwnTablesBase::shutdown()

View File

@ -12,9 +12,9 @@
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTDataType.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Common/escapeForFileName.h>
#include <Common/parseRemoteDescription.h>
#include <Databases/DatabaseFactory.h>
@ -25,6 +25,7 @@
#include <Core/Settings.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
@ -432,7 +433,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary())
{
const auto & column_declaration = std::make_shared<ASTColumnDeclaration>();
const auto column_declaration = std::make_shared<ASTColumnDeclaration>();
column_declaration->name = column_type_and_name.name;
column_declaration->type = getColumnDeclaration(column_type_and_name.type);
columns_expression_list->children.emplace_back(column_declaration);
@ -470,17 +471,15 @@ ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c
WhichDataType which(data_type);
if (which.isNullable())
return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast<const DataTypeNullable *>(data_type.get())->getNestedType()));
return makeASTDataType("Nullable", getColumnDeclaration(typeid_cast<const DataTypeNullable *>(data_type.get())->getNestedType()));
if (which.isArray())
return makeASTFunction("Array", getColumnDeclaration(typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType()));
return makeASTDataType("Array", getColumnDeclaration(typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType()));
if (which.isDateTime64())
{
return makeASTFunction("DateTime64", std::make_shared<ASTLiteral>(static_cast<UInt32>(6)));
}
return makeASTDataType("DateTime64", std::make_shared<ASTLiteral>(static_cast<UInt32>(6)));
return std::make_shared<ASTIdentifier>(data_type->getName());
return makeASTDataType(data_type->getName());
}
void registerDatabasePostgreSQL(DatabaseFactory & factory)

View File

@ -154,6 +154,7 @@ StoragePtr DatabaseSQLite::fetchTable(const String & table_name, ContextPtr loca
table_name,
ColumnsDescription{*columns},
ConstraintsDescription{},
/* comment = */ "",
local_context);
return storage;

View File

@ -34,7 +34,7 @@ public:
String getFileName() const override { return impl->getFileName(); }
size_t getFileSize() override { return impl->getFileSize(); }
std::optional<size_t> tryGetFileSize() override { return impl->tryGetFileSize(); }
String getInfoForLog() override { return impl->getInfoForLog(); }

View File

@ -810,6 +810,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
{
last_caller_id = FileSegment::getCallerId();
chassert(file_offset_of_buffer_end <= read_until_position);
if (file_offset_of_buffer_end == read_until_position)
return false;
@ -1051,7 +1052,11 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
if (download_current_segment && download_current_segment_succeeded)
chassert(file_segment.getCurrentWriteOffset() >= file_offset_of_buffer_end);
chassert(file_offset_of_buffer_end <= read_until_position);
chassert(
file_offset_of_buffer_end <= read_until_position,
fmt::format("Expected {} <= {} (size: {}, read range: {})",
file_offset_of_buffer_end, read_until_position, size, current_read_range.toString()));
}
swap(*implementation_buffer);

View File

@ -253,16 +253,15 @@ void ReadBufferFromAzureBlobStorage::initialize()
initialized = true;
}
size_t ReadBufferFromAzureBlobStorage::getFileSize()
std::optional<size_t> ReadBufferFromAzureBlobStorage::tryGetFileSize()
{
if (!blob_client)
blob_client = std::make_unique<Azure::Storage::Blobs::BlobClient>(blob_container_client->GetBlobClient(path));
if (file_size.has_value())
return *file_size;
if (!file_size)
file_size = blob_client->GetProperties().Value.BlobSize;
file_size = blob_client->GetProperties().Value.BlobSize;
return *file_size;
return file_size;
}
size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t range_begin, const std::function<bool(size_t)> & /*progress_callback*/) const

View File

@ -42,7 +42,7 @@ public:
bool supportsRightBoundedReads() const override { return true; }
size_t getFileSize() override;
std::optional<size_t> tryGetFileSize() override;
size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function<bool(size_t)> & progress_callback) const override;

View File

@ -41,7 +41,7 @@ public:
void setReadUntilEnd() override { setReadUntilPosition(getFileSize()); }
size_t getFileSize() override { return getTotalSize(blobs_to_read); }
std::optional<size_t> tryGetFileSize() override { return getTotalSize(blobs_to_read); }
size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; }

View File

@ -874,7 +874,9 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction(
/// Create metadata (see create_metadata_callback in DiskObjectStorageTransaction::writeFile()).
if (mode == WriteMode::Rewrite)
{
if (!object_storage.isWriteOnce() && metadata_storage.exists(path))
/// Otherwise we will produce lost blobs which nobody points to
/// WriteOnce storages are not affected by the issue
if (!object_storage.isPlain() && metadata_storage.exists(path))
object_storage.removeObjectsIfExist(metadata_storage.getStorageObjects(path));
metadata_transaction->createMetadataFile(path, std::move(object_key), object_size);

View File

@ -58,7 +58,8 @@ TemporaryFileOnDisk::~TemporaryFileOnDisk()
if (!disk->exists(relative_path))
{
LOG_WARNING(getLogger("TemporaryFileOnDisk"), "Temporary path '{}' does not exist in '{}'", relative_path, disk->getPath());
if (show_warning_if_removed)
LOG_WARNING(getLogger("TemporaryFileOnDisk"), "Temporary path '{}' does not exist in '{}'", relative_path, disk->getPath());
return;
}

View File

@ -27,12 +27,19 @@ public:
/// Return relative path (without disk)
const String & getRelativePath() const { return relative_path; }
/// Sets whether the destructor should show a warning if the temporary file has been already removed.
/// By default a warning is shown.
void setShowWarningIfRemoved(bool show_warning_if_removed_) { show_warning_if_removed = show_warning_if_removed_; }
private:
DiskPtr disk;
/// Relative path in disk to the temporary file or directory
String relative_path;
/// Whether the destructor should show a warning if the temporary file has been already removed.
bool show_warning_if_removed = true;
CurrentMetrics::Increment metric_increment;
/// Specified if we know what for file is used (sort/aggregate/join).

View File

@ -150,7 +150,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects;
format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence;
format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields;
format_settings.json.ignore_key_case = settings.input_format_json_ignore_key_case;
format_settings.json.case_insensitive_column_matching = settings.input_format_json_case_insensitive_column_matching;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
@ -215,7 +215,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns;
format_settings.tsv.crlf_end_of_line_input = settings.input_format_tsv_crlf_end_of_line;
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon;
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.values.escape_quote_with_quote = settings.output_format_values_escape_quote_with_quote;

View File

@ -233,7 +233,7 @@ struct FormatSettings
bool infer_incomplete_types_as_strings = true;
bool throw_on_bad_escape_sequence = true;
bool ignore_unnecessary_fields = true;
bool ignore_key_case = false;
bool case_insensitive_column_matching = false;
} json{};
struct

View File

@ -18,6 +18,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
struct Base58Encode
@ -135,7 +136,7 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function {}: 1 expected.", getName());
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function {}: 1 expected.", getName());
if (!isString(arguments[0].type))
throw Exception(

View File

@ -15,6 +15,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
}
class FunctionChar : public IFunction
@ -36,7 +37,7 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.empty())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Number of arguments for function {} can't be {}, should be at least 1",
getName(), arguments.size());

View File

@ -59,19 +59,19 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
{
auto col_res = ColumnString::create();
Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count);
return col_res;
}
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column.get()))
{
auto col_res = ColumnFixedString::create(col_fixed->getN());
Impl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars());
Impl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars(), input_rows_count);
return col_res;
}
else

View File

@ -84,7 +84,7 @@ public:
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
Generator generator;
generator.init(arguments, max_substrings_includes_remaining_string);
@ -107,18 +107,17 @@ public:
const ColumnString::Chars & src_chars = col_str->getChars();
const ColumnString::Offsets & src_offsets = col_str->getOffsets();
res_offsets.reserve(src_offsets.size());
res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random.
res_offsets.reserve(input_rows_count);
res_strings_offsets.reserve(input_rows_count * 5); /// Constant 5 - at random.
res_strings_chars.reserve(src_chars.size());
Pos token_begin = nullptr;
Pos token_end = nullptr;
size_t size = src_offsets.size();
ColumnString::Offset current_src_offset = 0;
ColumnArray::Offset current_dst_offset = 0;
ColumnString::Offset current_dst_strings_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
current_src_offset = src_offsets[i];

View File

@ -47,85 +47,54 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args)
return true;
}
/// Replaces single low cardinality column in a function call by its dictionary
/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType
/// as it's only possible if there is one low cardinality column and, optionally, const columns
ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
{
/// We return the LC indexes so the LC can be reconstructed with the function result
size_t num_rows = input_rows_count;
ColumnPtr indexes;
size_t number_low_cardinality_columns = 0;
size_t last_low_cardinality = 0;
size_t number_const_columns = 0;
size_t number_full_columns = 0;
for (size_t i = 0; i < args.size(); i++)
/// Find first LowCardinality column and replace it to nested dictionary.
for (auto & column : args)
{
auto const & arg = args[i];
if (checkAndGetColumn<ColumnLowCardinality>(arg.column.get()))
if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
{
number_low_cardinality_columns++;
last_low_cardinality = i;
/// Single LowCardinality column is supported now.
if (indexes)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function.");
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for LowCardinality column: {}",
column.type->getName());
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
column.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
column.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
num_rows = column.column->size();
column.type = low_cardinality_type->getDictionaryType();
}
else if (checkAndGetColumn<ColumnConst>(arg.column.get()))
number_const_columns++;
else
number_full_columns++;
}
if (!number_low_cardinality_columns && !number_const_columns)
return nullptr;
if (number_full_columns > 0 || number_low_cardinality_columns > 1)
{
/// This should not be possible but currently there are multiple tests in CI failing because of it
/// TODO: Fix those cases, then enable this exception
#if 0
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}",
number_low_cardinality_columns, number_full_columns, number_const_columns);
#else
return nullptr;
#endif
}
else if (number_low_cardinality_columns == 1)
{
auto & lc_arg = args[last_low_cardinality];
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(lc_arg.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName());
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(lc_arg.column.get());
chassert(low_cardinality_column);
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values' default.
lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
lc_arg.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
/// The new column will have a different number of rows, normally less but occasionally it might be more (NULL)
input_rows_count = lc_arg.column->size();
lc_arg.type = low_cardinality_type->getDictionaryType();
}
/// Change size of constants
/// Change size of constants.
for (auto & column : args)
{
if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
{
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count);
column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows);
column.type = recursiveRemoveLowCardinality(column.type);
}
}
@ -301,8 +270,6 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments();
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
/// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType
/// So there is only one low cardinality column (and optionally some const columns) and no full column
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);

View File

@ -8,17 +8,19 @@ namespace DB
template <char not_case_lower_bound, char not_case_upper_bound>
struct LowerUpperImpl
{
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t /*input_rows_count*/)
{
res_data.resize_exact(data.size());
res_offsets.assign(offsets);
array(data.data(), data.data() + data.size(), res_data.data());
}
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data, size_t /*input_rows_count*/)
{
res_data.resize_exact(data.size());
array(data.data(), data.data() + data.size(), res_data.data());

View File

@ -90,7 +90,8 @@ struct LowerUpperUTF8Impl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (data.empty())
return;
@ -98,7 +99,7 @@ struct LowerUpperUTF8Impl
bool all_ascii = isAllASCII(data.data(), data.size());
if (all_ascii)
{
LowerUpperImpl<not_case_lower_bound, not_case_upper_bound>::vector(data, offsets, res_data, res_offsets);
LowerUpperImpl<not_case_lower_bound, not_case_upper_bound>::vector(data, offsets, res_data, res_offsets, input_rows_count);
return;
}
@ -107,7 +108,7 @@ struct LowerUpperUTF8Impl
array(data.data(), data.data() + data.size(), offsets, res_data.data());
}
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument");
}

View File

@ -62,12 +62,13 @@ using Pos = const char *;
template <typename Extractor>
struct ExtractSubstringImpl
{
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
static void vector(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
size_t size = offsets.size();
res_offsets.resize(size);
res_data.reserve(size * Extractor::getReserveLengthForElement());
res_offsets.resize(input_rows_count);
res_data.reserve(input_rows_count * Extractor::getReserveLengthForElement());
size_t prev_offset = 0;
size_t res_offset = 0;
@ -76,7 +77,7 @@ struct ExtractSubstringImpl
Pos start;
size_t length;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
Extractor::execute(reinterpret_cast<const char *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);
@ -99,7 +100,7 @@ struct ExtractSubstringImpl
res_data.assign(start, length);
}
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
@ -111,12 +112,13 @@ struct ExtractSubstringImpl
template <typename Extractor>
struct CutSubstringImpl
{
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
static void vector(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
res_data.reserve(data.size());
size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
size_t prev_offset = 0;
size_t res_offset = 0;
@ -125,7 +127,7 @@ struct CutSubstringImpl
Pos start;
size_t length;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * current = reinterpret_cast<const char *>(&data[prev_offset]);
Extractor::execute(current, offsets[i] - prev_offset - 1, start, length);
@ -154,7 +156,7 @@ struct CutSubstringImpl
res_data.append(start + length, data.data() + data.size());
}
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}

View File

@ -1,8 +1,8 @@
#pragma once
#include <base/find_symbols.h>
#include "domain.h"
#include "tldLookup.h"
#include <Functions/URL/domain.h>
#include <Functions/URL/tldLookup.h>
#include <Common/TLDListsHolder.h> /// TLDType
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include "fragment.h"
#include <Functions/FunctionStringToString.h>
#include <Functions/URL/fragment.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include "queryString.h"
#include <Functions/FunctionStringToString.h>
#include <Functions/URL/queryString.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include "queryStringAndFragment.h"
#include <Functions/FunctionStringToString.h>
#include <Functions/URL/queryStringAndFragment.h>
namespace DB
{

Some files were not shown because too many files have changed in this diff Show More