Merge branch 'master' into hanfei/refreshable_view_race

This commit is contained in:
Han Fei 2024-03-27 21:33:59 +01:00
commit f3616f67aa
221 changed files with 4200 additions and 1060 deletions

@@ -44,22 +44,35 @@ At a minimum, the following information should be added (but add more as needed)
 ---
 ### Modify your CI run:
 **NOTE:** If you merge the PR with modified CI you **MUST KNOW** what you are doing
-**NOTE:** Set desired options before CI starts or re-push after updates
+**NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step
-#### Run only:
-- [ ] <!---ci_set_integration--> Integration tests
-- [ ] <!---ci_set_arm--> Integration tests (arm64)
-- [ ] <!---ci_set_stateless--> Stateless tests (release)
-- [ ] <!---ci_set_stateless_asan--> Stateless tests (asan)
-- [ ] <!---ci_set_stateful--> Stateful tests (release)
-- [ ] <!---ci_set_stateful_asan--> Stateful tests (asan)
-- [ ] <!---ci_set_reduced--> No sanitizers
-- [ ] <!---ci_set_analyzer--> Tests with analyzer
-- [ ] <!---ci_set_fast--> Fast tests
-- [ ] <!---job_package_debug--> Only package_debug build
-- [ ] <!---PLACE_YOUR_TAG_CONFIGURED_IN_ci_config.py_FILE_HERE--> Add your CI variant description here
-#### CI options:
+#### Include tests (required builds will be added automatically):
+- [ ] <!---ci_include_fast--> Fast test
+- [ ] <!---ci_include_integration--> Integration Tests
+- [ ] <!---ci_include_stateless--> Stateless tests
+- [ ] <!---ci_include_stateful--> Stateful tests
+- [ ] <!---ci_include_unit--> Unit tests
+- [ ] <!---ci_include_performance--> Performance tests
+- [ ] <!---ci_include_asan--> All with ASAN
+- [ ] <!---ci_include_tsan--> All with TSAN
+- [ ] <!---ci_include_analyzer--> All with Analyzer
+- [ ] <!---ci_include_KEYWORD--> Add your option here
+#### Exclude tests:
+- [ ] <!---ci_exclude_fast--> Fast test
+- [ ] <!---ci_exclude_integration--> Integration Tests
+- [ ] <!---ci_exclude_stateless--> Stateless tests
+- [ ] <!---ci_exclude_stateful--> Stateful tests
+- [ ] <!---ci_exclude_performance--> Performance tests
+- [ ] <!---ci_exclude_asan--> All with ASAN
+- [ ] <!---ci_exclude_tsan--> All with TSAN
+- [ ] <!---ci_exclude_msan--> All with MSAN
+- [ ] <!---ci_exclude_ubsan--> All with UBSAN
+- [ ] <!---ci_exclude_coverage--> All with Coverage
+- [ ] <!---ci_exclude_aarch64--> All with Aarch64
+- [ ] <!---ci_exclude_KEYWORD--> Add your option here
+#### Extra options:
 - [ ] <!---do_not_test--> do not test (only style check)
 - [ ] <!---no_merge_commit--> disable merge-commit (no merge from master before tests)
 - [ ] <!---no_ci_cache--> disable CI cache (job reuse)

@@ -374,7 +374,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Stateless tests (release, analyzer, s3, DatabaseReplicated)
+      test_name: Stateless tests (release, old analyzer, s3, DatabaseReplicated)
       runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   FunctionalStatelessTestS3Debug:
@@ -632,7 +632,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Integration tests (asan, analyzer)
+      test_name: Integration tests (asan, old analyzer)
       runner_type: stress-tester
       data: ${{ needs.RunConfig.outputs.data }}
   IntegrationTestsTsan:

@@ -6,6 +6,7 @@ env:
   PYTHONUNBUFFERED: 1
 on: # yamllint disable-line rule:truthy
+  merge_group:
   pull_request:
     types:
       - synchronize
@@ -29,6 +30,7 @@ jobs:
           fetch-depth: 0 # to get version
           filter: tree:0
       - name: Labels check
+        if: ${{ github.event_name != 'merge_group' }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 run_check.py
@@ -56,16 +58,9 @@
             echo 'EOF'
           } >> "$GITHUB_OUTPUT"
       - name: Re-create GH statuses for skipped jobs if any
+        if: ${{ github.event_name != 'merge_group' }}
         run: |
           python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses
-      - name: Style check early
-        # hack to run style check before the docker build job if possible (style-check image not changed)
-        if: contains(fromJson(steps.runconfig.outputs.CI_DATA).jobs_data.jobs_to_do, 'Style check early')
-        run: |
-          DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n')
-          export DOCKER_TAG=$DOCKER_TAG
-          python3 ./tests/ci/style_check.py --no-push
-          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check'
   BuildDockers:
     needs: [RunConfig]
     if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }}

@@ -436,7 +436,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Integration tests (asan, analyzer)
+      test_name: Integration tests (asan, old analyzer)
       runner_type: stress-tester
       data: ${{ needs.RunConfig.outputs.data }}
   IntegrationTestsTsan:

@@ -1,10 +1,184 @@
### Table of Contents
**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**<br/>
**[ClickHouse release v24.2, 2024-02-29](#242)**<br/>
**[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**<br/>

# 2024 Changelog
### <a id="243"></a> ClickHouse release 24.3 LTS, 2024-03-27
#### Upgrade Notes
* The setting `allow_experimental_analyzer` is enabled by default; it switches query analysis to a new implementation, which has better compatibility and feature completeness. The "analyzer" feature is now considered beta rather than experimental. You can turn on the old behavior by setting `compatibility` to `24.2` or by disabling the `allow_experimental_analyzer` setting. Watch the [video on YouTube](https://www.youtube.com/watch?v=zhrOYQpgvkk).
* ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow data type to use for the ClickHouse String data type: String or Binary. This is controlled by the settings `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`, and `output_format_arrow_string_as_string`. While Binary would be more correct and compatible, using String by default corresponds to user expectations in most cases. Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools lack support for the faster `lz4` compression method, which is why we set `zstd` by default. This is controlled by the settings `output_format_parquet_compression_method`, `output_format_orc_compression_method`, and `output_format_arrow_compression_method`. We changed the default to `zstd` for Parquet and ORC, but not for Arrow (it is intended for low-level usage). [#61817](https://github.com/ClickHouse/ClickHouse/pull/61817) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* In the new ClickHouse version, the functions `geoDistance`, `greatCircleDistance`, and `greatCircleAngle` will use the 64-bit double-precision floating-point type for internal calculations and the return type if all the arguments are Float64. This closes [#58476](https://github.com/ClickHouse/ClickHouse/issues/58476). In previous versions, the functions always used Float32. You can switch to the old behavior by setting `geo_distance_returns_float64_on_float64_arguments` to `false` or by setting `compatibility` to `24.2` or earlier. [#61848](https://github.com/ClickHouse/ClickHouse/pull/61848) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Co-authored with [Geet Patel](https://github.com/geetptl).
* The obsolete in-memory data parts have been deprecated since version 23.5 and have not been supported since version 23.10. Now the remaining code is removed. Continuation of [#55186](https://github.com/ClickHouse/ClickHouse/issues/55186) and [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). It is unlikely that you have used in-memory data parts, because they were available only before version 23.5 and only when you enabled them manually by specifying the corresponding SETTINGS for a MergeTree table. To check if you have in-memory data parts, run the following query: `SELECT part_type, count() FROM system.parts GROUP BY part_type ORDER BY part_type`. To disable the usage of in-memory data parts, do `ALTER TABLE ... MODIFY SETTING min_bytes_for_compact_part = DEFAULT, min_rows_for_compact_part = DEFAULT`. Before upgrading from old ClickHouse releases, first check that you don't have in-memory data parts. If there are in-memory data parts, disable them first, then wait until there are no in-memory data parts, and continue the upgrade. [#61127](https://github.com/ClickHouse/ClickHouse/pull/61127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Changed the column name from `duration_ms` to `duration_microseconds` in the `system.zookeeper` table to reflect that the duration is in microsecond resolution. [#60774](https://github.com/ClickHouse/ClickHouse/pull/60774) ([Duc Canh Le](https://github.com/canhld94)).
* Reject incoming INSERT queries when the query-level settings `async_insert` and `deduplicate_blocks_in_dependent_materialized_views` are enabled together. This behaviour is controlled by the setting `throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert` and is enabled by default. This is a continuation of https://github.com/ClickHouse/ClickHouse/pull/59699, needed to unblock https://github.com/ClickHouse/ClickHouse/pull/59915. [#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Utility `clickhouse-copier` is moved to a separate repository on GitHub: https://github.com/ClickHouse/copier. It is no longer included in the bundle but is still available as a separate download. This closes: [#60734](https://github.com/ClickHouse/ClickHouse/issues/60734) This closes: [#60540](https://github.com/ClickHouse/ClickHouse/issues/60540) This closes: [#60250](https://github.com/ClickHouse/ClickHouse/issues/60250) This closes: [#52917](https://github.com/ClickHouse/ClickHouse/issues/52917) This closes: [#51140](https://github.com/ClickHouse/ClickHouse/issues/51140) This closes: [#47517](https://github.com/ClickHouse/ClickHouse/issues/47517) This closes: [#47189](https://github.com/ClickHouse/ClickHouse/issues/47189) This closes: [#46598](https://github.com/ClickHouse/ClickHouse/issues/46598) This closes: [#40257](https://github.com/ClickHouse/ClickHouse/issues/40257) This closes: [#36504](https://github.com/ClickHouse/ClickHouse/issues/36504) This closes: [#35485](https://github.com/ClickHouse/ClickHouse/issues/35485) This closes: [#33702](https://github.com/ClickHouse/ClickHouse/issues/33702) This closes: [#26702](https://github.com/ClickHouse/ClickHouse/issues/26702).
* To increase compatibility with MySQL, the compatibility alias `locate` now accepts arguments `(needle, haystack[, start_pos])` by default. The previous behavior `(haystack, needle[, start_pos])` can be restored by setting `function_locate_has_mysql_compatible_argument_order = 0`; see the example after this list. [#61092](https://github.com/ClickHouse/ClickHouse/pull/61092) ([Robert Schulze](https://github.com/rschu1ze)).
* Forbid `SimpleAggregateFunction` in `ORDER BY` of `MergeTree` tables by default, in the same way `AggregateFunction` is forbidden (though that one is forbidden because it is not comparable); use the `allow_suspicious_primary_key` setting to allow them. [#61399](https://github.com/ClickHouse/ClickHouse/pull/61399) ([Azat Khuzhin](https://github.com/azat)).
* The `Ordinary` database engine is deprecated. You will receive a warning in clickhouse-client if your server is using it. This closes [#52229](https://github.com/ClickHouse/ClickHouse/issues/52229). [#56942](https://github.com/ClickHouse/ClickHouse/pull/56942) ([shabroo](https://github.com/shabroo)).
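A quick sketch of the new `locate` behavior from the entry above; the string literals are made up for illustration:

```sql
-- New MySQL-compatible default order: locate(needle, haystack[, start_pos]).
SELECT locate('bar', 'foobar');  -- returns 4 (1-based position of 'bar')

-- Restore the pre-24.3 order (haystack, needle[, start_pos]):
SET function_locate_has_mysql_compatible_argument_order = 0;
SELECT locate('foobar', 'bar');  -- returns 4 under the old argument order
```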
#### New Feature
* Support reading and writing backups as `tar` (in addition to `zip`). [#59535](https://github.com/ClickHouse/ClickHouse/pull/59535) ([josh-hildred](https://github.com/josh-hildred)).
* Implemented support for S3 Express buckets. [#59965](https://github.com/ClickHouse/ClickHouse/pull/59965) ([Nikita Taranov](https://github.com/nickitat)).
* Allow attaching parts from a different disk (using a copy instead of a hard link). [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112) ([Unalian](https://github.com/Unalian)).
* Size-capped `Memory` tables, controlled by their settings `min_bytes_to_keep`, `max_bytes_to_keep`, `min_rows_to_keep`, and `max_rows_to_keep`. [#60612](https://github.com/ClickHouse/ClickHouse/pull/60612) ([Jake Bamrah](https://github.com/JakeBamrah)).
* Separate limits on the number of waiting and executing queries. Added a new server setting `max_waiting_queries` that limits the number of queries waiting due to `async_load_databases`. Existing limits on the number of executing queries no longer count waiting queries. [#61053](https://github.com/ClickHouse/ClickHouse/pull/61053) ([Sergei Trifonov](https://github.com/serxa)).
* Added a table `system.keywords` which contains all the keywords from the parser. It is mostly needed for, and will be used by, better fuzzing and syntax highlighting. [#51808](https://github.com/ClickHouse/ClickHouse/pull/51808) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Add support for `ATTACH PARTITION ALL`. [#61107](https://github.com/ClickHouse/ClickHouse/pull/61107) ([Kirill Nikiforov](https://github.com/allmazz)).
* Add a new function, `getClientHTTPHeader`. This closes [#54665](https://github.com/ClickHouse/ClickHouse/issues/54665). Co-authored with @lingtaolf. [#61820](https://github.com/ClickHouse/ClickHouse/pull/61820) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `generate_series` as a table function (a PostgreSQL compatibility alias for the existing `numbers` function). It generates a table with an arithmetic progression of natural numbers; see the sketch after this list. [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([divanik](https://github.com/divanik)).
* Added a mode for `topK`/`topKWeighted` that also returns the count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)).
* Added function `toMillisecond`, which returns the millisecond component for values of type `DateTime` or `DateTime64`. [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)).
* Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. [#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
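A minimal sketch of the `generate_series` table function from the entry above, assuming PostgreSQL-style inclusive bounds and an optional step argument:

```sql
-- Generates 1, 4, 7, 10 (the upper bound is inclusive).
SELECT * FROM generate_series(1, 10, 3);

-- Roughly equivalent, using the existing numbers() function:
SELECT number FROM numbers(1, 10) WHERE (number - 1) % 3 = 0;
```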
#### Performance Improvement
* Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)).
* 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)).
* If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value at least 0.9 of the time, the columns after it will not be loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve the performance of the serialized aggregation method when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
* Lazily build JSON output to improve the performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
* Make HTTP/HTTPS connections with external services, such as AWS S3, reusable for all use cases, even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
* Improvements to aggregate functions `argMin` / `argMax` / `any` / `anyLast` / `anyHeavy`, as well as `ORDER BY {u8/u16/u32/u64/i8/i16/i32/i64} LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)).
* Trivial optimization for column filtering. Peak memory usage can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)).
* Execute the `multiIf` function in a columnar fashion when the result type's underlying type is a number; see the example after this list. [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)).
* Faster (almost 2x) mutexes. [#60823](https://github.com/ClickHouse/ClickHouse/pull/60823) ([Azat Khuzhin](https://github.com/azat)).
* Drain multiple connections in parallel when a distributed query is finishing. [#60845](https://github.com/ClickHouse/ClickHouse/pull/60845) ([lizhuoyu5](https://github.com/lzydmxy)).
* Optimize data movement between columns of a Nullable number or a Nullable string, which improves some micro-benchmarks. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)).
* Operations with the filesystem cache will suffer less from lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Optimize array join and other JOINs by preventing a wrong compiler optimization. Close [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074). [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)).
* If a query with a syntax error contained the `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error: the compiled regexp was put into the AST, but the letter A in AST means "abstract", which means it should not contain heavyweight objects. Parts of the AST can be created and discarded during parsing, including during a large number of backtracking steps. This led to slowness on the parsing side and consequently allowed a DoS by a readonly user. But the main problem is that it prevented progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a new analyzer pass to optimize the IN operator for a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)).
* DNSResolver shuffles the set of resolved IPs, which is needed to uniformly utilize multiple endpoints of AWS S3. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)).
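For context on the columnar `multiIf` entry above, a small example with a numeric result type; the inline `values` table is only for illustration:

```sql
-- Condition/value pairs with a final default; a numeric result type lets the
-- function be executed in a columnar fashion.
SELECT x, multiIf(x < 0, -1, x > 0, 1, 0) AS sign_of_x
FROM values('x Int32', -5, 0, 7);
```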
#### Experimental Feature
* Support parallel reading for Azure blob storage. This improves the performance of the experimental Azure object storage. [#61503](https://github.com/ClickHouse/ClickHouse/pull/61503) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Add asynchronous WriteBuffer for Azure blob storage similar to S3. This improves the performance of the experimental Azure object storage. [#59929](https://github.com/ClickHouse/ClickHouse/pull/59929) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Use managed identity for backups IO when using Azure Blob Storage. Add a setting to prevent ClickHouse from attempting to create a non-existent container, which requires permissions at the storage account level. [#61785](https://github.com/ClickHouse/ClickHouse/pull/61785) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Add a setting `parallel_replicas_allow_in_with_subquery = 1` which allows subqueries for IN to work with parallel replicas; see the sketch after this list. [#60950](https://github.com/ClickHouse/ClickHouse/pull/60950) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* A change for "zero-copy" replication: all zero-copy locks related to a table must be dropped when the table is dropped, and the directory containing these locks must be removed as well. [#57575](https://github.com/ClickHouse/ClickHouse/pull/57575) ([Sema Checherinda](https://github.com/CheSema)).
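A sketch of how the parallel-replicas setting above might be used; the tables are hypothetical, and a cluster configured for parallel replicas is assumed:

```sql
SET allow_experimental_parallel_reading_from_replicas = 1;
SET parallel_replicas_allow_in_with_subquery = 1;

-- The IN subquery no longer prevents the outer scan from using parallel replicas.
SELECT count()
FROM events
WHERE user_id IN (SELECT user_id FROM allowlist);
```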
#### Improvement
* Use `MergeTree` as a default table engine. [#60524](https://github.com/ClickHouse/ClickHouse/pull/60524) ([Alexey Milovidov](https://github.com/alexey-milovidov))
* Enable `output_format_pretty_row_numbers` by default. It is better for usability. [#61791](https://github.com/ClickHouse/ClickHouse/pull/61791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* In the previous version, some numbers in Pretty formats were not pretty enough. [#61794](https://github.com/ClickHouse/ClickHouse/pull/61794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A long value in Pretty formats won't be cut if it is the only value in the result set, such as in the result of the `SHOW CREATE TABLE` query. [#61795](https://github.com/ClickHouse/ClickHouse/pull/61795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Similarly to `clickhouse-local`, `clickhouse-client` will accept the `--output-format` option as a synonym to the `--format` option. This closes [#59848](https://github.com/ClickHouse/ClickHouse/issues/59848). [#61797](https://github.com/ClickHouse/ClickHouse/pull/61797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* If stdout is a terminal and the output format is not specified, `clickhouse-client` and similar tools will use `PrettyCompact` by default, similarly to the interactive mode. `clickhouse-client` and `clickhouse-local` will handle command line arguments for input and output formats in a unified fashion. This closes [#61272](https://github.com/ClickHouse/ClickHouse/issues/61272). [#61800](https://github.com/ClickHouse/ClickHouse/pull/61800) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Underscore digit groups in Pretty formats for better readability. This is controlled by a new setting, `output_format_pretty_highlight_digit_groups`. [#61802](https://github.com/ClickHouse/ClickHouse/pull/61802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add ability to override initial INSERT settings via `SYSTEM FLUSH DISTRIBUTED`. [#61832](https://github.com/ClickHouse/ClickHouse/pull/61832) ([Azat Khuzhin](https://github.com/azat)).
* Enable processors profiling (time spent, and input and output bytes, for sorting, aggregation, ...) by default. [#61096](https://github.com/ClickHouse/ClickHouse/pull/61096) ([Azat Khuzhin](https://github.com/azat)).
* Support files without format extension in Filesystem database. [#60795](https://github.com/ClickHouse/ClickHouse/pull/60795) ([Kruglov Pavel](https://github.com/Avogar)).
* Make all format names case-insensitive, like Tsv, or TSV, or tsv, or even rowbinary. [#60420](https://github.com/ClickHouse/ClickHouse/pull/60420) ([豪肥肥](https://github.com/HowePa)). We would appreciate it if you continue to write them correctly, e.g., `JSON` 😇, not `Json` 🤮, but we don't mind if you spell it as you prefer.
* Added `none_only_active` mode for `distributed_ddl_output_mode` setting. [#60340](https://github.com/ClickHouse/ClickHouse/pull/60340) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The advanced dashboard has slightly better colors for multi-line graphs. [#60391](https://github.com/ClickHouse/ClickHouse/pull/60391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The Advanced dashboard now has controls always visible on scrolling. This allows you to add a new chart without scrolling up. [#60692](https://github.com/ClickHouse/ClickHouse/pull/60692) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* While running the `MODIFY COLUMN` query for materialized views, check the inner table's structure to ensure every column exists. [#47427](https://github.com/ClickHouse/ClickHouse/pull/47427) ([sunny](https://github.com/sunny19930321)).
* String types and Enums can be used in the same context, such as: arrays, UNION queries, conditional expressions. This closes [#60726](https://github.com/ClickHouse/ClickHouse/issues/60726). [#60727](https://github.com/ClickHouse/ClickHouse/pull/60727) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow declaring Enums in the structure of external data for query processing (this is an immediate temporary table that you can provide for your query). [#57857](https://github.com/ClickHouse/ClickHouse/pull/57857) ([Duc Canh Le](https://github.com/canhld94)).
* Consider lightweight deleted rows when selecting parts to merge, so the disk size of the resulting part will be estimated better. [#58223](https://github.com/ClickHouse/ClickHouse/pull/58223) ([Zhuo Qiu](https://github.com/jewelzqiu)).
* Added comments for columns for more system tables. Continuation of https://github.com/ClickHouse/ClickHouse/pull/58356. [#59016](https://github.com/ClickHouse/ClickHouse/pull/59016) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Virtual columns can now be used in `PREWHERE`, which is worthwhile for non-constant virtual columns like `_part_offset`. [#59033](https://github.com/ClickHouse/ClickHouse/pull/59033) ([Amos Bird](https://github.com/amosbird)). Improved overall usability of virtual columns: built-in documentation is now available for virtual columns, shown as a column comment in the `DESCRIBE` query when the setting `describe_include_virtual_columns` is enabled; see the sketch after this list. [#60205](https://github.com/ClickHouse/ClickHouse/pull/60205) ([Anton Popov](https://github.com/CurtizJ)).
* Instead of using a constant key, object storage now generates a key for determining the remove-objects capability. [#59495](https://github.com/ClickHouse/ClickHouse/pull/59495) ([Sema Checherinda](https://github.com/CheSema)).
* Allow "local" as object storage type instead of "local_blob_storage". [#60165](https://github.com/ClickHouse/ClickHouse/pull/60165) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Parallel flush of pending INSERT blocks of Distributed engine on `DETACH`/server shutdown and `SYSTEM FLUSH DISTRIBUTED` (Parallelism will work only if you have multi-disk policy for a table (like everything in the Distributed engine right now)). [#60225](https://github.com/ClickHouse/ClickHouse/pull/60225) ([Azat Khuzhin](https://github.com/azat)).
* Add a setting to force read-through cache for merges. [#60308](https://github.com/ClickHouse/ClickHouse/pull/60308) ([Kseniia Sumarokova](https://github.com/kssenii)).
* An improvement for the MySQL compatibility protocol. The issue [#57598](https://github.com/ClickHouse/ClickHouse/issues/57598) mentions a behaviour difference regarding transaction handling: a COMMIT/ROLLBACK issued when no transaction is active was reported as an error, contrary to MySQL behaviour. [#60338](https://github.com/ClickHouse/ClickHouse/pull/60338) ([PapaToemmsn](https://github.com/PapaToemmsn)).
* Function `substring` now has a new alias `byteSlice`. [#60494](https://github.com/ClickHouse/ClickHouse/pull/60494) ([Robert Schulze](https://github.com/rschu1ze)).
* Renamed server setting `dns_cache_max_size` to `dns_cache_max_entries` to reduce ambiguity. [#60500](https://github.com/ClickHouse/ClickHouse/pull/60500) ([Kirill Nikiforov](https://github.com/allmazz)).
* `SHOW INDEX | INDEXES | INDICES | KEYS` no longer sorts by the primary key columns (which was unintuitive). [#60514](https://github.com/ClickHouse/ClickHouse/pull/60514) ([Robert Schulze](https://github.com/rschu1ze)).
* Keeper improvement: abort during startup if an invalid snapshot is detected to avoid data loss. [#60537](https://github.com/ClickHouse/ClickHouse/pull/60537) ([Antonio Andelic](https://github.com/antonio2368)).
* Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
* Keeper improvement: support `leadership_expiry_ms` in Keeper's settings. [#60806](https://github.com/ClickHouse/ClickHouse/pull/60806) ([Brokenice0415](https://github.com/Brokenice0415)).
* Always infer exponential numbers in JSON formats, regardless of the setting `input_format_try_infer_exponent_floats`. Add setting `input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects` that allows using the String type for ambiguous paths, instead of throwing an exception, during named Tuple inference from JSON objects. [#60808](https://github.com/ClickHouse/ClickHouse/pull/60808) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for `START TRANSACTION` syntax typically used in MySQL syntax, resolving https://github.com/ClickHouse/ClickHouse/discussions/60865. [#60886](https://github.com/ClickHouse/ClickHouse/pull/60886) ([Zach Naimon](https://github.com/ArctypeZach)).
* Add a flag for the full-sorting merge join algorithm to treat NULL as the biggest/smallest value, so the behavior can be compatible with other SQL systems, such as Apache Spark. [#60896](https://github.com/ClickHouse/ClickHouse/pull/60896) ([loudongfeng](https://github.com/loudongfeng)).
* Support detecting the output format by file extension in `clickhouse-client` and `clickhouse-local`. [#61036](https://github.com/ClickHouse/ClickHouse/pull/61036) ([豪肥肥](https://github.com/HowePa)).
* Update the memory limit at runtime when the Linux cgroup value changes. [#61049](https://github.com/ClickHouse/ClickHouse/pull/61049) ([Han Fei](https://github.com/hanfei1991)).
* Add the function `toUInt128OrZero`, which was missed by mistake (the mistake is related to https://github.com/ClickHouse/ClickHouse/pull/945). The compatibility aliases `FROM_UNIXTIME` and `DATE_FORMAT` (they are not ClickHouse-native and only exist for MySQL compatibility) have been made case insensitive, as expected for SQL-compatibility aliases. [#61114](https://github.com/ClickHouse/ClickHouse/pull/61114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improvements to the access checks, allowing revocation of unpossessed rights when the target user doesn't have the revoking grants either. Example: `GRANT SELECT ON *.* TO user1; REVOKE SELECT ON system.* FROM user1;`. [#61115](https://github.com/ClickHouse/ClickHouse/pull/61115) ([pufit](https://github.com/pufit)).
* Fix `has()` function with `Nullable` column (fixes [#60214](https://github.com/ClickHouse/ClickHouse/issues/60214)). [#61249](https://github.com/ClickHouse/ClickHouse/pull/61249) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Now it's possible to specify the attribute `merge="true"` in config substitutions for subtrees `<include from_zk="/path" merge="true">`. If this attribute is specified, ClickHouse will merge the subtree with the existing configuration; otherwise, the default behavior is to append the new content to the configuration. [#61299](https://github.com/ClickHouse/ClickHouse/pull/61299) ([alesapin](https://github.com/alesapin)).
* Add async metrics for virtual memory mappings: `VMMaxMapCount` & `VMNumMaps`. Closes [#60662](https://github.com/ClickHouse/ClickHouse/issues/60662). [#61354](https://github.com/ClickHouse/ClickHouse/pull/61354) ([Tuan Pham Anh](https://github.com/tuanpavn)).
* Use the `temporary_files_codec` setting in all places where we create temporary data, for example, external memory sorting and external memory GROUP BY. Previously, it worked only for the `partial_merge` JOIN algorithm. [#61456](https://github.com/ClickHouse/ClickHouse/pull/61456) ([Maksim Kita](https://github.com/kitaisreal)).
* Add a new setting `max_parser_backtracks` which limits the complexity of query parsing. [#61502](https://github.com/ClickHouse/ClickHouse/pull/61502) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Less contention during dynamic resize of the filesystem cache. [#61524](https://github.com/ClickHouse/ClickHouse/pull/61524) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disallow sharded mode of StorageS3 queue, because it will be rewritten. [#61537](https://github.com/ClickHouse/ClickHouse/pull/61537) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed typo: from `use_leagcy_max_level` to `use_legacy_max_level`. [#61545](https://github.com/ClickHouse/ClickHouse/pull/61545) ([William Schoeffel](https://github.com/wiledusc)).
* Remove some duplicate entries in `system.blob_storage_log`. [#61622](https://github.com/ClickHouse/ClickHouse/pull/61622) ([YenchangChan](https://github.com/YenchangChan)).
* Added `current_user` function as a compatibility alias for MySQL. [#61770](https://github.com/ClickHouse/ClickHouse/pull/61770) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix inconsistent floating point aggregate function states in mixed x86-64 / ARM clusters [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)).
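To make the virtual-columns entry above concrete, a sketch (the `hits` table is hypothetical; `_part_offset` is the MergeTree virtual column named in the entry):

```sql
-- Virtual columns are now allowed in PREWHERE:
SELECT _part_offset, * FROM hits PREWHERE _part_offset < 100;

-- DESCRIBE can include virtual columns, with built-in docs as column comments:
DESCRIBE TABLE hits SETTINGS describe_include_virtual_columns = 1;
```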
#### Build/Testing/Packaging Improvement
* The real-time query profiler now works on AArch64. In previous versions, it worked only when a program didn't spend time inside a syscall. [#60807](https://github.com/ClickHouse/ClickHouse/pull/60807) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ClickHouse version has been added to docker labels. Closes [#54224](https://github.com/ClickHouse/ClickHouse/issues/54224). [#60949](https://github.com/ClickHouse/ClickHouse/pull/60949) ([Nikolay Monkov](https://github.com/nikmonkov)).
* Upgrade `prqlc` to 0.11.3. [#60616](https://github.com/ClickHouse/ClickHouse/pull/60616) ([Maximilian Roos](https://github.com/max-sixty)).
* Add generic query text fuzzer in `clickhouse-local`. [#61508](https://github.com/ClickHouse/ClickHouse/pull/61508) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix `finished_mutations_to_keep=0` for MergeTree (as the docs say, 0 means keep everything) [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)).
* Something was wrong with the FINAL optimization, here is how the author describes it: "PartsSplitter invalid ranges for the same part". [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
* Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)).
* An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)).
* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Don't allow setting max_parallel_replicas to 0, as it doesn't make sense [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)).
* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)).
* Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Something was wrong with experimental KQL (Kusto) support: fix `max_query_size_for_kql_compound_operator`: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Keeper fix: add timeouts when waiting for commit logs [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)).
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
* Fix reading from MergeTree with non-deterministic functions in filter [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix logical error on bad compatibility setting value type [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)).
* fix(prql): Robust panic handler [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)).
* Fix `intDiv` for decimal and date arguments [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix: expand CTE in alter modify query [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory) [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)).
* Fix "Invalid storage definition in metadata file" for parameterized views [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove wrong assertion in aggregate function quantileGK [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)).
* Fix insert-select + insert_deduplication_token bug by setting streams to 1 [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix toStartOfInterval [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix possible stuck on error in HashedDictionaryParallelLoader [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)).
* Fix async RESTORE with Replicated database (experimental feature) [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix deadlock in async inserts to `Log` tables via native protocol [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)).
* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix usage of session_token in S3 engine [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
* Fix bugs in show database [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)).
* Fix logical error in RabbitMQ storage with MATERIALIZED columns [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)).
* Fix CREATE OR REPLACE DICTIONARY [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
* fix issue of actions dag split [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)).
* Fix finishing a failed RESTORE [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)).
* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)).
* Allow queuing in restore pool [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)).
* Fix an inconsistency when reading system.parts using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)).
* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)).
* Fix a crash in window view (experimental feature) [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `repeat` with non-native integers [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix client's `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix crash in arrayPartialReverseSort [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `addDays` causing an error when used with DateTime64 [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)).
* Disallow LowCardinality input type for JSONExtract [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)).
* Fix `system.part_log` for async insert with deduplication [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix a `Non-ready set` exception for system.parts. [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix actual_part_name for REPLACE_RANGE (`Entry actual part isn't empty yet`) [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a sanitizer report in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
* Fix the RANGE frame in window functions not being supported for Nullable columns. [#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)).
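For the last fix, a query of the shape that previously failed when the ordering column was Nullable; the table and columns are illustrative:

```sql
-- RANGE frames over a Nullable ordering column now work:
SELECT
    ts,
    sum(amount) OVER (ORDER BY ts RANGE BETWEEN 10 PRECEDING AND CURRENT ROW) AS rolling_sum
FROM events;  -- assuming ts Nullable(UInt32), amount UInt64
```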
### <a id="242"></a> ClickHouse release 24.2, 2024-02-29

#### Backward Incompatible Change

@@ -28,7 +28,6 @@ curl https://clickhouse.com/ | sh
 * [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
 * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
 * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev.
-* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
 * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
 ## Monthly Release & Community Call

@@ -13,18 +13,16 @@ The following versions of ClickHouse server are currently being supported with s
 | Version | Supported |
 |:-|:-|
+| 24.3 | ✔️ |
 | 24.2 | ✔️ |
 | 24.1 | ✔️ |
-| 23.12 | ✔️ |
-| 23.11 | ❌ |
-| 23.10 | ❌ |
-| 23.9 | ❌ |
+| 23.* | ❌ |
 | 23.8 | ✔️ |
 | 23.7 | ❌ |
 | 23.6 | ❌ |
 | 23.5 | ❌ |
 | 23.4 | ❌ |
-| 23.3 | ✔️ |
+| 23.3 | ❌ |
 | 23.2 | ❌ |
 | 23.1 | ❌ |
 | 22.* | ❌ |

@@ -2,11 +2,11 @@
 # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54484)
+SET(VERSION_REVISION 54485)
 SET(VERSION_MAJOR 24)
-SET(VERSION_MINOR 3)
+SET(VERSION_MINOR 4)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 891689a41506d00aa169548f5b4a8774351242c4)
-SET(VERSION_DESCRIBE v24.3.1.1-testing)
-SET(VERSION_STRING 24.3.1.1)
+SET(VERSION_GITHASH 2c5c589a882ceec35439650337b92db3e76f0081)
+SET(VERSION_DESCRIBE v24.4.1.1-testing)
+SET(VERSION_STRING 24.4.1.1)
 # end of autochange

@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.2.2.71"
+ARG VERSION="24.3.1.2672"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""

@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.2.2.71"
+ARG VERSION="24.3.1.2672"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""

@@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="24.2.2.71"
+ARG VERSION="24.3.1.2672"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 # set non-empty deb_location_url url to create a docker image

@@ -138,7 +138,7 @@ ENGINE = MergeTree
 PARTITION BY toYYYYMM(EventDate)
 ORDER BY (CounterID, EventDate, intHash32(UserID))
 SAMPLE BY intHash32(UserID)
-SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/', max_size = '4G',
+SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/stateful/', max_size = '4G',
     disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));
 ATTACH TABLE datasets.visits_v1 UUID '5131f834-711f-4168-98a5-968b691a104b'
@@ -329,5 +329,5 @@ ENGINE = CollapsingMergeTree(Sign)
 PARTITION BY toYYYYMM(StartDate)
 ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
 SAMPLE BY intHash32(UserID)
-SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/', max_size = '4G',
+SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/stateful/', max_size = '4G',
     disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));

@@ -0,0 +1,24 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.12.6.19-stable (40080a3c2a4) FIXME as compared to v23.12.5.81-stable (a0fbe3ae813)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#61429](https://github.com/ClickHouse/ClickHouse/issues/61429):. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61486](https://github.com/ClickHouse/ClickHouse/issues/61486): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)).
* Backported in [#61641](https://github.com/ClickHouse/ClickHouse/issues/61641):. [#61592](https://github.com/ClickHouse/ClickHouse/pull/61592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61811](https://github.com/ClickHouse/ClickHouse/issues/61811): ![Screenshot_20240323_025055](https://github.com/ClickHouse/ClickHouse/assets/18581488/ccaab212-a1d3-4dfb-8d56-b1991760b6bf). [#61801](https://github.com/ClickHouse/ClickHouse/pull/61801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

@@ -0,0 +1,13 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.3.22.3-lts (04075bf96a1) FIXME as compared to v23.3.21.26-lts (d9672a3731f)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).

@@ -0,0 +1,20 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.8.12.13-lts (bdbd0d87e5d) FIXME as compared to v23.8.11.28-lts (31879d2ab4c)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#61428](https://github.com/ClickHouse/ClickHouse/issues/61428):. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61484](https://github.com/ClickHouse/ClickHouse/issues/61484): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)).

@@ -0,0 +1,32 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.1.8.22-stable (7fb8f96d3da) FIXME as compared to v24.1.7.18-stable (90925babd78)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
* Fix bug when reading system.parts using UUID (issue 61220). [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)).
* Fix client `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#61431](https://github.com/ClickHouse/ClickHouse/issues/61431): Remove unnecessary layers from clickhouse/cctools. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61488](https://github.com/ClickHouse/ClickHouse/issues/61488): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)).
* Backported in [#61642](https://github.com/ClickHouse/ClickHouse/issues/61642): Improve build_download_helper. [#61592](https://github.com/ClickHouse/ClickHouse/pull/61592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#61479](https://github.com/ClickHouse/ClickHouse/issues/61479) to 24.1: Fix bug when reading system.parts using UUID (issue 61220)."'. [#61775](https://github.com/ClickHouse/ClickHouse/pull/61775) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,537 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.3.1.2672-lts (2c5c589a882) FIXME as compared to v24.2.1.2248-stable (891689a4150)
#### Backward Incompatible Change
* Don't allow setting max_parallel_replicas to 0, as it doesn't make sense. Setting it to 0 could lead to unexpected logical errors. Closes [#60140](https://github.com/ClickHouse/ClickHouse/issues/60140). [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)).
* Change the column name from `duration_ms` to `duration_microseconds` in the `system.zookeeper` table to reflect that the duration is measured in microseconds. [#60774](https://github.com/ClickHouse/ClickHouse/pull/60774) ([Duc Canh Le](https://github.com/canhld94)).
* Reject incoming INSERT queries when the query-level settings `async_insert` and `deduplicate_blocks_in_dependent_materialized_views` are enabled together. This behaviour is controlled by the setting `throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert` and enabled by default. This is a continuation of https://github.com/ClickHouse/ClickHouse/pull/59699 needed to unblock https://github.com/ClickHouse/ClickHouse/pull/59915. [#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Utility `clickhouse-copier` is moved to a separate repository on GitHub: https://github.com/ClickHouse/copier. It is no longer included in the bundle but is still available as a separate download. This closes: [#60734](https://github.com/ClickHouse/ClickHouse/issues/60734) This closes: [#60540](https://github.com/ClickHouse/ClickHouse/issues/60540) This closes: [#60250](https://github.com/ClickHouse/ClickHouse/issues/60250) This closes: [#52917](https://github.com/ClickHouse/ClickHouse/issues/52917) This closes: [#51140](https://github.com/ClickHouse/ClickHouse/issues/51140) This closes: [#47517](https://github.com/ClickHouse/ClickHouse/issues/47517) This closes: [#47189](https://github.com/ClickHouse/ClickHouse/issues/47189) This closes: [#46598](https://github.com/ClickHouse/ClickHouse/issues/46598) This closes: [#40257](https://github.com/ClickHouse/ClickHouse/issues/40257) This closes: [#36504](https://github.com/ClickHouse/ClickHouse/issues/36504) This closes: [#35485](https://github.com/ClickHouse/ClickHouse/issues/35485) This closes: [#33702](https://github.com/ClickHouse/ClickHouse/issues/33702) This closes: [#26702](https://github.com/ClickHouse/ClickHouse/issues/26702). [#61058](https://github.com/ClickHouse/ClickHouse/pull/61058) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* To increase compatibility with MySQL, function `locate` now accepts arguments `(needle, haystack[, start_pos])` by default. The previous behavior `(haystack, needle, [, start_pos])` can be restored by setting `function_locate_has_mysql_compatible_argument_order = 0`. [#61092](https://github.com/ClickHouse/ClickHouse/pull/61092) ([Robert Schulze](https://github.com/rschu1ze)).
* The obsolete in-memory data parts have been deprecated since version 23.5 and have not been supported since version 23.10. Now the remaining code is removed. Continuation of [#55186](https://github.com/ClickHouse/ClickHouse/issues/55186) and [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). It is unlikely that you have used in-memory data parts because they were available only before version 23.5 and only when you enabled them manually by specifying the corresponding SETTINGS for a MergeTree table. To check if you have in-memory data parts, run the following query: `SELECT part_type, count() FROM system.parts GROUP BY part_type ORDER BY part_type`. To disable the usage of in-memory data parts, do `ALTER TABLE ... MODIFY SETTING min_bytes_for_compact_part = DEFAULT, min_rows_for_compact_part = DEFAULT`. Before upgrading from old ClickHouse releases, first check that you don't have in-memory data parts. If there are in-memory data parts, disable them first, wait until there are no in-memory data parts, then continue the upgrade (see the sketches after this list). [#61127](https://github.com/ClickHouse/ClickHouse/pull/61127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Forbid `SimpleAggregateFunction` in `ORDER BY` of `MergeTree` tables by default, in the same way as `AggregateFunction` is forbidden (though `AggregateFunction` is forbidden because it is not comparable); use `allow_suspicious_primary_key` to allow them. [#61399](https://github.com/ClickHouse/ClickHouse/pull/61399) ([Azat Khuzhin](https://github.com/azat)).
* ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow data type to use for the ClickHouse String data type: String or Binary. This is controlled by the settings `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`, `output_format_arrow_string_as_string`. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases. Parquet/ORC/Arrow supports many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools lack support for the faster `lz4` compression method, that's why we set `zstd` by default. This is controlled by the settings `output_format_parquet_compression_method`, `output_format_orc_compression_method`, and `output_format_arrow_compression_method`. We changed the default to `zstd` for Parquet and ORC, but not Arrow (it is emphasized for low-level usages). [#61817](https://github.com/ClickHouse/ClickHouse/pull/61817) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* In the new ClickHouse version, the functions `geoDistance`, `greatCircleDistance`, and `greatCircleAngle` will use 64-bit double precision floating point data type for internal calculations and return type if all the arguments are Float64. This closes [#58476](https://github.com/ClickHouse/ClickHouse/issues/58476). In previous versions, the function always used Float32. You can switch to the old behavior by setting `geo_distance_returns_float64_on_float64_arguments` to `false` or setting `compatibility` to `24.2` or earlier. [#61848](https://github.com/ClickHouse/ClickHouse/pull/61848) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
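A minimal sketch of the new `locate` argument order described above; the return value in the comment is illustrative:

```sql
SELECT locate('House', 'ClickHouse');  -- (needle, haystack): returns 6
-- Restore the previous (haystack, needle) argument order:
SET function_locate_has_mysql_compatible_argument_order = 0;
```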
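The check and the mitigation for obsolete in-memory data parts, taken from the entry above; `my_table` is a placeholder table name:

```sql
-- Check whether any in-memory parts exist before upgrading:
SELECT part_type, count() FROM system.parts GROUP BY part_type ORDER BY part_type;
-- Disable in-memory parts for a table ('my_table' is a placeholder):
ALTER TABLE my_table MODIFY SETTING min_bytes_for_compact_part = DEFAULT, min_rows_for_compact_part = DEFAULT;
```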
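A sketch of the `geoDistance` change above; the coordinates are arbitrary:

```sql
-- All-Float64 arguments now yield a Float64 result:
SELECT geoDistance(toFloat64(37.62), toFloat64(55.75), toFloat64(139.69), toFloat64(35.68));
-- Opt back into the old Float32 behavior:
SET geo_distance_returns_float64_on_float64_arguments = 0;
```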
#### New Feature
* `topK`/`topKWeighted` support a mode that returns the count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)).
* Add generate_series as a table function. This function generates a table with an arithmetic progression of natural numbers (see the sketches after this list). [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([divanik](https://github.com/divanik)).
* Support reading and writing backups as tar archives. [#59535](https://github.com/ClickHouse/ClickHouse/pull/59535) ([josh-hildred](https://github.com/josh-hildred)).
* Implemented support for S3Express buckets. [#59965](https://github.com/ClickHouse/ClickHouse/pull/59965) ([Nikita Taranov](https://github.com/nickitat)).
* Allow attaching parts from a different disk: attach a partition from a table on other disks using a copy instead of a hard link (such as an instant table), and attach a partition using a copy when the hard link fails even on the same disk. [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112) ([Unalian](https://github.com/Unalian)).
* Added function `toMillisecond` which returns the millisecond component for values of type `DateTime` or `DateTime64` (see the sketches after this list). [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)).
* Make all format names case-insensitive, like Tsv, or TSV, or tsv, or even rowbinary (see the sketches after this list). [#60420](https://github.com/ClickHouse/ClickHouse/pull/60420) ([豪肥肥](https://github.com/HowePa)).
* Add four properties to the `StorageMemory` (memory engine): `min_bytes_to_keep`, `max_bytes_to_keep`, `min_rows_to_keep` and `max_rows_to_keep`. Also add tests to reflect the new changes, update the `memory.md` documentation, and add a table `context` property to `MemorySink` to enable access to table parameter bounds. [#60612](https://github.com/ClickHouse/ClickHouse/pull/60612) ([Jake Bamrah](https://github.com/JakeBamrah)).
* Added function `toMillisecond` which returns the millisecond component for values of type `DateTime` or `DateTime64`. [#60649](https://github.com/ClickHouse/ClickHouse/pull/60649) ([Robert Schulze](https://github.com/rschu1ze)).
* Separate limits on number of waiting and executing queries. Added new server setting `max_waiting_queries` that limits the number of queries waiting due to `async_load_databases`. Existing limits on number of executing queries no longer count waiting queries. [#61053](https://github.com/ClickHouse/ClickHouse/pull/61053) ([Sergei Trifonov](https://github.com/serxa)).
* Add support for `ATTACH PARTITION ALL`. [#61107](https://github.com/ClickHouse/ClickHouse/pull/61107) ([Kirill Nikiforov](https://github.com/allmazz)).
* Add a new function, `getClientHTTPHeader`. This closes [#54665](https://github.com/ClickHouse/ClickHouse/issues/54665). Co-authored with @lingtaolf. [#61820](https://github.com/ClickHouse/ClickHouse/pull/61820) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
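A sketch of the `generate_series` table function referenced above, assuming a PostgreSQL-like `(start, stop[, step])` signature:

```sql
SELECT * FROM generate_series(1, 5);      -- 1, 2, 3, 4, 5
SELECT * FROM generate_series(0, 10, 2);  -- 0, 2, 4, 6, 8, 10
```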
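A sketch of `toMillisecond` from the two entries above; the expected output in the comment is illustrative:

```sql
SELECT toMillisecond(toDateTime64('2024-03-27 21:33:59.123', 3));  -- 123
```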
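Case-insensitive format names, as described above:

```sql
-- All three spellings now select the same output format:
SELECT 1 FORMAT TSV;
SELECT 1 FORMAT Tsv;
SELECT 1 FORMAT tsv;
```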
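A sketch of `ATTACH PARTITION ALL`, assuming the same `FROM` form as a single-partition attach; `src` and `dst` are placeholder tables with identical structure:

```sql
-- Attach all partitions of 'src' to 'dst' ('src' and 'dst' are placeholders):
ALTER TABLE dst ATTACH PARTITION ALL FROM src;
```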
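A sketch of `getClientHTTPHeader`; the header name is an arbitrary example, and the call only makes sense for queries arriving over the HTTP interface:

```sql
-- Read an HTTP header of the current client connection:
SELECT getClientHTTPHeader('User-Agent');
```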
#### Performance Improvement
* Improve the performance of serialized aggregation method when involving multiple [nullable] columns. This is a general version of [#51399](https://github.com/ClickHouse/ClickHouse/issues/51399) that doesn't compromise on abstraction integrity. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
* Lazily build JOIN output to improve the performance of ALL joins. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
* Improvements to aggregate functions ArgMin / ArgMax / any / anyLast / anyHeavy, as well as `ORDER BY {u8/u16/u32/u64/i8/i16/i32/i64} LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)).
* Trivial optimization of column filtering: avoid filtering columns whose underlying data type is not a number with `result_size_hint = -1`. Peak memory can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)).
* If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value for at least 0.9 of all the times, the next columns after it will not be loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Execute the `multiIf` function in a columnar fashion when the result type's underlying type is a number. [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)).
* Faster (almost 2x) mutexes (they were slower due to ThreadFuzzer). [#60823](https://github.com/ClickHouse/ClickHouse/pull/60823) ([Azat Khuzhin](https://github.com/azat)).
* Move connection drain from prepare to work, and drain multiple connections in parallel. [#60845](https://github.com/ClickHouse/ClickHouse/pull/60845) ([lizhuoyu5](https://github.com/lzydmxy)).
* Optimize `insertManyFrom` for nullable numbers and nullable strings. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)).
* Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)).
* Operations with the filesystem cache will suffer less from the lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Optimize ColumnString::replicate and prevent memcpySmallAllowReadWriteOverflow15Impl from being optimized to built-in memcpy. Close [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074). ColumnString::replicate speeds up by 2.46x on x86-64. [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)).
* 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)).
* If a query with a syntax error contained a COLUMNS matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put into the AST. But the letter A in AST means "abstract", which means it should not contain heavyweight objects. Parts of the AST can be created and discarded during parsing, including a large amount of backtracking. This leads to slowness on the parsing side and consequently allows DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a new analyzer pass to optimize `IN` with a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)).
#### Improvement
* While running the MODIFY COLUMN query for materialized views, check the inner table's structure to ensure every column exists. [#47427](https://github.com/ClickHouse/ClickHouse/pull/47427) ([sunny](https://github.com/sunny19930321)).
* Added table `system.keywords` which contains all the keywords from the parser. Mostly needed and will be used for better fuzzing and syntax highlighting. [#51808](https://github.com/ClickHouse/ClickHouse/pull/51808) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* The Ordinary database engine is deprecated. You will receive a warning in clickhouse-client if your server is using it. This closes [#52229](https://github.com/ClickHouse/ClickHouse/issues/52229). [#56942](https://github.com/ClickHouse/ClickHouse/pull/56942) ([shabroo](https://github.com/shabroo)).
* All zero-copy locks related to a table have to be dropped when the table is dropped. The directory which contains these locks also has to be removed. [#57575](https://github.com/ClickHouse/ClickHouse/pull/57575) ([Sema Checherinda](https://github.com/CheSema)).
* Allow declaring enum in external table structure. [#57857](https://github.com/ClickHouse/ClickHouse/pull/57857) ([Duc Canh Le](https://github.com/canhld94)).
* Consider lightweight deleted rows when selecting parts to merge. [#58223](https://github.com/ClickHouse/ClickHouse/pull/58223) ([Zhuo Qiu](https://github.com/jewelzqiu)).
* Make HTTP/HTTPS connections reusable in all use cases, even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
* Added comments for columns for more system tables. Continuation of https://github.com/ClickHouse/ClickHouse/pull/58356. [#59016](https://github.com/ClickHouse/ClickHouse/pull/59016) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Now we can use virtual columns in PREWHERE. It's worthwhile for non-const virtual columns like `_part_offset`. [#59033](https://github.com/ClickHouse/ClickHouse/pull/59033) ([Amos Bird](https://github.com/amosbird)).
* Add ability to skip read-only replicas for INSERT into Distributed engine (Controlled with `distributed_insert_skip_read_only_replicas` setting, by default OFF - backward compatible). [#59176](https://github.com/ClickHouse/ClickHouse/pull/59176) ([Azat Khuzhin](https://github.com/azat)).
* Instead of using a constant key, object storage now generates a key for determining the remove-objects capability. [#59495](https://github.com/ClickHouse/ClickHouse/pull/59495) ([Sema Checherinda](https://github.com/CheSema)).
* Add positional pread in libhdfs3. If you want to call positional read in libhdfs3, use the hdfsPread function in hdfs.h as follows. `tSize hdfsPread(hdfsFS fs, hdfsFile file, void * buffer, tSize length, tOffset position);`. [#59624](https://github.com/ClickHouse/ClickHouse/pull/59624) ([M1eyu](https://github.com/M1eyu2018)).
* Add asynchronous WriteBuffer for AzureBlobStorage similar to S3. [#59929](https://github.com/ClickHouse/ClickHouse/pull/59929) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Allow "local" as object storage type instead of "local_blob_storage". [#60165](https://github.com/ClickHouse/ClickHouse/pull/60165) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Improved overall usability of virtual columns. Now it is allowed to use virtual columns in `PREWHERE` (it's worthwhile for non-const virtual columns like `_part_offset`). Now a builtin documentation is available for virtual columns as a comment of column in `DESCRIBE` query with enabled setting `describe_include_virtual_columns`. [#60205](https://github.com/ClickHouse/ClickHouse/pull/60205) ([Anton Popov](https://github.com/CurtizJ)).
* Parallel flush of pending INSERT blocks of Distributed engine on `DETACH`/server shutdown and `SYSTEM FLUSH DISTRIBUTED` (parallelism will work only if you have a multi-disk policy for the table, like everything in the Distributed engine right now). [#60225](https://github.com/ClickHouse/ClickHouse/pull/60225) ([Azat Khuzhin](https://github.com/azat)).
* Fix an improper filter setting in `joinRightColumnsSwitchNullability`; resolves [#59625](https://github.com/ClickHouse/ClickHouse/issues/59625). [#60259](https://github.com/ClickHouse/ClickHouse/pull/60259) ([lgbo](https://github.com/lgbo-ustc)).
* Add a setting to force read-through cache for merges. [#60308](https://github.com/ClickHouse/ClickHouse/pull/60308) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Issue [#57598](https://github.com/ClickHouse/ClickHouse/issues/57598) describes divergent behaviour regarding transaction handling: an issued COMMIT/ROLLBACK when no transaction is active is reported as an error, contrary to MySQL behaviour. [#60338](https://github.com/ClickHouse/ClickHouse/pull/60338) ([PapaToemmsn](https://github.com/PapaToemmsn)).
* Added `none_only_active` mode for `distributed_ddl_output_mode` setting. [#60340](https://github.com/ClickHouse/ClickHouse/pull/60340) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. [#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The advanced dashboard has slightly better colors for multi-line graphs. [#60391](https://github.com/ClickHouse/ClickHouse/pull/60391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Function `substring` now has a new alias `byteSlice`. [#60494](https://github.com/ClickHouse/ClickHouse/pull/60494) ([Robert Schulze](https://github.com/rschu1ze)).
* Renamed server setting `dns_cache_max_size` to `dns_cache_max_entries` to reduce ambiguity. [#60500](https://github.com/ClickHouse/ClickHouse/pull/60500) ([Kirill Nikiforov](https://github.com/allmazz)).
* `SHOW INDEX | INDEXES | INDICES | KEYS` no longer sorts by the primary key columns (which was unintuitive). [#60514](https://github.com/ClickHouse/ClickHouse/pull/60514) ([Robert Schulze](https://github.com/rschu1ze)).
* Keeper improvement: abort during startup if an invalid snapshot is detected to avoid data loss. [#60537](https://github.com/ClickHouse/ClickHouse/pull/60537) ([Antonio Andelic](https://github.com/antonio2368)).
* Added fault injection for splitting MergeTree read ranges into intersecting and non-intersecting ones, using the `merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_fault_probability` setting. [#60548](https://github.com/ClickHouse/ClickHouse/pull/60548) ([Maksim Kita](https://github.com/kitaisreal)).
* The Advanced dashboard now has controls always visible on scrolling. This allows you to add a new chart without scrolling up. [#60692](https://github.com/ClickHouse/ClickHouse/pull/60692) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* String types and Enums can be used in the same context, such as: arrays, UNION queries, conditional expressions. This closes [#60726](https://github.com/ClickHouse/ClickHouse/issues/60726). [#60727](https://github.com/ClickHouse/ClickHouse/pull/60727) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
* Support files without format extension in Filesystem database. [#60795](https://github.com/ClickHouse/ClickHouse/pull/60795) ([Kruglov Pavel](https://github.com/Avogar)).
* Keeper improvement: support `leadership_expiry_ms` in Keeper's settings. [#60806](https://github.com/ClickHouse/ClickHouse/pull/60806) ([Brokenice0415](https://github.com/Brokenice0415)).
* Always infer exponential numbers in JSON formats regardless of the setting `input_format_try_infer_exponent_floats`. Add setting `input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects` that allows using the String type for ambiguous paths instead of an exception during named Tuples inference from JSON objects. [#60808](https://github.com/ClickHouse/ClickHouse/pull/60808) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for `START TRANSACTION` syntax typically used in MySQL syntax, resolving https://github.com/ClickHouse/ClickHouse/discussions/60865. [#60886](https://github.com/ClickHouse/ClickHouse/pull/60886) ([Zach Naimon](https://github.com/ArctypeZach)).
* Add a flag for sort-merge join (SMJ) to treat NULL as the biggest/smallest value, so the behavior can be compatible with other SQL systems, like Apache Spark. [#60896](https://github.com/ClickHouse/ClickHouse/pull/60896) ([loudongfeng](https://github.com/loudongfeng)).
* The ClickHouse version has been added to Docker labels. Closes [#54224](https://github.com/ClickHouse/ClickHouse/issues/54224). [#60949](https://github.com/ClickHouse/ClickHouse/pull/60949) ([Nikolay Monkov](https://github.com/nikmonkov)).
* Add a setting `parallel_replicas_allow_in_with_subquery = 1` which allows subqueries for IN to work with parallel replicas. [#60950](https://github.com/ClickHouse/ClickHouse/pull/60950) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* DNSResolver shuffles the set of resolved IPs. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)).
* Support detecting the output format by file extension in `clickhouse-client` and `clickhouse-local`. [#61036](https://github.com/ClickHouse/ClickHouse/pull/61036) ([豪肥肥](https://github.com/HowePa)).
* Check memory limit update periodically. [#61049](https://github.com/ClickHouse/ClickHouse/pull/61049) ([Han Fei](https://github.com/hanfei1991)).
* Enable processors profiling (time spent/in and out bytes for sorting, aggregation, ...) by default. [#61096](https://github.com/ClickHouse/ClickHouse/pull/61096) ([Azat Khuzhin](https://github.com/azat)).
* Add the function `toUInt128OrZero`, which was missed by mistake (the mistake is related to https://github.com/ClickHouse/ClickHouse/pull/945). The compatibility aliases `FROM_UNIXTIME` and `DATE_FORMAT` (they are not ClickHouse-native and only exist for MySQL compatibility) have been made case-insensitive, as expected for SQL-compatibility aliases (see the sketches after this list). [#61114](https://github.com/ClickHouse/ClickHouse/pull/61114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improvements for the access checks, allowing revocation of unpossessed rights in case the target user doesn't have the revoked grants either. Example: `GRANT SELECT ON *.* TO user1; REVOKE SELECT ON system.* FROM user1;` (see the sketches after this list). [#61115](https://github.com/ClickHouse/ClickHouse/pull/61115) ([pufit](https://github.com/pufit)).
* Fix an error in the previous optimization (https://github.com/ClickHouse/ClickHouse/pull/59698): remove a `break` to make sure the first filtered column has the minimum size. [#61145](https://github.com/ClickHouse/ClickHouse/pull/61145) ([李扬](https://github.com/taiyang-li)).
* Fix `has()` function with `Nullable` column (fixes [#60214](https://github.com/ClickHouse/ClickHouse/issues/60214)). [#61249](https://github.com/ClickHouse/ClickHouse/pull/61249) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Now it's possible to specify the attribute `merge="true"` in config substitutions for subtrees `<include from_zk="/path" merge="true">`. If this attribute is specified, ClickHouse will merge the subtree with the existing configuration; otherwise, the default behavior is to append the new content to the configuration. [#61299](https://github.com/ClickHouse/ClickHouse/pull/61299) ([alesapin](https://github.com/alesapin)).
* Add async metrics for virtual memory mappings: VMMaxMapCount & VMNumMaps. Closes [#60662](https://github.com/ClickHouse/ClickHouse/issues/60662). [#61354](https://github.com/ClickHouse/ClickHouse/pull/61354) ([Tuan Pham Anh](https://github.com/tuanpavn)).
* Use `temporary_files_codec` setting in all places where we create temporary data, for example external memory sorting and external memory GROUP BY. Before it worked only in `partial_merge` JOIN algorithm. [#61456](https://github.com/ClickHouse/ClickHouse/pull/61456) ([Maksim Kita](https://github.com/kitaisreal)).
* Remove duplicated check `containing_part.empty()`; it's already being checked here: https://github.com/ClickHouse/ClickHouse/blob/1296dac3c7e47670872c15e3f5e58f869e0bd2f2/src/Storages/MergeTree/MergeTreeData.cpp#L6141. [#61467](https://github.com/ClickHouse/ClickHouse/pull/61467) ([William Schoeffel](https://github.com/wiledusc)).
* Add a new setting `max_parser_backtracks` which allows limiting the complexity of query parsing (see the sketches after this list). [#61502](https://github.com/ClickHouse/ClickHouse/pull/61502) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support parallel reading for Azure Blob Storage. [#61503](https://github.com/ClickHouse/ClickHouse/pull/61503) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Less contention during dynamic resize of filesystem cache. [#61524](https://github.com/ClickHouse/ClickHouse/pull/61524) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disallow sharded mode of StorageS3 queue, because it will be rewritten. [#61537](https://github.com/ClickHouse/ClickHouse/pull/61537) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed typo: from `use_leagcy_max_level` to `use_legacy_max_level`. [#61545](https://github.com/ClickHouse/ClickHouse/pull/61545) ([William Schoeffel](https://github.com/wiledusc)).
* Remove some duplicate entries in blob_storage_log. [#61622](https://github.com/ClickHouse/ClickHouse/pull/61622) ([YenchangChan](https://github.com/YenchangChan)).
* Enable `allow_experimental_analyzer` setting by default. [#61652](https://github.com/ClickHouse/ClickHouse/pull/61652) ([Dmitry Novik](https://github.com/novikd)).
* Added `current_user` function as a compatibility alias for MySQL. [#61770](https://github.com/ClickHouse/ClickHouse/pull/61770) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Use managed identity for backups IO when using Azure Blob Storage. Add a setting to prevent ClickHouse from attempting to create a non-existent container, which requires permissions at the storage account level. [#61785](https://github.com/ClickHouse/ClickHouse/pull/61785) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Enable `output_format_pretty_row_numbers` by default. It is better for usability. [#61791](https://github.com/ClickHouse/ClickHouse/pull/61791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* In the previous version, some numbers in Pretty formats were not pretty enough. [#61794](https://github.com/ClickHouse/ClickHouse/pull/61794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A long value in Pretty formats won't be cut if it is the single value in the resultset, such as in the result of the `SHOW CREATE TABLE` query. [#61795](https://github.com/ClickHouse/ClickHouse/pull/61795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Similarly to `clickhouse-local`, `clickhouse-client` will accept the `--output-format` option as a synonym to the `--format` option. This closes [#59848](https://github.com/ClickHouse/ClickHouse/issues/59848). [#61797](https://github.com/ClickHouse/ClickHouse/pull/61797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* If stdout is a terminal and the output format is not specified, `clickhouse-client` and similar tools will use `PrettyCompact` by default, similarly to the interactive mode. `clickhouse-client` and `clickhouse-local` will handle command line arguments for input and output formats in a unified fashion. This closes [#61272](https://github.com/ClickHouse/ClickHouse/issues/61272). [#61800](https://github.com/ClickHouse/ClickHouse/pull/61800) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Underscore digit groups in Pretty formats for better readability. This is controlled by a new setting, `output_format_pretty_highlight_digit_groups`. [#61802](https://github.com/ClickHouse/ClickHouse/pull/61802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add ability to override initial INSERT SETTINGS via SYSTEM FLUSH DISTRIBUTED. [#61832](https://github.com/ClickHouse/ClickHouse/pull/61832) ([Azat Khuzhin](https://github.com/azat)).
* Fixed grammar from "a" to "the" in the warning message. There is only one Atomic engine, so it should be "to the new Atomic engine" instead of "to a new Atomic engine". [#61952](https://github.com/ClickHouse/ClickHouse/pull/61952) ([shabroo](https://github.com/shabroo)).
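The new `byteSlice` alias from the entry above, shown next to `substring`:

```sql
SELECT substring('ClickHouse', 1, 5), byteSlice('ClickHouse', 1, 5);  -- 'Click', 'Click'
```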
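The now case-insensitive MySQL-compatibility aliases mentioned above:

```sql
SELECT FROM_UNIXTIME(0), from_unixtime(0);  -- both spellings resolve to the same function
```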
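The access-check example from the entry above, as runnable statements; `user1` must already exist:

```sql
GRANT SELECT ON *.* TO user1;
-- Now allowed even though SELECT on system.* was never granted explicitly:
REVOKE SELECT ON system.* FROM user1;
```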
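The new parser limit from the entry above; the value is an arbitrary illustration, not the default:

```sql
SET max_parser_backtracks = 100000;
```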
#### Build/Testing/Packaging Improvement
* Update sccache to the latest version; significantly reduce images size by reshaking the dependency trees; use the latest working odbc driver. [#59953](https://github.com/ClickHouse/ClickHouse/pull/59953) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update python related style checkers. Continue the [#50174](https://github.com/ClickHouse/ClickHouse/issues/50174). [#60408](https://github.com/ClickHouse/ClickHouse/pull/60408) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Upgrade `prqlc` to 0.11.3. [#60616](https://github.com/ClickHouse/ClickHouse/pull/60616) ([Maximilian Roos](https://github.com/max-sixty)).
* Attach gdb to running fuzzer process. [#60654](https://github.com/ClickHouse/ClickHouse/pull/60654) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Use explicit template instantiation more aggressively. Get rid of templates in favor of overloaded functions in some places. [#60730](https://github.com/ClickHouse/ClickHouse/pull/60730) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* The real-time query profiler now works on AArch64. In previous versions, it worked only when a program didn't spend time inside a syscall. [#60807](https://github.com/ClickHouse/ClickHouse/pull/60807) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ... Too big translation unit in `Aggregator`. [#61211](https://github.com/ClickHouse/ClickHouse/pull/61211) ([lgbo](https://github.com/lgbo-ustc)).
* Fixed flakiness of 01603_insert_select_too_many_parts test. Closes [#61158](https://github.com/ClickHouse/ClickHouse/issues/61158). [#61259](https://github.com/ClickHouse/ClickHouse/pull/61259) ([Ilya Yatsishin](https://github.com/qoega)).
* Now it is possible to use `chassert(expression, comment)` in the codebase. [#61263](https://github.com/ClickHouse/ClickHouse/pull/61263) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Teach the fuzzer to use other numeric types. [#61317](https://github.com/ClickHouse/ClickHouse/pull/61317) ([Raúl Marín](https://github.com/Algunenano)).
* Increase memory limit for coverage builds. [#61405](https://github.com/ClickHouse/ClickHouse/pull/61405) ([Raúl Marín](https://github.com/Algunenano)).
* Add generic query text fuzzer in `clickhouse-local`. [#61508](https://github.com/ClickHouse/ClickHouse/pull/61508) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix function execution over const and LowCardinality with GROUP BY const for analyzer [#59986](https://github.com/ClickHouse/ClickHouse/pull/59986) ([Azat Khuzhin](https://github.com/azat)).
* Fix finished_mutations_to_keep=0 for MergeTree (as the docs say, 0 means keep everything) [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)).
* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
* Azure Blob Storage: fix issues with endpoint and prefix [#60251](https://github.com/ClickHouse/ClickHouse/pull/60251) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix LRUResource Cache bug (Hive cache) [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)).
* Force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)).
* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)).
* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix `max_query_size` for KQL compound operator [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Keeper fix: add timeouts when waiting for commit logs [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)).
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
* Fix reading from MergeTree with non-deterministic functions in filter [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix logical error on bad compatibility setting value type [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix inconsistent aggregate function states in mixed x86-64 / ARM clusters [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)).
* fix(prql): Robust panic handler [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)).
* Fix `intDiv` for decimal and date arguments [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix: expand CTE in alter modify query [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory) [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)).
* Fix "Invalid storage definition in metadata file" for parameterized views [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove wrong sanitize checking in aggregate function quantileGK [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)).
* Fix insert-select + insert_deduplication_token bug by setting streams to 1 [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix toStartOfInterval [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix possible stuck on error in HashedDictionaryParallelLoader [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)).
* Fix async RESTORE with Replicated database [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix CSV format not supporting tuples [#60994](https://github.com/ClickHouse/ClickHouse/pull/60994) ([shuai.xu](https://github.com/shuai-xu)).
* Fix deadlock in async inserts to `Log` tables via native protocol [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)).
* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix usage of session_token in S3 engine [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
* Fix bugs in show database [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)).
* Fix logical error in RabbitMQ storage with MATERIALIZED columns [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)).
* Fix CREATE OR REPLACE DICTIONARY [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash in ObjectJson parsing array with nulls [#61364](https://github.com/ClickHouse/ClickHouse/pull/61364) ([vdimir](https://github.com/vdimir)).
* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
* Fix issue of actions DAG split [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)).
* Fix finishing a failed RESTORE [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)).
* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)).
* Allow queuing in restore pool [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)).
* Fix bug when reading system.parts using UUID (issue [#61220](https://github.com/ClickHouse/ClickHouse/issues/61220)). [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)).
* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)).
* Fix crash in window view [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `repeat` with non native integers [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix client `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reset part level upon attach from disk on MergeTree [#61536](https://github.com/ClickHouse/ClickHouse/pull/61536) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix crash in arrayPartialReverseSort [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix addDays cause an error when used datetime64 [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)).
* Disallow `LowCardinality` input type for `JSONExtract` [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)).
* Fix `system.part_log` for async insert with deduplication [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Non-ready set for system.parts. [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Don't allow the same expression in ORDER BY with and without WITH FILL [#61667](https://github.com/ClickHouse/ClickHouse/pull/61667) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix actual_part_name for REPLACE_RANGE (`Entry actual part isn't empty yet`) [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix columns after executing MODIFY QUERY for a materialized view with internal table [#61734](https://github.com/ClickHouse/ClickHouse/pull/61734) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
* Fix `RANGE` frame not being supported for Nullable columns. [#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)).
* Revert "Revert "Fix bug when reading system.parts using UUID (issue 61220)."" [#61779](https://github.com/ClickHouse/ClickHouse/pull/61779) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
#### CI Fix or Improvement (changelog entry is not required)
* Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)).
* Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)).
* Do not await CI pending jobs on release branches; decrease the wait timeout to fit into the GitHub job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)).
* Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)).
* ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)).
* ... [#60947](https://github.com/ClickHouse/ClickHouse/pull/60947) ([Max K.](https://github.com/maxknv)).
* ... [#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)).
* ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)).
* ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)).
* Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
* ... [#61133](https://github.com/ClickHouse/ClickHouse/pull/61133) ([Max K.](https://github.com/maxknv)).
* In PRs: run typos and aspell checks always; run pylint and mypy only if Python file(s) changed in the PR; run the basic source-file style check only if not all changes are in Python files. [#61148](https://github.com/ClickHouse/ClickHouse/pull/61148) ([Max K.](https://github.com/maxknv)).
* ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)).
* ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
* ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
* TBD. [#61197](https://github.com/ClickHouse/ClickHouse/pull/61197) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)).
* ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)).
* ![Screenshot_20240323_025055](https://github.com/ClickHouse/ClickHouse/assets/18581488/ccaab212-a1d3-4dfb-8d56-b1991760b6bf). [#61801](https://github.com/ClickHouse/ClickHouse/pull/61801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ... [#61877](https://github.com/ClickHouse/ClickHouse/pull/61877) ([Max K.](https://github.com/maxknv)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Revert "Use `MergeTree` as a default table engine""'. [#60524](https://github.com/ClickHouse/ClickHouse/pull/60524) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Revert "Support resource request canceling""'. [#60558](https://github.com/ClickHouse/ClickHouse/pull/60558) ([Sergei Trifonov](https://github.com/serxa)).
* NO CL ENTRY: 'Revert "Add `toMillisecond` function"'. [#60644](https://github.com/ClickHouse/ClickHouse/pull/60644) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Synchronize parsers"'. [#60759](https://github.com/ClickHouse/ClickHouse/pull/60759) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix wacky primary key sorting in `SHOW INDEX`"'. [#60898](https://github.com/ClickHouse/ClickHouse/pull/60898) ([Antonio Andelic](https://github.com/antonio2368)).
* NO CL ENTRY: 'Revert "CI: make style check faster"'. [#61142](https://github.com/ClickHouse/ClickHouse/pull/61142) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Don't allow to set max_parallel_replicas to 0 as it doesn't make sense"'. [#61200](https://github.com/ClickHouse/ClickHouse/pull/61200) ([Kruglov Pavel](https://github.com/Avogar)).
* NO CL ENTRY: 'Revert "Fix usage of session_token in S3 engine"'. [#61359](https://github.com/ClickHouse/ClickHouse/pull/61359) ([Antonio Andelic](https://github.com/antonio2368)).
* NO CL ENTRY: 'Revert "Revert "Fix usage of session_token in S3 engine""'. [#61362](https://github.com/ClickHouse/ClickHouse/pull/61362) ([Kruglov Pavel](https://github.com/Avogar)).
* NO CL ENTRY: 'Reorder hidden and shown checks in comment, change url of Mergeable check'. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Remove unnecessary layers from clickhouse/cctools'. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "Updated format settings references in the docs (datetime.md)"'. [#61435](https://github.com/ClickHouse/ClickHouse/pull/61435) ([Kruglov Pavel](https://github.com/Avogar)).
* NO CL ENTRY: 'Revert "CI: ARM integration tests: disable tests with HDFS "'. [#61449](https://github.com/ClickHouse/ClickHouse/pull/61449) ([Max K.](https://github.com/maxknv)).
* NO CL ENTRY: 'Revert "Analyzer: Fix virtual columns in StorageMerge"'. [#61518](https://github.com/ClickHouse/ClickHouse/pull/61518) ([Antonio Andelic](https://github.com/antonio2368)).
* NO CL ENTRY: 'Revert "Revert "Analyzer: Fix virtual columns in StorageMerge""'. [#61528](https://github.com/ClickHouse/ClickHouse/pull/61528) ([Dmitry Novik](https://github.com/novikd)).
* NO CL ENTRY: 'Improve build_download_helper'. [#61592](https://github.com/ClickHouse/ClickHouse/pull/61592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "Un-flake `test_undrop_query`"'. [#61668](https://github.com/ClickHouse/ClickHouse/pull/61668) ([Robert Schulze](https://github.com/rschu1ze)).
* NO CL ENTRY: 'Fix flaky tests (stateless, integration)'. [#61816](https://github.com/ClickHouse/ClickHouse/pull/61816) ([Nikita Fomichev](https://github.com/fm4v)).
* NO CL ENTRY: 'Better usability of "expect" tests: less trouble with running directly'. [#61818](https://github.com/ClickHouse/ClickHouse/pull/61818) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Fix flaky `02122_parallel_formatting_Template`"'. [#61868](https://github.com/ClickHouse/ClickHouse/pull/61868) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Add --now option to enable and start the service" #job_Install_packages_amd64'. [#61878](https://github.com/ClickHouse/ClickHouse/pull/61878) ([Max K.](https://github.com/maxknv)).
* NO CL ENTRY: 'Revert "disallow LowCardinality input type for JSONExtract"'. [#61960](https://github.com/ClickHouse/ClickHouse/pull/61960) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Improve query performance in case of very small blocks [#58879](https://github.com/ClickHouse/ClickHouse/pull/58879) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: fixes for JOIN columns resolution [#59007](https://github.com/ClickHouse/ClickHouse/pull/59007) ([vdimir](https://github.com/vdimir)).
* Fix race on `Context::async_insert_queue` [#59082](https://github.com/ClickHouse/ClickHouse/pull/59082) ([Alexander Tokmakov](https://github.com/tavplubix)).
* CI: support batch specification in commit message [#59738](https://github.com/ClickHouse/ClickHouse/pull/59738) ([Max K.](https://github.com/maxknv)).
* Update storing-data.md [#60024](https://github.com/ClickHouse/ClickHouse/pull/60024) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Make max_insert_delayed_streams_for_parallel_write actually work [#60079](https://github.com/ClickHouse/ClickHouse/pull/60079) ([alesapin](https://github.com/alesapin)).
* Analyzer: support join using column from select list [#60182](https://github.com/ClickHouse/ClickHouse/pull/60182) ([vdimir](https://github.com/vdimir)).
* Test for [#60223](https://github.com/ClickHouse/ClickHouse/issues/60223) [#60258](https://github.com/ClickHouse/ClickHouse/pull/60258) ([Denny Crane](https://github.com/den-crane)).
* Analyzer: Refactor execution name for ConstantNode [#60313](https://github.com/ClickHouse/ClickHouse/pull/60313) ([Dmitry Novik](https://github.com/novikd)).
* Fix database iterator waiting code [#60314](https://github.com/ClickHouse/ClickHouse/pull/60314) ([Sergei Trifonov](https://github.com/serxa)).
* QueryCache: Don't acquire the query count mutex if not necessary [#60348](https://github.com/ClickHouse/ClickHouse/pull/60348) ([zhongyuankai](https://github.com/zhongyuankai)).
* Fix bugfix check (due to unknown commit_logs_cache_size_threshold) [#60375](https://github.com/ClickHouse/ClickHouse/pull/60375) ([Azat Khuzhin](https://github.com/azat)).
* Enable testing with `io_uring` back [#60383](https://github.com/ClickHouse/ClickHouse/pull/60383) ([Nikita Taranov](https://github.com/nickitat)).
* Analyzer - improve hiding secret arguments. [#60386](https://github.com/ClickHouse/ClickHouse/pull/60386) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)).
* Improve test test_reload_clusters_config [#60426](https://github.com/ClickHouse/ClickHouse/pull/60426) ([Kruglov Pavel](https://github.com/Avogar)).
* Revert "Revert "Merge pull request [#56864](https://github.com/ClickHouse/ClickHouse/issues/56864) from ClickHouse/broken-projections-better-handling"" [#60452](https://github.com/ClickHouse/ClickHouse/pull/60452) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Do not check to and from files existence in metadata_storage because it does not see uncommitted changes [#60462](https://github.com/ClickHouse/ClickHouse/pull/60462) ([Alexander Gololobov](https://github.com/davenger)).
* Fix ambiguous option in `clickhouse-local` [#60475](https://github.com/ClickHouse/ClickHouse/pull/60475) ([豪肥肥](https://github.com/HowePa)).
* Fix: test_parallel_replicas_custom_key_load_balancing [#60485](https://github.com/ClickHouse/ClickHouse/pull/60485) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix: progress bar for *Cluster table functions [#60491](https://github.com/ClickHouse/ClickHouse/pull/60491) ([Igor Nikonov](https://github.com/devcrafter)).
* Analyzer: Support different ObjectJSON on shards [#60497](https://github.com/ClickHouse/ClickHouse/pull/60497) ([Dmitry Novik](https://github.com/novikd)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
* Refactor StorageSystemOneBlock [#60510](https://github.com/ClickHouse/ClickHouse/pull/60510) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Simple cleanup while fixing progress bar [#60513](https://github.com/ClickHouse/ClickHouse/pull/60513) ([Igor Nikonov](https://github.com/devcrafter)).
* PullingAsyncPipelineExecutor cleanup [#60515](https://github.com/ClickHouse/ClickHouse/pull/60515) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix bad error message [#60518](https://github.com/ClickHouse/ClickHouse/pull/60518) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Synchronize Access [#60519](https://github.com/ClickHouse/ClickHouse/pull/60519) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Synchronize metrics and Keeper [#60520](https://github.com/ClickHouse/ClickHouse/pull/60520) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enforce clang-tidy in `programs/` and `utils/` headers [#60521](https://github.com/ClickHouse/ClickHouse/pull/60521) ([Robert Schulze](https://github.com/rschu1ze)).
* Synchronize parsers [#60522](https://github.com/ClickHouse/ClickHouse/pull/60522) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a bunch of clang-tidy warnings in headers [#60523](https://github.com/ClickHouse/ClickHouse/pull/60523) ([Robert Schulze](https://github.com/rschu1ze)).
* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update Chinese document for max_query_size, max_parser_depth and optimize_functions_to_subcolumns [#60541](https://github.com/ClickHouse/ClickHouse/pull/60541) ([Alex Cheng](https://github.com/Alex-Cheng)).
* Userspace page cache again [#60552](https://github.com/ClickHouse/ClickHouse/pull/60552) ([Michael Kolupaev](https://github.com/al13n321)).
* Traverse shadow directory for system.remote_data_paths [#60585](https://github.com/ClickHouse/ClickHouse/pull/60585) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Add test for [#58906](https://github.com/ClickHouse/ClickHouse/issues/58906) [#60597](https://github.com/ClickHouse/ClickHouse/pull/60597) ([Raúl Marín](https://github.com/Algunenano)).
* Use python zipfile to have x-platform idempotent lambda packages [#60603](https://github.com/ClickHouse/ClickHouse/pull/60603) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* tests: suppress data-race in librdkafka statistics code [#60604](https://github.com/ClickHouse/ClickHouse/pull/60604) ([Azat Khuzhin](https://github.com/azat)).
* Update version after release [#60605](https://github.com/ClickHouse/ClickHouse/pull/60605) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v24.2.1.2248-stable [#60607](https://github.com/ClickHouse/ClickHouse/pull/60607) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Addition to changelog [#60609](https://github.com/ClickHouse/ClickHouse/pull/60609) ([Anton Popov](https://github.com/CurtizJ)).
* internal: Refine rust prql code [#60617](https://github.com/ClickHouse/ClickHouse/pull/60617) ([Maximilian Roos](https://github.com/max-sixty)).
* fix(rust): Fix skim's panic handler [#60621](https://github.com/ClickHouse/ClickHouse/pull/60621) ([Maximilian Roos](https://github.com/max-sixty)).
* Resubmit "Analyzer: compute ALIAS columns right after reading" [#60641](https://github.com/ClickHouse/ClickHouse/pull/60641) ([vdimir](https://github.com/vdimir)).
* Analyzer: Fix bug with join_use_nulls and PREWHERE [#60655](https://github.com/ClickHouse/ClickHouse/pull/60655) ([vdimir](https://github.com/vdimir)).
* Add test for [#59891](https://github.com/ClickHouse/ClickHouse/issues/59891) [#60657](https://github.com/ClickHouse/ClickHouse/pull/60657) ([Raúl Marín](https://github.com/Algunenano)).
* Fix missed entries in system.part_log in case of fetch preferred over merges/mutations [#60659](https://github.com/ClickHouse/ClickHouse/pull/60659) ([Azat Khuzhin](https://github.com/azat)).
* Always apply first minmax index among available skip indices [#60675](https://github.com/ClickHouse/ClickHouse/pull/60675) ([Igor Nikonov](https://github.com/devcrafter)).
* Remove bad test `02152_http_external_tables_memory_tracking` [#60690](https://github.com/ClickHouse/ClickHouse/pull/60690) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix questionable behavior in the `parseDateTimeBestEffort` function. [#60691](https://github.com/ClickHouse/ClickHouse/pull/60691) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix flaky checks [#60694](https://github.com/ClickHouse/ClickHouse/pull/60694) ([Azat Khuzhin](https://github.com/azat)).
* Resubmit http_external_tables_memory_tracking test [#60695](https://github.com/ClickHouse/ClickHouse/pull/60695) ([Azat Khuzhin](https://github.com/azat)).
* Fix bugfix and upgrade checks (due to "Unknown handler type 'redirect'" error) [#60696](https://github.com/ClickHouse/ClickHouse/pull/60696) ([Azat Khuzhin](https://github.com/azat)).
* Fix test_grant_and_revoke/test.py::test_grant_all_on_table (after syncing with cloud) [#60699](https://github.com/ClickHouse/ClickHouse/pull/60699) ([Azat Khuzhin](https://github.com/azat)).
* Remove unit test for ColumnObject [#60709](https://github.com/ClickHouse/ClickHouse/pull/60709) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve unit tests [#60710](https://github.com/ClickHouse/ClickHouse/pull/60710) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix scheduler fairness test [#60712](https://github.com/ClickHouse/ClickHouse/pull/60712) ([Sergei Trifonov](https://github.com/serxa)).
* Do not retry queries if container is down in integration tests (resubmit) [#60714](https://github.com/ClickHouse/ClickHouse/pull/60714) ([Azat Khuzhin](https://github.com/azat)).
* Mark one setting as obsolete [#60715](https://github.com/ClickHouse/ClickHouse/pull/60715) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a test with Analyzer [#60723](https://github.com/ClickHouse/ClickHouse/pull/60723) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Two tests are fixed with Analyzer [#60724](https://github.com/ClickHouse/ClickHouse/pull/60724) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove old code [#60728](https://github.com/ClickHouse/ClickHouse/pull/60728) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove more code from LIVE VIEW [#60729](https://github.com/ClickHouse/ClickHouse/pull/60729) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `test_keeper_back_to_back/test.py::test_concurrent_watches` [#60749](https://github.com/ClickHouse/ClickHouse/pull/60749) ([Antonio Andelic](https://github.com/antonio2368)).
* Catch exceptions on finalize in `InterserverIOHTTPHandler` [#60769](https://github.com/ClickHouse/ClickHouse/pull/60769) ([Antonio Andelic](https://github.com/antonio2368)).
* Reduce flakiness of 02932_refreshable_materialized_views [#60771](https://github.com/ClickHouse/ClickHouse/pull/60771) ([Michael Kolupaev](https://github.com/al13n321)).
* Use 64-bit capabilities if available [#60775](https://github.com/ClickHouse/ClickHouse/pull/60775) ([Azat Khuzhin](https://github.com/azat)).
* Include multiline logs in fuzzer fatal.log report [#60796](https://github.com/ClickHouse/ClickHouse/pull/60796) ([Raúl Marín](https://github.com/Algunenano)).
* Add missing clone calls related to compression [#60810](https://github.com/ClickHouse/ClickHouse/pull/60810) ([Raúl Marín](https://github.com/Algunenano)).
* New private runners [#60811](https://github.com/ClickHouse/ClickHouse/pull/60811) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Move userspace page cache settings to the correct section of SettingsChangeHistory.h [#60812](https://github.com/ClickHouse/ClickHouse/pull/60812) ([Michael Kolupaev](https://github.com/al13n321)).
* Update version_date.tsv and changelogs after v23.8.10.43-lts [#60851](https://github.com/ClickHouse/ClickHouse/pull/60851) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix fuzzer report [#60853](https://github.com/ClickHouse/ClickHouse/pull/60853) ([Raúl Marín](https://github.com/Algunenano)).
* Update version_date.tsv and changelogs after v23.3.20.27-lts [#60857](https://github.com/ClickHouse/ClickHouse/pull/60857) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Refactor OptimizeDateOrDateTimeConverterWithPreimageVisitor [#60875](https://github.com/ClickHouse/ClickHouse/pull/60875) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* Fix race in PageCache [#60878](https://github.com/ClickHouse/ClickHouse/pull/60878) ([Michael Kolupaev](https://github.com/al13n321)).
* Small changes in async inserts code [#60885](https://github.com/ClickHouse/ClickHouse/pull/60885) ([Nikita Taranov](https://github.com/nickitat)).
* Remove useless verbose logging from AWS library [#60921](https://github.com/ClickHouse/ClickHouse/pull/60921) ([alesapin](https://github.com/alesapin)).
* Throw on query timeout in ZooKeeperRetries [#60922](https://github.com/ClickHouse/ClickHouse/pull/60922) ([Antonio Andelic](https://github.com/antonio2368)).
* Bring clickhouse-test changes from private [#60924](https://github.com/ClickHouse/ClickHouse/pull/60924) ([Raúl Marín](https://github.com/Algunenano)).
* Add debug info to exceptions in `IMergeTreeDataPart::checkConsistency()` [#60981](https://github.com/ClickHouse/ClickHouse/pull/60981) ([Nikita Taranov](https://github.com/nickitat)).
* Fix a typo [#60987](https://github.com/ClickHouse/ClickHouse/pull/60987) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Replace some header includes with forward declarations [#61003](https://github.com/ClickHouse/ClickHouse/pull/61003) ([Amos Bird](https://github.com/amosbird)).
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix ASTRenameQuery::clone [#61013](https://github.com/ClickHouse/ClickHouse/pull/61013) ([vdimir](https://github.com/vdimir)).
* Update README.md [#61021](https://github.com/ClickHouse/ClickHouse/pull/61021) ([Tyler Hannan](https://github.com/tylerhannan)).
* Fix TableFunctionExecutable::skipAnalysisForArguments [#61037](https://github.com/ClickHouse/ClickHouse/pull/61037) ([Dmitry Novik](https://github.com/novikd)).
* Fix: parallel replicas with PREWHERE (ubsan) [#61052](https://github.com/ClickHouse/ClickHouse/pull/61052) ([Igor Nikonov](https://github.com/devcrafter)).
* Fast fix tests. [#61056](https://github.com/ClickHouse/ClickHouse/pull/61056) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `test_placement_info` [#61057](https://github.com/ClickHouse/ClickHouse/pull/61057) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Fix: parallel replicas with CTEs, crash in EXPLAIN SYNTAX with analyzer [#61059](https://github.com/ClickHouse/ClickHouse/pull/61059) ([Igor Nikonov](https://github.com/devcrafter)).
* Debug fuzzer failures [#61062](https://github.com/ClickHouse/ClickHouse/pull/61062) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Add regression tests for fixed issues [#61076](https://github.com/ClickHouse/ClickHouse/pull/61076) ([Antonio Andelic](https://github.com/antonio2368)).
* Analyzer: Fix 01244_optimize_distributed_group_by_sharding_key [#61089](https://github.com/ClickHouse/ClickHouse/pull/61089) ([Dmitry Novik](https://github.com/novikd)).
* Use global scalars cache with analyzer [#61104](https://github.com/ClickHouse/ClickHouse/pull/61104) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix removing is_active node after re-creation [#61105](https://github.com/ClickHouse/ClickHouse/pull/61105) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Update 02962_system_sync_replica_lightweight_from_modifier.sh [#61110](https://github.com/ClickHouse/ClickHouse/pull/61110) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Simplify bridges [#61118](https://github.com/ClickHouse/ClickHouse/pull/61118) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* update cppkafka to v0.4.1 [#61119](https://github.com/ClickHouse/ClickHouse/pull/61119) ([Ilya Golshtein](https://github.com/ilejn)).
* CI: add wf class in ci_config [#61122](https://github.com/ClickHouse/ClickHouse/pull/61122) ([Max K.](https://github.com/maxknv)).
* QueryFuzzer: replace element randomly when AST part buffer is full [#61124](https://github.com/ClickHouse/ClickHouse/pull/61124) ([Tomer Shafir](https://github.com/tomershafir)).
* CI: make style check fast [#61125](https://github.com/ClickHouse/ClickHouse/pull/61125) ([Max K.](https://github.com/maxknv)).
* Better gitignore [#61128](https://github.com/ClickHouse/ClickHouse/pull/61128) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix something strange [#61129](https://github.com/ClickHouse/ClickHouse/pull/61129) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update check-large-objects.sh to be language neutral [#61130](https://github.com/ClickHouse/ClickHouse/pull/61130) ([Dan Wu](https://github.com/wudanzy)).
* Throw memory limit exceptions to avoid OOM in some places [#61132](https://github.com/ClickHouse/ClickHouse/pull/61132) ([alesapin](https://github.com/alesapin)).
* Fix test_distributed_directory_monitor_split_batch_on_failure flakiness [#61136](https://github.com/ClickHouse/ClickHouse/pull/61136) ([Azat Khuzhin](https://github.com/azat)).
* Fix llvm symbolizer on CI [#61147](https://github.com/ClickHouse/ClickHouse/pull/61147) ([Azat Khuzhin](https://github.com/azat)).
* Some clang-tidy fixes [#61150](https://github.com/ClickHouse/ClickHouse/pull/61150) ([Robert Schulze](https://github.com/rschu1ze)).
* Revive "Less contention in the cache, part 2" [#61152](https://github.com/ClickHouse/ClickHouse/pull/61152) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Enable black back [#61159](https://github.com/ClickHouse/ClickHouse/pull/61159) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: fix nightly job issue [#61160](https://github.com/ClickHouse/ClickHouse/pull/61160) ([Max K.](https://github.com/maxknv)).
* Split `RangeHashedDictionary` [#61162](https://github.com/ClickHouse/ClickHouse/pull/61162) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Remove a few templates from Aggregator.cpp [#61171](https://github.com/ClickHouse/ClickHouse/pull/61171) ([Raúl Marín](https://github.com/Algunenano)).
* Avoid some logical errors in experimental Object type [#61173](https://github.com/ClickHouse/ClickHouse/pull/61173) ([Kruglov Pavel](https://github.com/Avogar)).
* Update ReadSettings.h [#61174](https://github.com/ClickHouse/ClickHouse/pull/61174) ([Kseniia Sumarokova](https://github.com/kssenii)).
* CI: ARM integration tests: disable tests with HDFS [#61182](https://github.com/ClickHouse/ClickHouse/pull/61182) ([Max K.](https://github.com/maxknv)).
* Disable sanitizers with 02784_parallel_replicas_automatic_decision_join [#61184](https://github.com/ClickHouse/ClickHouse/pull/61184) ([Raúl Marín](https://github.com/Algunenano)).
* Fix `02887_mutations_subcolumns` test flakiness [#61198](https://github.com/ClickHouse/ClickHouse/pull/61198) ([Nikita Taranov](https://github.com/nickitat)).
* Make variant tests a bit faster [#61199](https://github.com/ClickHouse/ClickHouse/pull/61199) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix strange log message [#61206](https://github.com/ClickHouse/ClickHouse/pull/61206) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix 01603_insert_select_too_many_parts flakiness [#61218](https://github.com/ClickHouse/ClickHouse/pull/61218) ([Azat Khuzhin](https://github.com/azat)).
* Make every style-checker runner type scale out very quickly [#61231](https://github.com/ClickHouse/ClickHouse/pull/61231) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Improve `test_failed_mutations` [#61235](https://github.com/ClickHouse/ClickHouse/pull/61235) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix `test_merge_tree_load_parts/test.py::test_merge_tree_load_parts_corrupted` [#61236](https://github.com/ClickHouse/ClickHouse/pull/61236) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* fix `forget_partition` test [#61237](https://github.com/ClickHouse/ClickHouse/pull/61237) ([Sergei Trifonov](https://github.com/serxa)).
* Print more info in `02572_system_logs_materialized_views_ignore_errors` to debug [#61246](https://github.com/ClickHouse/ClickHouse/pull/61246) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix runtime error in AST Fuzzer [#61248](https://github.com/ClickHouse/ClickHouse/pull/61248) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add retries to `02908_many_requests_to_system_replicas` [#61253](https://github.com/ClickHouse/ClickHouse/pull/61253) ([Nikita Taranov](https://github.com/nickitat)).
* Followup fix ASTRenameQuery::clone [#61254](https://github.com/ClickHouse/ClickHouse/pull/61254) ([vdimir](https://github.com/vdimir)).
* Disable test 02998_primary_key_skip_columns.sql in sanitizer builds as it can be slow [#61256](https://github.com/ClickHouse/ClickHouse/pull/61256) ([Kruglov Pavel](https://github.com/Avogar)).
* Update curl to curl with data race fix [#61264](https://github.com/ClickHouse/ClickHouse/pull/61264) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix `01417_freeze_partition_verbose` [#61266](https://github.com/ClickHouse/ClickHouse/pull/61266) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Free memory earlier in inserts [#61267](https://github.com/ClickHouse/ClickHouse/pull/61267) ([Anton Popov](https://github.com/CurtizJ)).
* Fixing test_build_sets_from_multiple_threads/test.py::test_set [#61286](https://github.com/ClickHouse/ClickHouse/pull/61286) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Analyzer: Fix virtual columns in StorageMerge [#61298](https://github.com/ClickHouse/ClickHouse/pull/61298) ([Dmitry Novik](https://github.com/novikd)).
* Fix 01952_optimize_distributed_group_by_sharding_key with analyzer. [#61301](https://github.com/ClickHouse/ClickHouse/pull/61301) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* fix data race in poco tcp server [#61309](https://github.com/ClickHouse/ClickHouse/pull/61309) ([Sema Checherinda](https://github.com/CheSema)).
* Don't use default cluster in test test_distibuted_settings [#61314](https://github.com/ClickHouse/ClickHouse/pull/61314) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix false positive assertion in cache [#61319](https://github.com/ClickHouse/ClickHouse/pull/61319) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix test test_input_format_parallel_parsing_memory_tracking [#61322](https://github.com/ClickHouse/ClickHouse/pull/61322) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix 01761_cast_to_enum_nullable with analyzer. [#61323](https://github.com/ClickHouse/ClickHouse/pull/61323) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add zookeeper retries for exists check in forcefullyRemoveBrokenOutdatedPartFromZooKeeper [#61324](https://github.com/ClickHouse/ClickHouse/pull/61324) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Minor changes in stress and fuzzer reports [#61333](https://github.com/ClickHouse/ClickHouse/pull/61333) ([Raúl Marín](https://github.com/Algunenano)).
* Un-flake `test_undrop_query` [#61348](https://github.com/ClickHouse/ClickHouse/pull/61348) ([Robert Schulze](https://github.com/rschu1ze)).
* Tiny improvement for replication.lib [#61361](https://github.com/ClickHouse/ClickHouse/pull/61361) ([alesapin](https://github.com/alesapin)).
* Fix bugfix check (due to "unknown object storage type: azure") [#61363](https://github.com/ClickHouse/ClickHouse/pull/61363) ([Azat Khuzhin](https://github.com/azat)).
* Fix `01599_multiline_input_and_singleline_comments` 3 minute wait [#61371](https://github.com/ClickHouse/ClickHouse/pull/61371) ([Sergei Trifonov](https://github.com/serxa)).
* Terminate EC2 on spot event if runner isn't running [#61377](https://github.com/ClickHouse/ClickHouse/pull/61377) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Try fix docs check [#61378](https://github.com/ClickHouse/ClickHouse/pull/61378) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix `heap-use-after-free` for Merge table with alias [#61380](https://github.com/ClickHouse/ClickHouse/pull/61380) ([Antonio Andelic](https://github.com/antonio2368)).
* Disable `optimize_rewrite_sum_if_to_count_if` if return type is nullable (new analyzer) [#61389](https://github.com/ClickHouse/ClickHouse/pull/61389) ([Antonio Andelic](https://github.com/antonio2368)).
* Analyzer: Fix planner context for subquery in StorageMerge [#61392](https://github.com/ClickHouse/ClickHouse/pull/61392) ([Dmitry Novik](https://github.com/novikd)).
* Fix `test_failed_async_inserts` [#61394](https://github.com/ClickHouse/ClickHouse/pull/61394) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix test test_system_clusters_actual_information flakiness [#61395](https://github.com/ClickHouse/ClickHouse/pull/61395) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove default cluster from default config from test config [#61396](https://github.com/ClickHouse/ClickHouse/pull/61396) ([Raúl Marín](https://github.com/Algunenano)).
* Enable clang-tidy in headers [#61406](https://github.com/ClickHouse/ClickHouse/pull/61406) ([Robert Schulze](https://github.com/rschu1ze)).
* Add sanity check for poll_max_batch_size FileLog setting [#61408](https://github.com/ClickHouse/ClickHouse/pull/61408) ([Kruglov Pavel](https://github.com/Avogar)).
* ThreadFuzzer: randomize sleep time [#61410](https://github.com/ClickHouse/ClickHouse/pull/61410) ([Tomer Shafir](https://github.com/tomershafir)).
* Update version_date.tsv and changelogs after v23.8.11.28-lts [#61416](https://github.com/ClickHouse/ClickHouse/pull/61416) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.3.21.26-lts [#61418](https://github.com/ClickHouse/ClickHouse/pull/61418) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v24.1.7.18-stable [#61419](https://github.com/ClickHouse/ClickHouse/pull/61419) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v24.2.2.71-stable [#61420](https://github.com/ClickHouse/ClickHouse/pull/61420) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.12.5.81-stable [#61421](https://github.com/ClickHouse/ClickHouse/pull/61421) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Restore automerge for approved PRs [#61433](https://github.com/ClickHouse/ClickHouse/pull/61433) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Disable broken SonarCloud [#61434](https://github.com/ClickHouse/ClickHouse/pull/61434) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix `01599_multiline_input_and_singleline_comments` properly [#61440](https://github.com/ClickHouse/ClickHouse/pull/61440) ([Sergei Trifonov](https://github.com/serxa)).
* Convert test 02998_system_dns_cache_table to smoke and mirrors [#61443](https://github.com/ClickHouse/ClickHouse/pull/61443) ([vdimir](https://github.com/vdimir)).
* Check boundaries for some settings in parallel replicas [#61455](https://github.com/ClickHouse/ClickHouse/pull/61455) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Use SHARD_LOAD_QUEUE_BACKLOG for dictionaries in tests [#61462](https://github.com/ClickHouse/ClickHouse/pull/61462) ([vdimir](https://github.com/vdimir)).
* Split `02125_lz4_compression_bug` [#61465](https://github.com/ClickHouse/ClickHouse/pull/61465) ([Antonio Andelic](https://github.com/antonio2368)).
* Correctly process last stacktrace in `postprocess-traces.pl` [#61470](https://github.com/ClickHouse/ClickHouse/pull/61470) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix test `test_polymorphic_parts` [#61477](https://github.com/ClickHouse/ClickHouse/pull/61477) ([Anton Popov](https://github.com/CurtizJ)).
* A definitive guide to CAST [#61491](https://github.com/ClickHouse/ClickHouse/pull/61491) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Minor rename in FileCache [#61494](https://github.com/ClickHouse/ClickHouse/pull/61494) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Remove useless code [#61498](https://github.com/ClickHouse/ClickHouse/pull/61498) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix fuzzers [#61499](https://github.com/ClickHouse/ClickHouse/pull/61499) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update jdbc.md [#61506](https://github.com/ClickHouse/ClickHouse/pull/61506) ([San](https://github.com/santrancisco)).
* Fix error in clickhouse-client [#61507](https://github.com/ClickHouse/ClickHouse/pull/61507) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix clang-tidy build [#61519](https://github.com/ClickHouse/ClickHouse/pull/61519) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix infinite loop in function `hop` [#61523](https://github.com/ClickHouse/ClickHouse/pull/61523) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve tests 00159_parallel_formatting_* to avoid timeouts [#61532](https://github.com/ClickHouse/ClickHouse/pull/61532) ([Kruglov Pavel](https://github.com/Avogar)).
* Refactoring of reading from compact parts [#61535](https://github.com/ClickHouse/ClickHouse/pull/61535) ([Anton Popov](https://github.com/CurtizJ)).
* Don't run 01459_manual_write_to_replicas in debug build as it's too slow [#61538](https://github.com/ClickHouse/ClickHouse/pull/61538) ([Kruglov Pavel](https://github.com/Avogar)).
* CI: ARM integration test - skip hdfs, kerberos, kafka [#61542](https://github.com/ClickHouse/ClickHouse/pull/61542) ([Max K.](https://github.com/maxknv)).
* More logging for loading of tables [#61546](https://github.com/ClickHouse/ClickHouse/pull/61546) ([Sergei Trifonov](https://github.com/serxa)).
* Fixing 01584_distributed_buffer_cannot_find_column with analyzer. [#61550](https://github.com/ClickHouse/ClickHouse/pull/61550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Wait for done mutation with more logs and asserts [#61554](https://github.com/ClickHouse/ClickHouse/pull/61554) ([alesapin](https://github.com/alesapin)).
* Fix read_rows count with external group by [#61555](https://github.com/ClickHouse/ClickHouse/pull/61555) ([Alexander Tokmakov](https://github.com/tavplubix)).
* `queries-file` should be used to specify a file [#61557](https://github.com/ClickHouse/ClickHouse/pull/61557) ([danila-ermakov](https://github.com/danila-ermakov)).
* Fix `02481_async_insert_dedup_token` [#61568](https://github.com/ClickHouse/ClickHouse/pull/61568) ([Antonio Andelic](https://github.com/antonio2368)).
* Add a comment after [#61458](https://github.com/ClickHouse/ClickHouse/issues/61458) [#61580](https://github.com/ClickHouse/ClickHouse/pull/61580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix clickhouse-test client option and CLICKHOUSE_URL_PARAMS interference [#61596](https://github.com/ClickHouse/ClickHouse/pull/61596) ([vdimir](https://github.com/vdimir)).
* CI: remove compose files from integration test docker [#61597](https://github.com/ClickHouse/ClickHouse/pull/61597) ([Max K.](https://github.com/maxknv)).
* Fix 01244_optimize_distributed_group_by_sharding_key by ordering output [#61602](https://github.com/ClickHouse/ClickHouse/pull/61602) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Remove some tests from analyzer_tech_debt [#61603](https://github.com/ClickHouse/ClickHouse/pull/61603) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Reduce header dependencies [#61604](https://github.com/ClickHouse/ClickHouse/pull/61604) ([Raúl Marín](https://github.com/Algunenano)).
* Remove some magic_enum from headers [#61606](https://github.com/ClickHouse/ClickHouse/pull/61606) ([Raúl Marín](https://github.com/Algunenano)).
* Fix configs for upgrade and bugfix [#61607](https://github.com/ClickHouse/ClickHouse/pull/61607) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add tests for multiple fuzzer issues [#61614](https://github.com/ClickHouse/ClickHouse/pull/61614) ([Raúl Marín](https://github.com/Algunenano)).
* Try to fix `02908_many_requests_to_system_replicas` again [#61616](https://github.com/ClickHouse/ClickHouse/pull/61616) ([Nikita Taranov](https://github.com/nickitat)).
* Verbose error message about analyzer_compatibility_join_using_top_level_identifier [#61631](https://github.com/ClickHouse/ClickHouse/pull/61631) ([vdimir](https://github.com/vdimir)).
* Fix 00223_shard_distributed_aggregation_memory_efficient with analyzer [#61649](https://github.com/ClickHouse/ClickHouse/pull/61649) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Better fuzzer logs [#61650](https://github.com/ClickHouse/ClickHouse/pull/61650) ([Raúl Marín](https://github.com/Algunenano)).
* Fix flaky `02122_parallel_formatting_Template` [#61651](https://github.com/ClickHouse/ClickHouse/pull/61651) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Aggregator when data is empty [#61654](https://github.com/ClickHouse/ClickHouse/pull/61654) ([Antonio Andelic](https://github.com/antonio2368)).
* Restore poco SUN files [#61655](https://github.com/ClickHouse/ClickHouse/pull/61655) ([Andy Fiddaman](https://github.com/citrus-it)).
* Another fix for `SumIfToCountIfPass` [#61656](https://github.com/ClickHouse/ClickHouse/pull/61656) ([Antonio Andelic](https://github.com/antonio2368)).
* Keeper: fix data race during snapshot destructor call [#61657](https://github.com/ClickHouse/ClickHouse/pull/61657) ([Antonio Andelic](https://github.com/antonio2368)).
* CI: integration tests: use runner as py module [#61658](https://github.com/ClickHouse/ClickHouse/pull/61658) ([Max K.](https://github.com/maxknv)).
* Fix logging of autoscaling lambda, add test for effective_capacity [#61662](https://github.com/ClickHouse/ClickHouse/pull/61662) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Small change in `DatabaseOnDisk::iterateMetadataFiles()` [#61664](https://github.com/ClickHouse/ClickHouse/pull/61664) ([Nikita Taranov](https://github.com/nickitat)).
* Build improvements by removing magic enum from header and apply some explicit template instantiation [#61665](https://github.com/ClickHouse/ClickHouse/pull/61665) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Update the dictionary for OSSFuzz [#61672](https://github.com/ClickHouse/ClickHouse/pull/61672) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Inhibit randomization in some tests and exclude some long tests from debug runs [#61676](https://github.com/ClickHouse/ClickHouse/pull/61676) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a test for [#61669](https://github.com/ClickHouse/ClickHouse/issues/61669) [#61678](https://github.com/ClickHouse/ClickHouse/pull/61678) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix use-of-uninitialized-value in HedgedConnections [#61679](https://github.com/ClickHouse/ClickHouse/pull/61679) ([Nikolay Degterinsky](https://github.com/evillique)).
* Remove clickhouse-diagnostics from the package [#61681](https://github.com/ClickHouse/ClickHouse/pull/61681) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix use-of-uninitialized-value in parseDateTimeBestEffort [#61694](https://github.com/ClickHouse/ClickHouse/pull/61694) ([Nikolay Degterinsky](https://github.com/evillique)).
* poco foundation: add illumos support [#61701](https://github.com/ClickHouse/ClickHouse/pull/61701) ([Andy Fiddaman](https://github.com/citrus-it)).
* contrib/c-ares: add illumos as a platform [#61702](https://github.com/ClickHouse/ClickHouse/pull/61702) ([Andy Fiddaman](https://github.com/citrus-it)).
* contrib/curl: Add illumos support [#61704](https://github.com/ClickHouse/ClickHouse/pull/61704) ([Andy Fiddaman](https://github.com/citrus-it)).
* Fuzzer: Try a different way to wait for the server [#61706](https://github.com/ClickHouse/ClickHouse/pull/61706) ([Raúl Marín](https://github.com/Algunenano)).
* Disable some tests for SMT [#61708](https://github.com/ClickHouse/ClickHouse/pull/61708) ([Raúl Marín](https://github.com/Algunenano)).
* Fix signal handler for sanitizer signals [#61709](https://github.com/ClickHouse/ClickHouse/pull/61709) ([Antonio Andelic](https://github.com/antonio2368)).
* Avoid `IsADirectoryError: Is a directory contrib/azure` [#61710](https://github.com/ClickHouse/ClickHouse/pull/61710) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Analyzer: fix group_by_use_nulls [#61717](https://github.com/ClickHouse/ClickHouse/pull/61717) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: Clear list of broken integration tests [#61718](https://github.com/ClickHouse/ClickHouse/pull/61718) ([Dmitry Novik](https://github.com/novikd)).
* CI: modify CI from PR body [#61725](https://github.com/ClickHouse/ClickHouse/pull/61725) ([Max K.](https://github.com/maxknv)).
* Add test for [#57820](https://github.com/ClickHouse/ClickHouse/issues/57820) [#61726](https://github.com/ClickHouse/ClickHouse/pull/61726) ([Dmitry Novik](https://github.com/novikd)).
* Revert "Revert "Un-flake test_undrop_query"" [#61727](https://github.com/ClickHouse/ClickHouse/pull/61727) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* FunctionsConversion: Start simplifying templates [#61733](https://github.com/ClickHouse/ClickHouse/pull/61733) ([Raúl Marín](https://github.com/Algunenano)).
* CI: modify it [#61735](https://github.com/ClickHouse/ClickHouse/pull/61735) ([Max K.](https://github.com/maxknv)).
* Fix segfault in SquashingTransform [#61736](https://github.com/ClickHouse/ClickHouse/pull/61736) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix DWARF format failing to skip DW_FORM_strx3 attributes [#61737](https://github.com/ClickHouse/ClickHouse/pull/61737) ([Michael Kolupaev](https://github.com/al13n321)).
* There is no such thing as broken tests [#61739](https://github.com/ClickHouse/ClickHouse/pull/61739) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Process removed files, decouple _check_mime [#61751](https://github.com/ClickHouse/ClickHouse/pull/61751) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Keeper fix: destroy `KeeperDispatcher` first [#61752](https://github.com/ClickHouse/ClickHouse/pull/61752) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix flaky `03014_async_with_dedup_part_log_rmt` [#61757](https://github.com/ClickHouse/ClickHouse/pull/61757) ([Antonio Andelic](https://github.com/antonio2368)).
* FunctionsConversion: Remove another batch of bad templates [#61773](https://github.com/ClickHouse/ClickHouse/pull/61773) ([Raúl Marín](https://github.com/Algunenano)).
* Revert "Fix bug when reading system.parts using UUID (issue 61220)." [#61774](https://github.com/ClickHouse/ClickHouse/pull/61774) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* CI: disable grpc tests on ARM [#61778](https://github.com/ClickHouse/ClickHouse/pull/61778) ([Max K.](https://github.com/maxknv)).
* Fix more tests with virtual columns in StorageMerge. [#61787](https://github.com/ClickHouse/ClickHouse/pull/61787) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove already not flaky tests with analyzer. [#61788](https://github.com/ClickHouse/ClickHouse/pull/61788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Analyzer: Fix assert in JOIN with Distributed table [#61789](https://github.com/ClickHouse/ClickHouse/pull/61789) ([vdimir](https://github.com/vdimir)).
* A test can be slow in debug build [#61796](https://github.com/ClickHouse/ClickHouse/pull/61796) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Updated clang-19 to master. [#61798](https://github.com/ClickHouse/ClickHouse/pull/61798) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix test "00002_log_and_exception_messages_formatting" [#61821](https://github.com/ClickHouse/ClickHouse/pull/61821) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A test is too slow for debug [#61822](https://github.com/ClickHouse/ClickHouse/pull/61822) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove DataStreams [#61824](https://github.com/ClickHouse/ClickHouse/pull/61824) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Better message for logging errors [#61827](https://github.com/ClickHouse/ClickHouse/pull/61827) ([Azat Khuzhin](https://github.com/azat)).
* Fix sanitizers suppressions [#61828](https://github.com/ClickHouse/ClickHouse/pull/61828) ([Azat Khuzhin](https://github.com/azat)).
* Remove unused code [#61830](https://github.com/ClickHouse/ClickHouse/pull/61830) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove DataStreams (2) [#61831](https://github.com/ClickHouse/ClickHouse/pull/61831) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update xxhash to v0.8.2 [#61838](https://github.com/ClickHouse/ClickHouse/pull/61838) ([Shubham Ranjan](https://github.com/shubhamranjan)).
* Fix: DISTINCT in subquery with analyzer [#61847](https://github.com/ClickHouse/ClickHouse/pull/61847) ([Igor Nikonov](https://github.com/devcrafter)).
* Analyzer: fix limit/offset on shards [#61849](https://github.com/ClickHouse/ClickHouse/pull/61849) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Remove PoolBase::AllocateNewBypassingPool [#61866](https://github.com/ClickHouse/ClickHouse/pull/61866) ([Azat Khuzhin](https://github.com/azat)).
* Try to fix 02901_parallel_replicas_rollup with analyzer. [#61875](https://github.com/ClickHouse/ClickHouse/pull/61875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add test for [#57808](https://github.com/ClickHouse/ClickHouse/issues/57808) [#61879](https://github.com/ClickHouse/ClickHouse/pull/61879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* CI: merge queue support [#61881](https://github.com/ClickHouse/ClickHouse/pull/61881) ([Max K.](https://github.com/maxknv)).
* Update create.sql [#61885](https://github.com/ClickHouse/ClickHouse/pull/61885) ([Kseniia Sumarokova](https://github.com/kssenii)).
* no smaller unit in date_trunc [#61888](https://github.com/ClickHouse/ClickHouse/pull/61888) ([jsc0218](https://github.com/jsc0218)).
* Move KQL trash where it is supposed to be [#61903](https://github.com/ClickHouse/ClickHouse/pull/61903) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Changelog for 24.3 [#61909](https://github.com/ClickHouse/ClickHouse/pull/61909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v23.3.22.3-lts [#61914](https://github.com/ClickHouse/ClickHouse/pull/61914) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.8.12.13-lts [#61915](https://github.com/ClickHouse/ClickHouse/pull/61915) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* No "please" [#61916](https://github.com/ClickHouse/ClickHouse/pull/61916) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v23.12.6.19-stable [#61917](https://github.com/ClickHouse/ClickHouse/pull/61917) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v24.1.8.22-stable [#61918](https://github.com/ClickHouse/ClickHouse/pull/61918) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix flaky test_broken_projections/test.py::test_broken_ignored_replic… [#61932](https://github.com/ClickHouse/ClickHouse/pull/61932) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Check if Rust is available for the build; if not, suggest a way to disable Rust support [#61938](https://github.com/ClickHouse/ClickHouse/pull/61938) ([Azat Khuzhin](https://github.com/azat)).
* CI: new ci menu in PR body [#61948](https://github.com/ClickHouse/ClickHouse/pull/61948) ([Max K.](https://github.com/maxknv)).
* Remove flaky test `01193_metadata_loading` [#61961](https://github.com/ClickHouse/ClickHouse/pull/61961) ([Nikita Taranov](https://github.com/nickitat)).
#### Packaging Improvement
* Added the `--now` option to enable and start the service automatically when installing the database server. See the sketch below. [#60656](https://github.com/ClickHouse/ClickHouse/pull/60656) ([Chun-Sheng, Li](https://github.com/peter279k)).
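For context, a minimal sketch of what the flag changes on a systemd host (this is standard systemd behavior; the exact postinst wiring may differ between package versions):

```bash
# Before: the unit is enabled, but starting it is a separate step.
sudo systemctl enable clickhouse-server
sudo systemctl start clickhouse-server

# With --now, enabling and starting happen in a single call.
sudo systemctl enable --now clickhouse-server
```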

View File

@@ -55,9 +55,7 @@ To build using Homebrew's vanilla Clang compiler (the only **recommended** way):
 cd ClickHouse
 mkdir build
 export PATH=$(brew --prefix llvm)/bin:$PATH
-export CC=$(brew --prefix llvm)/bin/clang
-export CXX=$(brew --prefix llvm)/bin/clang++
-cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -S . -B build
+cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -S . -B build
 cmake --build build
 # The resulting binary will be created at: build/programs/clickhouse
 ```

View File

@@ -2356,7 +2356,7 @@ You can select data from a ClickHouse table and save them into some file in the
 $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filename.arrow}
 ```
-### Arrow format settings {#parquet-format-settings}
+### Arrow format settings {#arrow-format-settings}
 - [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`.
 - [output_format_arrow_use_64_bit_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_64_bit_indexes_for_dictionary) - use 64-bit integer type for Dictionary indexes. Default value - `false`.
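As a sketch (the table and output file names are placeholders), both settings can be passed per query as clickhouse-client flags:

```bash
# Export a table in Arrow format with LowCardinality columns written as Dictionary,
# using 64-bit dictionary indexes.
clickhouse-client \
    --output_format_arrow_low_cardinality_as_dictionary=1 \
    --output_format_arrow_use_64_bit_indexes_for_dictionary=1 \
    --query="SELECT * FROM some_table FORMAT Arrow" > data.arrow
```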

View File

@@ -945,9 +945,9 @@ Hard limit is configured via system tools
 ## database_atomic_delay_before_drop_table_sec {#database_atomic_delay_before_drop_table_sec}
-Sets the delay before remove table data in seconds. If the query has `SYNC` modifier, this setting is ignored.
-Default value: `480` (8 minute).
+The delay during which a dropped table can be restored using the [UNDROP](/docs/en/sql-reference/statements/undrop.md) statement. If `DROP TABLE` ran with a `SYNC` modifier, the setting is ignored.
+Default value: `480` (8 minutes).
 ## database_catalog_unused_dir_hide_timeout_sec {#database_catalog_unused_dir_hide_timeout_sec}
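A quick way to confirm the effective value of this server setting, as a sketch assuming the `system.server_settings` table available in recent releases:

```bash
clickhouse-client --query="
    SELECT name, value
    FROM system.server_settings
    WHERE name = 'database_atomic_delay_before_drop_table_sec'"
```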

View File

@@ -867,3 +867,31 @@ Default value: `Never`
 Persists virtual column `_block_number` on merges.
 Default value: false.
+
+## exclude_deleted_rows_for_part_size_in_merge {#exclude_deleted_rows_for_part_size_in_merge}
+
+If enabled, the estimated actual size of data parts (i.e., excluding rows that have been deleted through `DELETE FROM`) is used when selecting parts to merge. Note that this behavior is only triggered for data parts affected by `DELETE FROM` executed after this setting is enabled.
+
+Possible values:
+
+- true, false
+
+Default value: false
+
+**See Also**
+
+- [load_existing_rows_count_for_old_parts](#load_existing_rows_count_for_old_parts) setting
+
+## load_existing_rows_count_for_old_parts {#load_existing_rows_count_for_old_parts}
+
+If enabled along with [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge), the deleted rows count for existing data parts is calculated during table startup. Note that this may slow down table loading on startup.
+
+Possible values:
+
+- true, false
+
+Default value: false
+
+**See Also**
+
+- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
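A hedged sketch of enabling both MergeTree settings on an existing table (`events` is a placeholder table name):

```bash
# Both are table-level MergeTree settings, so they can be changed with
# ALTER TABLE ... MODIFY SETTING; only DELETE FROM executed afterwards is affected.
clickhouse-client --query="
    ALTER TABLE events
    MODIFY SETTING
        exclude_deleted_rows_for_part_size_in_merge = 1,
        load_existing_rows_count_for_old_parts = 1"
```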

View File

@@ -1367,7 +1367,7 @@ Default value: `1'000'000`.
 While importing data, when a column is not found in the schema, the default value will be used instead of throwing an error.
-Disabled by default.
+Enabled by default.
 ### input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference {#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference}

View File

@@ -2817,6 +2817,17 @@ Possible values:
 Default value: 0.
+
+## distributed_insert_skip_read_only_replicas {#distributed_insert_skip_read_only_replicas}
+
+Enables skipping read-only replicas for INSERT queries into Distributed.
+
+Possible values:
+
+- 0 — INSERT proceeds as usual; if it goes to a read-only replica, it fails.
+- 1 — The initiator skips read-only replicas before sending data to the shards.
+
+Default value: `0`
+
 ## distributed_foreground_insert {#distributed_foreground_insert}
 Enables or disables synchronous data insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table.
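A sketch of the new setting in use (`dist_table` is a hypothetical Distributed table):

```bash
# With the setting on, the initiator skips read-only replicas
# instead of failing the INSERT.
clickhouse-client \
    --distributed_insert_skip_read_only_replicas=1 \
    --query="INSERT INTO dist_table VALUES (1, 'a')"
```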

View File

@@ -128,9 +128,9 @@ Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”
 For example:
-- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`.
-- `cutToFirstSignificantSubdomain('www.tr') = 'www.tr'`.
-- `cutToFirstSignificantSubdomain('tr') = ''`.
+- `cutToFirstSignificantSubdomainWithWWW('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`.
+- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`.
+- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`.
 ### cutToFirstSignificantSubdomainCustom

View File

@@ -13,13 +13,6 @@ a system table called `system.dropped_tables`.
 If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view.
-:::note
-UNDROP TABLE is experimental. To use it add this setting:
-```sql
-set allow_experimental_undrop_table_query = 1;
-```
-:::
 :::tip
 Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md)
 :::
@@ -32,60 +25,53 @@ UNDROP TABLE [db.]name [UUID '<uuid>'] [ON CLUSTER cluster]
 **Example**
-``` sql
-set allow_experimental_undrop_table_query = 1;
-```
 ```sql
-CREATE TABLE undropMe
+CREATE TABLE tab
 (
     `id` UInt8
 )
 ENGINE = MergeTree
-ORDER BY id
-```
-
-```sql
-DROP TABLE undropMe
-```
-
-```sql
+ORDER BY id;
+
+DROP TABLE tab;
+
 SELECT *
 FROM system.dropped_tables
-FORMAT Vertical
+FORMAT Vertical;
 ```
 ```response
 Row 1:
 ──────
 index:                 0
 database:              default
-table:                 undropMe
+table:                 tab
 uuid:                  aa696a1a-1d70-4e60-a841-4c80827706cc
 engine:                MergeTree
-metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
+metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.tab.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
 table_dropped_time:    2023-04-05 14:12:12
 1 row in set. Elapsed: 0.001 sec.
 ```
 ```sql
-UNDROP TABLE undropMe
-```
-
-```response
-Ok.
-```
-
-```sql
+UNDROP TABLE tab;
+
 SELECT *
 FROM system.dropped_tables
-FORMAT Vertical
+FORMAT Vertical;
 ```
 ```response
 Ok.
+
 0 rows in set. Elapsed: 0.001 sec.
 ```
 ```sql
-DESCRIBE TABLE undropMe
-FORMAT Vertical
+DESCRIBE TABLE tab
+FORMAT Vertical;
 ```
 ```response
 Row 1:
 ──────

View File

@@ -53,7 +53,7 @@ SELECT * FROM random;
 └──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
 ```
-In combination with [generateRandomStructure](../../sql-reference/functions/other-functions.md#generateRandomStructure):
+In combination with [generateRandomStructure](../../sql-reference/functions/other-functions.md#generaterandomstructure):
 ```sql
 SELECT * FROM generateRandom(generateRandomStructure(4, 101), 101) LIMIT 3;

View File

@@ -127,7 +127,7 @@ See the [deployment](docs/en/deployment-guides/terminology.md) documentation for
 #### Verify that experimental transactions are enabled
-Issue a `BEGIN TRANSACTION` followed by a `ROLLBACK` to verify that experimental transactions are enabled, and that ClickHouse Keeper is enabled as it is used to track transactions.
+Issue a `BEGIN TRANSACTION` or `START TRANSACTION` followed by a `ROLLBACK` to verify that experimental transactions are enabled, and that ClickHouse Keeper is enabled as it is used to track transactions.
 ```sql
 BEGIN TRANSACTION

View File

@ -36,7 +36,7 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
fi fi
/bin/systemctl daemon-reload /bin/systemctl daemon-reload
/bin/systemctl enable --now clickhouse-server /bin/systemctl enable clickhouse-server
else else
# If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server # If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server
if [ -x "/etc/init.d/clickhouse-server" ]; then if [ -x "/etc/init.d/clickhouse-server" ]; then

View File

@@ -99,6 +99,19 @@ function(add_rust_subdirectory src)
     message(STATUS "Copy ${src} to ${dst}")
     file(COPY "${src}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}"
         PATTERN target EXCLUDE)
+
+    # Check if Rust is available or not.
+    #
+    # `cargo update --dry-run` will not update anything, but will check the internet connectivity.
+    execute_process(COMMAND ${Rust_CARGO_CACHED} update --dry-run
+        WORKING_DIRECTORY "${dst}"
+        RESULT_VARIABLE CARGO_UPDATE_RESULT
+        OUTPUT_VARIABLE CARGO_UPDATE_STDOUT
+        ERROR_VARIABLE CARGO_UPDATE_STDERR)
+    if (CARGO_UPDATE_RESULT)
+        message(FATAL_ERROR "Rust (${Rust_CARGO_CACHED}) support is not available (likely there is no internet connectivity):\n${CARGO_UPDATE_STDERR}\nYou can disable Rust support with -DENABLE_RUST=OFF")
+    endif()
+
     add_subdirectory("${dst}" "${dst}")
     # cmake -E copy* does not know how to exclude files
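If the connectivity check fails (for example, when building without network access), Rust support can be switched off exactly as the fatal error suggests:

```bash
# Configure without the Rust components (skim, PRQL), then build as usual.
cmake -G Ninja -DENABLE_RUST=OFF -S . -B build
cmake --build build
```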

View File

@@ -777,7 +777,13 @@ struct IdentifierResolveScope
     std::unordered_map<QueryTreeNodePtr, TableExpressionData> table_expression_node_to_data;
     QueryTreeNodePtrWithHashSet nullable_group_by_keys;
-    QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> nullable_join_columns;
+    /** It's possible that after a JOIN, a column in the projection has a type different from the column in the source table.
+      * (For example, after join_use_nulls or a USING column cast to a supertype.)
+      * However, the column in the projection still refers to the table as its source.
+      * This map is used to revert these columns back to their original columns in the source table.
+      */
+    QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> join_columns_with_changed_types;
     /// Use identifier lookup to result cache
     bool use_identifier_lookup_to_result_cache = true;
@@ -1308,7 +1314,8 @@ private:
             if (!resolved_expression->getResultType()->equals(*new_result_type))
                 resolved_expression = buildCastFunction(resolved_expression, new_result_type, scope.context, true);
         }
-        scope.nullable_join_columns[nullable_resolved_identifier] = resolved_identifier;
+        if (!nullable_resolved_identifier->isEqual(*resolved_identifier))
+            scope.join_columns_with_changed_types[nullable_resolved_identifier] = resolved_identifier;
         return nullable_resolved_identifier;
     }
     return nullptr;
@@ -1401,6 +1408,8 @@ private:
         const NamesAndTypes & matched_columns,
         const IdentifierResolveScope & scope);
+    void updateMatchedColumnsFromJoinUsing(QueryTreeNodesWithNames & result_matched_column_nodes_with_names, const QueryTreeNodePtr & source_table_expression, IdentifierResolveScope & scope);
     QueryTreeNodesWithNames resolveQualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);
     QueryTreeNodesWithNames resolveUnqualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);
@@ -2057,92 +2066,75 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
         io.pipeline.setProgressCallback(context->getProgressCallback());
         io.pipeline.setProcessListElement(context->getProcessListElement());

-        if (only_analyze)
+        Block block;
+
+        while (block.rows() == 0 && executor.pull(block))
+        {
+        }
+
+        if (block.rows() == 0)
         {
-            /// If query is only analyzed, then constants are not correct.
-            scalar_block = interpreter->getSampleBlock();
-            for (auto & column : scalar_block)
+            auto types = interpreter->getSampleBlock().getDataTypes();
+            if (types.size() != 1)
+                types = {std::make_shared<DataTypeTuple>(types)};
+
+            auto & type = types[0];
+            if (!type->isNullable())
             {
-                if (column.column->empty())
-                {
-                    auto mut_col = column.column->cloneEmpty();
-                    mut_col->insertDefault();
-                    column.column = std::move(mut_col);
-                }
+                if (!type->canBeInsideNullable())
+                    throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY,
+                        "Scalar subquery returned empty result of type {} which cannot be Nullable",
+                        type->getName());
+
+                type = makeNullable(type);
             }
+
+            auto scalar_column = type->createColumn();
+            scalar_column->insert(Null());
+            scalar_block.insert({std::move(scalar_column), type, "null"});
         }
         else
         {
-            Block block;
-
-            while (block.rows() == 0 && executor.pull(block))
-            {
-            }
-
-            if (block.rows() == 0)
-            {
-                auto types = interpreter->getSampleBlock().getDataTypes();
-                if (types.size() != 1)
-                    types = {std::make_shared<DataTypeTuple>(types)};
-
-                auto & type = types[0];
-                if (!type->isNullable())
-                {
-                    if (!type->canBeInsideNullable())
-                        throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY,
-                            "Scalar subquery returned empty result of type {} which cannot be Nullable",
-                            type->getName());
-
-                    type = makeNullable(type);
-                }
-
-                auto scalar_column = type->createColumn();
-                scalar_column->insert(Null());
-                scalar_block.insert({std::move(scalar_column), type, "null"});
-            }
-            else
-            {
-                if (block.rows() != 1)
-                    throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");
-
-                Block tmp_block;
-                while (tmp_block.rows() == 0 && executor.pull(tmp_block))
-                {
-                }
-
-                if (tmp_block.rows() != 0)
-                    throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");
-
-                block = materializeBlock(block);
-                size_t columns = block.columns();
-
-                if (columns == 1)
-                {
-                    auto & column = block.getByPosition(0);
-                    /// Here we wrap type to nullable if we can.
-                    /// It is needed cause if subquery return no rows, it's result will be Null.
-                    /// In case of many columns, do not check it cause tuple can't be nullable.
-                    if (!column.type->isNullable() && column.type->canBeInsideNullable())
-                    {
-                        column.type = makeNullable(column.type);
-                        column.column = makeNullable(column.column);
-                    }
-
-                    scalar_block = block;
-                }
-                else
-                {
-                    /** Make unique column names for tuple.
-                      *
-                      * Example: SELECT (SELECT 2 AS x, x)
-                      */
-                    makeUniqueColumnNamesInBlock(block);
-
-                    scalar_block.insert({
-                        ColumnTuple::create(block.getColumns()),
-                        std::make_shared<DataTypeTuple>(block.getDataTypes(), block.getNames()),
-                        "tuple"});
-                }
-            }
+            if (block.rows() != 1)
+                throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");
+
+            Block tmp_block;
+            while (tmp_block.rows() == 0 && executor.pull(tmp_block))
+            {
+            }
+
+            if (tmp_block.rows() != 0)
+                throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");
+
+            block = materializeBlock(block);
+            size_t columns = block.columns();
+
+            if (columns == 1)
+            {
+                auto & column = block.getByPosition(0);
+                /// Here we wrap type to nullable if we can.
+                /// It is needed cause if subquery return no rows, it's result will be Null.
+                /// In case of many columns, do not check it cause tuple can't be nullable.
+                if (!column.type->isNullable() && column.type->canBeInsideNullable())
+                {
+                    column.type = makeNullable(column.type);
+                    column.column = makeNullable(column.column);
+                }
+
+                scalar_block = block;
+            }
+            else
+            {
+                /** Make unique column names for tuple.
+                  *
+                  * Example: SELECT (SELECT 2 AS x, x)
+                  */
+                makeUniqueColumnNamesInBlock(block);

+                scalar_block.insert({
+                    ColumnTuple::create(block.getColumns()),
+                    std::make_shared<DataTypeTuple>(block.getDataTypes(), block.getNames()),
+                    "tuple"});
+            }
         }
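The contract implemented by this hunk is easier to see outside the query tree machinery. Below is a standalone toy model in plain C++ (not ClickHouse code; evaluateScalar and its std::optional result are illustrative stand-ins): an empty result becomes NULL, exactly one row yields the value, and anything else is an error.

#include <iostream>
#include <optional>
#include <stdexcept>
#include <vector>

/// Toy stand-in for a scalar subquery result: zero rows -> NULL,
/// more than one row -> error, exactly one row -> the value itself.
std::optional<int> evaluateScalar(const std::vector<int> & rows)
{
    if (rows.empty())
        return std::nullopt; /// this is why the result type must be wrapped into Nullable
    if (rows.size() != 1)
        throw std::runtime_error("Scalar subquery returned more than one row");
    return rows.front();
}

int main()
{
    std::cout << evaluateScalar({42}).value() << '\n';   /// prints 42
    std::cout << evaluateScalar({}).has_value() << '\n'; /// prints 0, i.e. NULL
}

Multi-column results are packed into a single tuple column instead, and a tuple cannot be wrapped into Nullable, which is why the hunk applies the Nullable check only in the single-column case.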
@@ -2168,10 +2160,13 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
         !nearest_query_scope)
     {
         auto constant_value = std::make_shared<ConstantValue>(std::move(scalar_value), scalar_type);
-        auto constant_node = std::make_shared<ConstantNode>(std::move(constant_value), node);
+        auto constant_node = std::make_shared<ConstantNode>(constant_value, node);

         if (constant_node->getValue().isNull())
+        {
             node = buildCastFunction(constant_node, constant_node->getResultType(), context);
+            node = std::make_shared<ConstantNode>(std::move(constant_value), node);
+        }
         else
             node = std::move(constant_node);
@@ -3309,6 +3304,78 @@ QueryTreeNodePtr checkIsMissedObjectJSONSubcolumn(const QueryTreeNodePtr & left_
     return {};
 }

+/// Used to replace columns that changed type because of JOIN to their original type
+class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
+{
+public:
+    explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_, const ContextPtr & context_)
+        : replacement_map(replacement_map_)
+        , context(context_)
+    {}
+
+    /// Apply the replacement transitively, because a column may change its type twice:
+    /// once to get a supertype and then again because of `join_use_nulls`
+    static QueryTreeNodePtr findTransitiveReplacement(QueryTreeNodePtr node, const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_)
+    {
+        auto it = replacement_map_.find(node);
+        QueryTreeNodePtr result_node = nullptr;
+
+        for (; it != replacement_map_.end(); it = replacement_map_.find(result_node))
+        {
+            if (result_node && result_node->isEqual(*it->second))
+            {
+                Strings map_dump;
+                for (const auto & [k, v] : replacement_map_)
+                    map_dump.push_back(fmt::format("{} -> {} (is_equals: {}, is_same: {})",
+                        k.node->dumpTree(), v->dumpTree(), k.node->isEqual(*v), k.node == v));
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Infinite loop in query tree replacement map: {}", fmt::join(map_dump, "; "));
+            }
+            chassert(it->second);
+            result_node = it->second;
+        }
+        return result_node;
+    }
+
+    void visitImpl(QueryTreeNodePtr & node)
+    {
+        if (auto replacement_node = findTransitiveReplacement(node, replacement_map))
+            node = replacement_node;
+
+        if (auto * function_node = node->as<FunctionNode>(); function_node && function_node->isResolved())
+            rerunFunctionResolve(function_node, context);
+    }
+
+    /// We want to re-run resolve for the function _after_ its arguments are replaced
+    bool shouldTraverseTopToBottom() const { return false; }
+
+    bool needChildVisit(QueryTreeNodePtr & /* parent */, QueryTreeNodePtr & child)
+    {
+        /// Visit only expressions, but not subqueries
+        return child->getNodeType() == QueryTreeNodeType::IDENTIFIER
+            || child->getNodeType() == QueryTreeNodeType::LIST
+            || child->getNodeType() == QueryTreeNodeType::FUNCTION
+            || child->getNodeType() == QueryTreeNodeType::COLUMN;
+    }
+
+private:
+    const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map;
+    const ContextPtr & context;
+};
+
+/// Compare resolved identifiers considering columns that become nullable after JOIN
+bool resolvedIdenfiersFromJoinAreEquals(
+    const QueryTreeNodePtr & left_resolved_identifier,
+    const QueryTreeNodePtr & right_resolved_identifier,
+    const IdentifierResolveScope & scope)
+{
+    auto left_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(left_resolved_identifier, scope.join_columns_with_changed_types);
+    const auto & left_resolved_to_compare = left_original_node ? left_original_node : left_resolved_identifier;
+
+    auto right_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(right_resolved_identifier, scope.join_columns_with_changed_types);
+    const auto & right_resolved_to_compare = right_original_node ? right_original_node : right_resolved_identifier;
+
+    return left_resolved_to_compare->isEqual(*right_resolved_to_compare, IQueryTreeNode::CompareOptions{.compare_aliases = false});
+}
+
 QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
     const QueryTreeNodePtr & table_expression_node,
     IdentifierResolveScope & scope)
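The transitive lookup in findTransitiveReplacement reduces to a follow-the-map loop with cycle detection. A minimal standalone sketch, assuming plain std::string keys and a seen-set in place of query tree nodes and isEqual (findTransitive is a made-up name):

#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>

/// Follow the replacement map until no further mapping exists;
/// a revisited key means a cycle, so fail loudly instead of spinning.
std::optional<std::string> findTransitive(
    const std::unordered_map<std::string, std::string> & map, const std::string & start)
{
    std::unordered_set<std::string> seen;
    std::optional<std::string> result;
    auto it = map.find(start);
    while (it != map.end())
    {
        if (!seen.insert(it->first).second)
            throw std::runtime_error("Infinite loop in replacement map");
        result = it->second;
        it = map.find(*result);
    }
    return result;
}

int main()
{
    std::unordered_map<std::string, std::string> map{{"a", "b"}, {"b", "c"}};
    std::cout << findTransitive(map, "a").value() << '\n'; /// prints "c"
}

Here a -> b -> c resolves to c in two hops, matching the supertype-then-join_use_nulls scenario the comment describes; the real visitor detects cycles with isEqual rather than a seen-set.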
@@ -3443,9 +3510,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
             auto & result_column = result_column_node->as<ColumnNode &>();
             result_column.setColumnType(using_column_node.getColumnType());

+            const auto & join_using_left_column = using_expression_list.getNodes().at(0);
+            if (!result_column_node->isEqual(*join_using_left_column))
+                scope.join_columns_with_changed_types[result_column_node] = join_using_left_column;
+
             resolved_identifier = std::move(result_column_node);
         }
-        else if (left_resolved_identifier->isEqual(*right_resolved_identifier, IQueryTreeNode::CompareOptions{.compare_aliases = false}))
+        else if (resolvedIdenfiersFromJoinAreEquals(left_resolved_identifier, right_resolved_identifier, scope))
         {
             const auto & identifier_path_part = identifier_lookup.identifier.front();
             auto * left_resolved_identifier_column = left_resolved_identifier->as<ColumnNode>();
@@ -3521,6 +3592,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
                 auto left_resolved_column_clone = std::static_pointer_cast<ColumnNode>(left_resolved_column.clone());
                 left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
                 resolved_identifier = std::move(left_resolved_column_clone);
+
+                if (!resolved_identifier->isEqual(*using_column_node_it->second))
+                    scope.join_columns_with_changed_types[resolved_identifier] = using_column_node_it->second;
             }
         }
     }
@@ -3543,6 +3617,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
                 auto right_resolved_column_clone = std::static_pointer_cast<ColumnNode>(right_resolved_column.clone());
                 right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
                 resolved_identifier = std::move(right_resolved_column_clone);
+                if (!resolved_identifier->isEqual(*using_column_node_it->second))
+                    scope.join_columns_with_changed_types[resolved_identifier] = using_column_node_it->second;
             }
         }
     }
@@ -3552,9 +3628,17 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo

     if (scope.join_use_nulls)
     {
+        auto projection_name_it = node_to_projection_name.find(resolved_identifier);
         auto nullable_resolved_identifier = convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side, scope);
         if (nullable_resolved_identifier)
+        {
             resolved_identifier = nullable_resolved_identifier;
+            /// Set the same projection name for new nullable node
+            if (projection_name_it != node_to_projection_name.end())
+            {
+                node_to_projection_name.emplace(resolved_identifier, projection_name_it->second);
+            }
+        }
     }

     return resolved_identifier;
@@ -4213,6 +4297,95 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::getMatchedColumnNodesWithN

     return matched_column_nodes_with_names;
 }

+bool hasTableExpressionInJoinTree(const QueryTreeNodePtr & join_tree_node, const QueryTreeNodePtr & table_expression)
+{
+    QueryTreeNodes nodes_to_process;
+    nodes_to_process.push_back(join_tree_node);
+
+    while (!nodes_to_process.empty())
+    {
+        auto node_to_process = std::move(nodes_to_process.back());
+        nodes_to_process.pop_back();
+        if (node_to_process == table_expression)
+            return true;
+
+        if (node_to_process->getNodeType() == QueryTreeNodeType::JOIN)
+        {
+            const auto & join_node = node_to_process->as<JoinNode &>();
+            nodes_to_process.push_back(join_node.getLeftTableExpression());
+            nodes_to_process.push_back(join_node.getRightTableExpression());
+        }
+    }
+    return false;
+}
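hasTableExpressionInJoinTree is a plain iterative traversal. The same shape on a toy binary tree, with an explicit stack instead of recursion and identity comparison of shared_ptr handles (ToyNode and containsNode are illustrative only, not analyzer types):

#include <memory>
#include <vector>

/// Toy join-tree node: a leaf stands for a table expression, an inner node for a JOIN.
struct ToyNode
{
    std::shared_ptr<ToyNode> left;
    std::shared_ptr<ToyNode> right;
};

bool containsNode(const std::shared_ptr<ToyNode> & root, const std::shared_ptr<ToyNode> & target)
{
    std::vector<std::shared_ptr<ToyNode>> stack{root};
    while (!stack.empty())
    {
        auto node = std::move(stack.back());
        stack.pop_back();
        if (node == target)
            return true;
        if (node && node->left) /// inner node: descend into both sides
            stack.push_back(node->left);
        if (node && node->right)
            stack.push_back(node->right);
    }
    return false;
}

int main()
{
    auto t1 = std::make_shared<ToyNode>();
    auto t2 = std::make_shared<ToyNode>();
    auto join = std::make_shared<ToyNode>(ToyNode{t1, t2});
    return containsNode(join, t2) ? 0 : 1; /// exits with 0: t2 is in the tree
}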
+
+/// Columns resolved from a matcher can also match columns from JOIN USING.
+/// In that case we update the type to the type of the column in the USING section.
+/// TODO: This is not completely correct for qualified matchers: t1.* should be resolved to the left table's column type.
+/// But in the planner we do not distinguish such cases.
+void QueryAnalyzer::updateMatchedColumnsFromJoinUsing(
+    QueryTreeNodesWithNames & result_matched_column_nodes_with_names,
+    const QueryTreeNodePtr & source_table_expression,
+    IdentifierResolveScope & scope)
+{
+    auto * nearest_query_scope = scope.getNearestQueryScope();
+    auto * nearest_query_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as<QueryNode>() : nullptr;
+
+    /// If there is no parent query scope or the query scope does not have a join tree
+    if (!nearest_query_scope_query_node || !nearest_query_scope_query_node->getJoinTree())
+    {
+        throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
+            "There are no table sources. In scope {}",
+            scope.scope_node->formatASTForErrorMessage());
+    }
+
+    const auto & join_tree = nearest_query_scope_query_node->getJoinTree();
+
+    const auto * join_node = join_tree->as<JoinNode>();
+    if (join_node && join_node->isUsingJoinExpression())
+    {
+        const auto & join_using_list = join_node->getJoinExpression()->as<ListNode &>();
+        const auto & join_using_nodes = join_using_list.getNodes();
+
+        for (auto & [matched_column_node, _] : result_matched_column_nodes_with_names)
+        {
+            auto & matched_column_node_typed = matched_column_node->as<ColumnNode &>();
+            const auto & matched_column_name = matched_column_node_typed.getColumnName();
+
+            for (const auto & join_using_node : join_using_nodes)
+            {
+                auto & join_using_column_node = join_using_node->as<ColumnNode &>();
+                const auto & join_using_column_name = join_using_column_node.getColumnName();
+
+                if (matched_column_name != join_using_column_name)
+                    continue;
+
+                const auto & join_using_column_nodes_list = join_using_column_node.getExpressionOrThrow()->as<ListNode &>();
+                const auto & join_using_column_nodes = join_using_column_nodes_list.getNodes();
+
+                auto it = node_to_projection_name.find(matched_column_node);
+
+                if (hasTableExpressionInJoinTree(join_node->getLeftTableExpression(), source_table_expression))
+                    matched_column_node = join_using_column_nodes.at(0);
+                else if (hasTableExpressionInJoinTree(join_node->getRightTableExpression(), source_table_expression))
+                    matched_column_node = join_using_column_nodes.at(1);
+                else
+                    throw Exception(ErrorCodes::LOGICAL_ERROR,
+                        "Cannot find column {} in JOIN USING section {}",
+                        matched_column_node->dumpTree(), join_node->dumpTree());
+
+                matched_column_node = matched_column_node->clone();
+                if (it != node_to_projection_name.end())
+                    node_to_projection_name.emplace(matched_column_node, it->second);
+
+                matched_column_node->as<ColumnNode &>().setColumnType(join_using_column_node.getResultType());
+                if (!matched_column_node->isEqual(*join_using_column_nodes.at(0)))
+                    scope.join_columns_with_changed_types[matched_column_node] = join_using_column_nodes.at(0);
+            }
+        }
+    }
+}
+
 /** Resolve qualified tree matcher.
   *
   * First try to match qualified identifier to expression. If qualified identifier matched expression node then
@@ -4330,6 +4503,8 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
         matched_columns,
         scope);

+    updateMatchedColumnsFromJoinUsing(result_matched_column_nodes_with_names, table_expression_node, scope);
+
     return result_matched_column_nodes_with_names;
 }
@@ -4465,6 +4640,8 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(

                 matched_column_node = matched_column_node->clone();
                 matched_column_node->as<ColumnNode &>().setColumnType(join_using_column_node.getResultType());
+                if (!matched_column_node->isEqual(*join_using_column_nodes.at(0)))
+                    scope.join_columns_with_changed_types[matched_column_node] = join_using_column_nodes.at(0);

                 table_expression_column_names_to_skip.insert(join_using_column_name);
                 matched_expression_nodes_with_column_names.emplace_back(std::move(matched_column_node), join_using_column_name);
@@ -4584,7 +4761,9 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
                     node = nullable_node;
                     /// Set the same projection name for new nullable node
                     if (projection_name_it != node_to_projection_name.end())
+                    {
                         node_to_projection_name.emplace(node, projection_name_it->second);
+                    }
                 }
             }
         }
@@ -7164,7 +7343,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
                 ColumnDescription column = insert_columns.get(*insert_column_name_it);
                 /// Change ephemeral columns to default columns.
                 column.default_desc.kind = ColumnDefaultKind::Default;
-                structure_hint.add(insert_columns.get(*insert_column_name_it));
+                structure_hint.add(std::move(column));
             }
         }
@@ -7609,29 +7788,6 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
     scope.table_expressions_in_resolve_process.erase(join_tree_node.get());
 }

-class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
-{
-public:
-    explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_, const ContextPtr & context_)
-        : replacement_map(replacement_map_)
-        , context(context_)
-    {}
-
-    void visitImpl(QueryTreeNodePtr & node)
-    {
-        if (auto it = replacement_map.find(node); it != replacement_map.end())
-            node = it->second;
-        if (auto * function_node = node->as<FunctionNode>())
-            rerunFunctionResolve(function_node, context);
-    }
-
-    bool shouldTraverseTopToBottom() const { return false; }
-
-private:
-    const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map;
-    const ContextPtr & context;
-};
-
 /** Resolve query.
   * This function modifies query node during resolve. It is caller responsibility to clone query node before resolve
   * if it is needed for later use.
@@ -7823,19 +7979,17 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
     {
         resolveExpressionNode(prewhere_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);

-        if (scope.join_use_nulls)
-        {
-            /** Expression in PREWHERE with JOIN should not be modified by join_use_nulls.
-              * Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE b = 1
-              * Column `a` should be resolved from table and should not change its type to Nullable.
-              * More complicated example when column is somewhere inside an expression:
-              * SELECT a + 1 as b FROM t1 JOIN t2 USING (id) PREWHERE b = 1
-              * expression `a + 1 as b` in projection and in PREWHERE should have different `a`.
-              */
-            prewhere_node = prewhere_node->clone();
-            ReplaceColumnsVisitor replace_visitor(scope.nullable_join_columns, scope.context);
-            replace_visitor.visit(prewhere_node);
-        }
+        /** Expressions in PREWHERE with JOIN should not change their type.
+          * Example: SELECT * FROM t1 JOIN t2 USING (a) PREWHERE a = 1
+          * Column `a` in PREWHERE should be resolved from the left table
+          * and should not change its type to Nullable or to the supertype of `a` from t1 and t2.
+          * Here's a more complicated example where the column is somewhere inside an expression:
+          * SELECT a + 1 as b FROM t1 JOIN t2 USING (id) PREWHERE b = 1
+          * The expression `a + 1 as b` in the projection and in PREWHERE should have different `a`.
+          */
+        prewhere_node = prewhere_node->clone();
+        ReplaceColumnsVisitor replace_visitor(scope.join_columns_with_changed_types, scope.context);
+        replace_visitor.visit(prewhere_node);
     }

     if (query_node_typed.getWhere())
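The clone-before-rewrite step above is the crux: the projection and PREWHERE must see different instances of the same expression. A minimal sketch of that idea with a toy expression tree (Expr, clone, and replaceColumn are illustrative, not analyzer APIs):

#include <cassert>
#include <memory>
#include <string>

/// Toy expression node; the real code clones a QueryTreeNodePtr.
struct Expr
{
    std::string column;
    std::shared_ptr<Expr> child; /// nullptr for a leaf

    std::shared_ptr<Expr> clone() const
    {
        auto copy = std::make_shared<Expr>(*this);
        if (child)
            copy->child = child->clone();
        return copy;
    }
};

/// Rewrite column references in a copy, leaving the original tree untouched.
void replaceColumn(Expr & node, const std::string & from, const std::string & to)
{
    if (node.column == from)
        node.column = to;
    if (node.child)
        replaceColumn(*node.child, from, to);
}

int main()
{
    auto prewhere = std::make_shared<Expr>(Expr{"b", std::make_shared<Expr>(Expr{"a", nullptr})});
    auto copy = prewhere->clone();
    replaceColumn(*copy, "a", "a_original");
    assert(prewhere->child->column == "a");      /// the original is untouched
    assert(copy->child->column == "a_original"); /// only the clone is rewritten
}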

View File

@@ -33,6 +33,8 @@ namespace ErrorCodes
     M(UInt64, shard_num) \
     M(UInt64, replica_num) \
     M(Bool, check_parts) \
+    M(Bool, check_projection_parts) \
+    M(Bool, allow_backup_broken_projections) \
     M(Bool, internal) \
     M(String, host_id) \
     M(OptionalUUID, backup_uuid)
View File

@@ -65,6 +65,12 @@ struct BackupSettings
     /// Check checksums of the data parts before writing them to a backup.
     bool check_parts = true;

+    /// Check checksums of the projection data parts before writing them to a backup.
+    bool check_projection_parts = true;
+
+    /// Allow to create backup with broken projections.
+    bool allow_backup_broken_projections = false;
+
     /// Internal, should not be specified by user.
     /// Whether this backup is a part of a distributed backup created by BACKUP ON CLUSTER.
     bool internal = false;

View File

@ -70,6 +70,12 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
ProfileEvents::increment(ProfileEvents::DistributedConnectionUsable); ProfileEvents::increment(ProfileEvents::DistributedConnectionUsable);
result.is_usable = true; result.is_usable = true;
if (table_status_it->second.is_readonly)
{
result.is_readonly = true;
LOG_TRACE(log, "Table {}.{} is readonly on server {}", table_to_check->database, table_to_check->table, result.entry->getDescription());
}
const UInt64 max_allowed_delay = settings.max_replica_delay_for_distributed_queries; const UInt64 max_allowed_delay = settings.max_replica_delay_for_distributed_queries;
if (!max_allowed_delay) if (!max_allowed_delay)
{ {

View File

@@ -52,7 +52,7 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
     settings.distributed_replica_max_ignored_errors = 0;
     settings.fallback_to_stale_replicas_for_distributed_queries = true;

-    return get(timeouts, settings, true);
+    return get(timeouts, settings, /* force_connected= */ true);
 }

 IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts & timeouts,
@@ -65,7 +65,7 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts

     TryGetEntryFunc try_get_entry = [&](const NestedPoolPtr & pool, std::string & fail_message)
     {
-        return tryGetEntry(pool, timeouts, fail_message, settings, {});
+        return tryGetEntry(pool, timeouts, fail_message, settings);
     };

     const size_t offset = settings.load_balancing_first_offset % nested_pools.size();
@@ -158,6 +158,21 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
     return getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func);
 }

+std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyCheckedForInsert(
+    const ConnectionTimeouts & timeouts,
+    const Settings & settings,
+    PoolMode pool_mode,
+    const QualifiedTableName & table_to_check)
+{
+    TryGetEntryFunc try_get_entry = [&](const NestedPoolPtr & pool, std::string & fail_message)
+    { return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, /*async_callback=*/ {}); };
+
+    return getManyImpl(settings, pool_mode, try_get_entry,
+        /*skip_unavailable_endpoints=*/ std::nullopt,
+        /*priority_func=*/ {},
+        settings.distributed_insert_skip_read_only_replicas);
+}
+
 ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings & settings)
 {
     const size_t offset = settings.load_balancing_first_offset % nested_pools.size();
@@ -171,7 +186,8 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
     PoolMode pool_mode,
     const TryGetEntryFunc & try_get_entry,
     std::optional<bool> skip_unavailable_endpoints,
-    GetPriorityForLoadBalancing::Func priority_func)
+    GetPriorityForLoadBalancing::Func priority_func,
+    bool skip_read_only_replicas)
 {
     if (nested_pools.empty())
         throw DB::Exception(
@@ -203,7 +219,7 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
     UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value;
     bool fallback_to_stale_replicas = settings.fallback_to_stale_replicas_for_distributed_queries.value;

-    return Base::getMany(min_entries, max_entries, max_tries, max_ignored_errors, fallback_to_stale_replicas, try_get_entry, priority_func);
+    return Base::getMany(min_entries, max_entries, max_tries, max_ignored_errors, fallback_to_stale_replicas, skip_read_only_replicas, try_get_entry, priority_func);
 }

 ConnectionPoolWithFailover::TryResult
View File

@@ -77,6 +77,12 @@ public:
         AsyncCallback async_callback = {},
         std::optional<bool> skip_unavailable_endpoints = std::nullopt,
         GetPriorityForLoadBalancing::Func priority_func = {});

+    /// The same as getManyChecked(), but respects distributed_insert_skip_read_only_replicas setting.
+    std::vector<TryResult> getManyCheckedForInsert(
+        const ConnectionTimeouts & timeouts,
+        const Settings & settings,
+        PoolMode pool_mode,
+        const QualifiedTableName & table_to_check);

     struct NestedPoolStatus
     {
@@ -107,7 +113,8 @@ private:
         PoolMode pool_mode,
         const TryGetEntryFunc & try_get_entry,
         std::optional<bool> skip_unavailable_endpoints = std::nullopt,
-        GetPriorityForLoadBalancing::Func priority_func = {});
+        GetPriorityForLoadBalancing::Func priority_func = {},
+        bool skip_read_only_replicas = false);

     /// Try to get a connection from the pool and check that it is good.
     /// If table_to_check is not null and the check is enabled in settings, check that replication delay
View File

@@ -2,7 +2,6 @@

 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
-#include <Core/Settings.h>
 #include <Columns/ColumnString.h>
 #include <Common/typeid_cast.h>
 #include <Common/Macros.h>
View File

@@ -20,12 +20,12 @@ namespace DB

 namespace ErrorCodes
 {
-    extern const int ILLEGAL_COLUMN;
+    extern const int ARGUMENT_OUT_OF_BOUND;
     extern const int DUPLICATE_COLUMN;
+    extern const int EXPERIMENTAL_FEATURE_ERROR;
+    extern const int ILLEGAL_COLUMN;
     extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
     extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
-    extern const int ARGUMENT_OUT_OF_BOUND;
-    extern const int EXPERIMENTAL_FEATURE_ERROR;
 }

 namespace
@@ -334,7 +334,18 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
     if (type_changed || info.need_convert)
         field = convertFieldToTypeOrThrow(field, *least_common_type.get());

-    data.back()->insert(field);
+    if (!data.back()->tryInsert(field))
+    {
+        /** Normalization of the field above is pretty complicated (it uses several FieldVisitors),
+          * so in the case of a bug, we may get mismatched types.
+          * The `IColumn::insert` method does not check the type of the inserted field, and it can lead to a segmentation fault.
+          * Therefore, we use the safer `tryInsert` method to get an exception instead of a segmentation fault.
+          */
+        throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
+            "Cannot insert field {} to column {}",
+            field.dump(), data.back()->dumpStructure());
+    }
+
     ++num_rows;
 }
View File

@@ -460,6 +460,28 @@ Float32 ColumnVector<T>::getFloat32(size_t n [[maybe_unused]]) const
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as Float32", TypeName<T>);
 }

+template <typename T>
+bool ColumnVector<T>::tryInsert(const DB::Field & x)
+{
+    NearestFieldType<T> value;
+    if (!x.tryGet<NearestFieldType<T>>(value))
+    {
+        if constexpr (std::is_same_v<T, UInt8>)
+        {
+            /// It's also possible to insert boolean values into UInt8 column.
+            bool boolean_value;
+            if (x.tryGet<bool>(boolean_value))
+            {
+                data.push_back(static_cast<T>(boolean_value));
+                return true;
+            }
+        }
+        return false;
+    }
+
+    data.push_back(static_cast<T>(value));
+    return true;
+}
+
 template <typename T>
 void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 {
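The fallback above (try the natural numeric type first, then accept a boolean for UInt8 instead of failing) can be sketched outside DB::Field with std::variant; Field, tryInsertUInt8, and the three alternatives below are toy stand-ins:

#include <cstdint>
#include <iostream>
#include <variant>
#include <vector>

using Field = std::variant<std::uint64_t, bool, double>;

bool tryInsertUInt8(std::vector<std::uint8_t> & data, const Field & x)
{
    if (const auto * value = std::get_if<std::uint64_t>(&x))
    {
        data.push_back(static_cast<std::uint8_t>(*value));
        return true;
    }
    if (const auto * boolean_value = std::get_if<bool>(&x)) /// booleans are accepted too
    {
        data.push_back(static_cast<std::uint8_t>(*boolean_value));
        return true;
    }
    return false; /// e.g. a floating-point field is rejected instead of crashing
}

int main()
{
    std::vector<std::uint8_t> column;
    std::cout << tryInsertUInt8(column, Field{std::uint64_t{7}})
              << tryInsertUInt8(column, Field{true})
              << tryInsertUInt8(column, Field{3.14}) << '\n'; /// prints 110
}

Returning false instead of inserting garbage is what lets ColumnObject::Subcolumn::insert above turn a type mismatch into an exception rather than a segmentation fault.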

View File

@@ -224,14 +224,8 @@ public:
         data.push_back(static_cast<T>(x.get<T>()));
     }

-    bool tryInsert(const DB::Field & x) override
-    {
-        NearestFieldType<T> value;
-        if (!x.tryGet<NearestFieldType<T>>(value))
-            return false;
-        data.push_back(static_cast<T>(value));
-        return true;
-    }
+    bool tryInsert(const DB::Field & x) override;

     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

     ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
View File

@@ -591,6 +591,7 @@
     M(710, FAULT_INJECTED) \
     M(711, FILECACHE_ACCESS_DENIED) \
     M(712, TOO_MANY_MATERIALIZED_VIEWS) \
+    M(713, BROKEN_PROJECTION) \
     M(714, UNEXPECTED_CLUSTER) \
     M(715, CANNOT_DETECT_FORMAT) \
     M(716, CANNOT_FORGET_PARTITION) \
View File

@@ -1,11 +1,9 @@
 #pragma once

-#include <condition_variable>
 #include <mutex>
-#include <type_traits>
-#include <variant>
+#include <condition_variable>

-#include <boost/noncopyable.hpp>
 #include <Poco/Timespan.h>
+#include <boost/noncopyable.hpp>
 #include <Common/logger_useful.h>
 #include <Common/Exception.h>

@@ -17,6 +15,14 @@ namespace ProfileEvents
     extern const Event ConnectionPoolIsFullMicroseconds;
 }

+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+}
+
 /** A class from which you can inherit and get a pool of something. Used for database connection pools.
   * Descendant class must provide a method for creating a new object to place in the pool.
   */
@@ -29,22 +35,6 @@ public:
     using ObjectPtr = std::shared_ptr<Object>;
     using Ptr = std::shared_ptr<PoolBase<TObject>>;

-    enum class BehaviourOnLimit
-    {
-        /**
-         * Default behaviour - when limit on pool size is reached, callers will wait until object will be returned back in pool.
-         */
-        Wait,
-
-        /**
-         * If no free objects in pool - allocate a new object, but not store it in pool.
-         * This behaviour is needed when we simply don't want to waste time waiting or if we cannot guarantee that query could be processed using fixed amount of connections.
-         * For example, when we read from table on s3, one GetObject request corresponds to the whole FileSystemCache segment. This segments are shared between different
-         * reading tasks, so in general case connection could be taken from pool by one task and returned back by another one. And these tasks are processed completely independently.
-         */
-        AllocateNewBypassingPool,
-    };
-
 private:

     /** The object with the flag, whether it is currently used. */
@@ -99,53 +89,37 @@ public:
         Object & operator*() && = delete;
         const Object & operator*() const && = delete;

-        Object * operator->() & { return castToObjectPtr(); }
-        const Object * operator->() const & { return castToObjectPtr(); }
-        Object & operator*() & { return *castToObjectPtr(); }
-        const Object & operator*() const & { return *castToObjectPtr(); }
+        Object * operator->() & { return &*data->data.object; }
+        const Object * operator->() const & { return &*data->data.object; }
+        Object & operator*() & { return *data->data.object; }
+        const Object & operator*() const & { return *data->data.object; }

         /**
          * Expire an object to make it reallocated later.
          */
         void expire()
         {
-            if (data.index() == 1)
-                std::get<1>(data)->data.is_expired = true;
+            data->data.is_expired = true;
         }

-        bool isNull() const { return data.index() == 0 ? !std::get<0>(data) : !std::get<1>(data); }
+        bool isNull() const { return data == nullptr; }
+
+        PoolBase * getPool() const
+        {
+            if (!data)
+                throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Attempt to get pool from uninitialized entry");
+            return &data->data.pool;
+        }

     private:
-        /**
-         * Plain object will be stored instead of PoolEntryHelper if fallback was made in get() (see BehaviourOnLimit::AllocateNewBypassingPool).
-         */
-        std::variant<ObjectPtr, std::shared_ptr<PoolEntryHelper>> data;
-
-        explicit Entry(ObjectPtr && object) : data(std::move(object)) { }
-
-        explicit Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) { }
-
-        auto castToObjectPtr() const
-        {
-            return std::visit(
-                [](const auto & ptr)
-                {
-                    using T = std::decay_t<decltype(ptr)>;
-                    if constexpr (std::is_same_v<ObjectPtr, T>)
-                        return ptr.get();
-                    else
-                        return ptr->data.object.get();
-                },
-                data);
-        }
+        std::shared_ptr<PoolEntryHelper> data;
+
+        explicit Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) {}
     };

     virtual ~PoolBase() = default;

-    /** Allocates the object.
-     * If 'behaviour_on_limit' is Wait - wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite.
-     * If 'behaviour_on_limit' is AllocateNewBypassingPool and there is no free object - a new object will be created but not stored in the pool.
-     */
+    /** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */
     Entry get(Poco::Timespan::TimeDiff timeout)
     {
         std::unique_lock lock(mutex);
@@ -176,9 +150,6 @@ public:
             return Entry(*items.back());
         }

-        if (behaviour_on_limit == BehaviourOnLimit::AllocateNewBypassingPool)
-            return Entry(allocObject());
-
         Stopwatch blocked;
         if (timeout < 0)
         {
@@ -213,8 +184,6 @@ private:
     /** The maximum size of the pool. */
     unsigned max_items;

-    BehaviourOnLimit behaviour_on_limit;
-
     /** Pool. */
     Objects items;

@@ -225,8 +194,8 @@ private:
 protected:
     LoggerPtr log;

-    PoolBase(unsigned max_items_, LoggerPtr log_, BehaviourOnLimit behaviour_on_limit_ = BehaviourOnLimit::Wait)
-        : max_items(max_items_), behaviour_on_limit(behaviour_on_limit_), log(log_)
+    PoolBase(unsigned max_items_, LoggerPtr log_)
+        : max_items(max_items_), log(log_)
     {
         items.reserve(max_items);
     }
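After this revert the pool is back to a single behaviour: when max_items objects are handed out, get() blocks until an Entry is returned. A counting-semaphore sketch of that Wait idea (TinyPool is a toy; the real PoolBase also reuses freed objects, supports timeouts, and hands out Entry wrappers):

#include <condition_variable>
#include <memory>
#include <mutex>

template <typename T>
class TinyPool
{
public:
    explicit TinyPool(unsigned max_items_) : max_items(max_items_) {}

    std::shared_ptr<T> get()
    {
        std::unique_lock lock(mutex);
        condvar.wait(lock, [this] { return in_use < max_items; });
        ++in_use;
        /// The deleter frees the slot and wakes exactly one waiter.
        return std::shared_ptr<T>(new T(), [this](T * object)
        {
            delete object;
            {
                std::lock_guard guard(mutex);
                --in_use;
            }
            condvar.notify_one();
        });
    }

private:
    std::mutex mutex;
    std::condition_variable condvar;
    const unsigned max_items;
    unsigned in_use = 0;
};

int main()
{
    TinyPool<int> pool(/* max_items_ = */ 2);
    auto a = pool.get();
    auto b = pool.get(); /// a third get() would now block until a or b is released
}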

View File

@@ -30,6 +30,7 @@ namespace ProfileEvents
 {
     extern const Event DistributedConnectionFailTry;
     extern const Event DistributedConnectionFailAtAll;
+    extern const Event DistributedConnectionSkipReadOnlyReplica;
 }

 /// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB.
@@ -73,13 +74,7 @@ public:
     {
         TryResult() = default;

-        void reset()
-        {
-            entry = Entry();
-            is_usable = false;
-            is_up_to_date = false;
-            delay = 0;
-        }
+        void reset() { *this = {}; }

         Entry entry; /// use isNull() to check if connection is established
         bool is_usable = false; /// if connection is established, then can be false only with table check
@@ -87,6 +82,7 @@ public:
         bool is_up_to_date = false; /// If true, the entry is a connection to up-to-date replica
         /// Depends on max_replica_delay_for_distributed_queries setting
         UInt32 delay = 0; /// Helps choosing the "least stale" option when all replicas are stale.
+        bool is_readonly = false; /// Table is in read-only mode, INSERT can ignore such replicas.
     };

     struct PoolState;
@@ -117,6 +113,7 @@ public:
         size_t min_entries, size_t max_entries, size_t max_tries,
         size_t max_ignored_errors,
         bool fallback_to_stale_replicas,
+        bool skip_read_only_replicas,
         const TryGetEntryFunc & try_get_entry,
         const GetPriorityFunc & get_priority);

@@ -205,8 +202,12 @@ PoolWithFailoverBase<TNestedPool>::get(size_t max_ignored_errors, bool fallback_
     const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority)
 {
     std::vector<TryResult> results = getMany(
-        1 /* min entries */, 1 /* max entries */, 1 /* max tries */,
-        max_ignored_errors, fallback_to_stale_replicas,
+        /* min_entries= */ 1,
+        /* max_entries= */ 1,
+        /* max_tries= */ 1,
+        max_ignored_errors,
+        fallback_to_stale_replicas,
+        /* skip_read_only_replicas= */ false,
         try_get_entry, get_priority);
     if (results.empty() || results[0].entry.isNull())
         throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR,
@@ -220,6 +221,7 @@ PoolWithFailoverBase<TNestedPool>::getMany(
     size_t min_entries, size_t max_entries, size_t max_tries,
     size_t max_ignored_errors,
     bool fallback_to_stale_replicas,
+    bool skip_read_only_replicas,
     const TryGetEntryFunc & try_get_entry,
     const GetPriorityFunc & get_priority)
 {
@@ -271,9 +273,14 @@ PoolWithFailoverBase<TNestedPool>::getMany(
                 ++entries_count;
                 if (result.is_usable)
                 {
-                    ++usable_count;
-                    if (result.is_up_to_date)
-                        ++up_to_date_count;
+                    if (skip_read_only_replicas && result.is_readonly)
+                        ProfileEvents::increment(ProfileEvents::DistributedConnectionSkipReadOnlyReplica);
+                    else
+                    {
+                        ++usable_count;
+                        if (result.is_up_to_date)
+                            ++up_to_date_count;
+                    }
                 }
             }
             else
@@ -296,7 +303,7 @@ PoolWithFailoverBase<TNestedPool>::getMany(
         throw DB::NetException(DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED,
             "All connection tries failed. Log: \n\n{}\n", fail_messages);

-    std::erase_if(try_results, [](const TryResult & r) { return r.entry.isNull() || !r.is_usable; });
+    std::erase_if(try_results, [&](const TryResult & r) { return r.entry.isNull() || !r.is_usable || (skip_read_only_replicas && r.is_readonly); });

     /// Sort so that preferred items are near the beginning.
     std::stable_sort(

View File

M(DistributedConnectionFailTry, "Total count when distributed connection fails with retry.") \ M(DistributedConnectionFailTry, "Total count when distributed connection fails with retry.") \
M(DistributedConnectionMissingTable, "Number of times we rejected a replica from a distributed query, because it did not contain a table needed for the query.") \ M(DistributedConnectionMissingTable, "Number of times we rejected a replica from a distributed query, because it did not contain a table needed for the query.") \
M(DistributedConnectionStaleReplica, "Number of times we rejected a replica from a distributed query, because some table needed for a query had replication lag higher than the configured threshold.") \ M(DistributedConnectionStaleReplica, "Number of times we rejected a replica from a distributed query, because some table needed for a query had replication lag higher than the configured threshold.") \
M(DistributedConnectionSkipReadOnlyReplica, "Number of replicas skipped during INSERT into Distributed table due to replicas being read-only") \
M(DistributedConnectionFailAtAll, "Total count when distributed connection fails after all retries finished.") \ M(DistributedConnectionFailAtAll, "Total count when distributed connection fails after all retries finished.") \
\ \
M(HedgedRequestsChangeReplica, "Total count when timeout for changing replica expired in hedged requests.") \ M(HedgedRequestsChangeReplica, "Total count when timeout for changing replica expired in hedged requests.") \

View File

@@ -76,6 +76,9 @@ static constexpr auto DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION = 54465;

 static constexpr auto DBMS_MIN_REVISION_WITH_SSH_AUTHENTICATION = 54466;

+/// Send read-only flag for Replicated tables as well
+static constexpr auto DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK = 54467;
+
 /// Version of ClickHouse TCP protocol.
 ///
 /// Should be incremented manually on protocol changes.
@@ -83,6 +86,6 @@ static constexpr auto DBMS_MIN_REVISION_WITH_SSH_AUTHENTICATION = 54466;
 /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION,
 /// later is just a number for server version (one number instead of commit SHA)
 /// for simplicity (sometimes it may be more convenient in some use cases).
-static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54466;
+static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54467;

 }
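Constants like these are typically used to gate wire-format changes on the negotiated peer revision, so that older peers never receive fields they cannot parse. A hypothetical sketch of that pattern (shouldSendReadOnlyFlag is illustrative, not a function from the codebase):

#include <cstdint>

static constexpr std::uint64_t DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK = 54467;

/// Write the new flag only when the peer's revision proves it understands it.
bool shouldSendReadOnlyFlag(std::uint64_t peer_revision)
{
    return peer_revision >= DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK;
}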

View File

@@ -136,6 +136,7 @@ class IColumn;
     M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams, and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \
     M(String, stream_like_engine_insert_queue, "", "When stream like engine reads from multiple queues, user will need to select one queue to insert into when writing. Used by Redis Streams and NATS.", 0) \
     \
+    M(Bool, distributed_insert_skip_read_only_replicas, false, "If true, INSERT into Distributed will skip read-only replicas.", 0) \
     M(Bool, distributed_foreground_insert, false, "If setting is enabled, insert query into distributed waits until data are sent to all nodes in a cluster. \n\nEnables or disables synchronous data insertion into a `Distributed` table.\n\nBy default, when inserting data into a Distributed table, the ClickHouse server sends data to cluster nodes in the background. When `distributed_foreground_insert` = 1, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true).", 0) ALIAS(insert_distributed_sync) \
     M(UInt64, distributed_background_insert_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) ALIAS(insert_distributed_timeout) \
     M(Milliseconds, distributed_background_insert_sleep_time_ms, 100, "Sleep time for background INSERTs into Distributed, in case of any errors delay grows exponentially.", 0) ALIAS(distributed_directory_monitor_sleep_time_ms) \
@@ -380,7 +381,7 @@ class IColumn;
     M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \
     M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \
     M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \
-    M(Bool, allow_experimental_analyzer, false, "Allow experimental analyzer", 0) \
+    M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \
     M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \
     M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \
     M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \
@@ -868,6 +869,8 @@ class IColumn;
     M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \
     M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0) \
     M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \
+    M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \
+    M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \
     \
     /** Experimental functions */ \
     M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
@@ -902,7 +905,6 @@ class IColumn;
     M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.", 0) \
     M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \
     M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \
-    M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \

     // End of COMMON_SETTINGS
     // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS.
View File

@ -101,10 +101,12 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"},
{"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"},
{"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"},
{"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"},
{"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"},
{"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"},
{"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"},
{"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"},
{"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."},
{"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."},
{"allow_get_client_http_header", false, false, "Introduced a new function."}, {"allow_get_client_http_header", false, false, "Introduced a new function."},
{"output_format_pretty_row_numbers", false, true, "It is better for usability."}, {"output_format_pretty_row_numbers", false, true, "It is better for usability."},
@ -115,6 +117,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."},
{"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."},
{"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."},
{"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."},
{"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."},
{"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."},
{"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."},

View File

@ -1054,6 +1054,14 @@ Field FieldVisitorFoldDimension::operator()(const Array & x) const
return res; return res;
} }
Field FieldVisitorFoldDimension::operator()(const Null & x) const
{
if (num_dimensions_to_fold == 0)
return x;
return Array();
}
void setAllObjectsToDummyTupleType(NamesAndTypesList & columns) void setAllObjectsToDummyTupleType(NamesAndTypesList & columns)
{ {
for (auto & column : columns) for (auto & column : columns)
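
The new Null overload makes folding a NULL into extra array dimensions produce an empty array rather than keeping the NULL. A standalone sketch of that rule, with simplified stand-in types:

#include <cstddef>
#include <variant>
#include <vector>

struct Null {};
using Field = std::variant<Null, std::vector<int>>;  // simplified stand-in for DB::Field

Field foldNull(std::size_t num_dimensions_to_fold)
{
    if (num_dimensions_to_fold == 0)
        return Null{};             // nothing left to fold: NULL stays NULL
    return std::vector<int>{};     // otherwise the NULL becomes an empty Array
}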

View File

@ -149,7 +149,7 @@ public:
Field operator()(const Array & x) const; Field operator()(const Array & x) const;
Field operator()(const Null & x) const { return x; } Field operator()(const Null & x) const;
template <typename T> template <typename T>
Field operator()(const T & x) const Field operator()(const T & x) const

View File

@ -7,6 +7,7 @@
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
@ -526,68 +527,26 @@ void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num
void SerializationTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const void SerializationTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{ {
for (size_t i = 0; i < elems.size(); ++i) WriteBufferFromOwnString wb;
{ serializeText(column, row_num, wb, settings);
if (i != 0) writeCSV(wb.str(), ostr);
writeChar(settings.csv.tuple_delimiter, ostr);
elems[i]->serializeTextCSV(extractElementColumn(column, i), row_num, ostr, settings);
}
} }
void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{ {
addElementSafe<void>(elems.size(), column, [&] String s;
{ readCSV(s, istr, settings.csv);
const size_t size = elems.size(); ReadBufferFromString rb(s);
for (size_t i = 0; i < size; ++i) deserializeText(column, rb, settings, true);
{
if (i != 0)
{
skipWhitespaceIfAny(istr);
assertChar(settings.csv.tuple_delimiter, istr);
skipWhitespaceIfAny(istr);
}
auto & element_column = extractElementColumn(column, i);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]);
else
elems[i]->deserializeTextCSV(element_column, istr, settings);
}
return true;
});
} }
bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{ {
return addElementSafe<bool>(elems.size(), column, [&] String s;
{ if (!tryReadCSV(s, istr, settings.csv))
const size_t size = elems.size(); return false;
for (size_t i = 0; i < size; ++i) ReadBufferFromString rb(s);
{ return tryDeserializeText(column, rb, settings, true);
if (i != 0)
{
skipWhitespaceIfAny(istr);
if (!checkChar(settings.csv.tuple_delimiter, istr))
return false;
skipWhitespaceIfAny(istr);
}
auto & element_column = extractElementColumn(column, i);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
{
if (!SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]))
return false;
}
else
{
if (!elems[i]->tryDeserializeTextCSV(element_column, istr, settings))
return false;
}
}
return true;
});
} }
void SerializationTuple::enumerateStreams( void SerializationTuple::enumerateStreams(
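
The rewrite stops emitting tuple elements as separate CSV fields: the tuple is rendered as one text value and quoted as a single field, and deserialization reads one CSV field back and parses the tuple text from it. A minimal sketch of the quoting half (tupleToText is a hypothetical stand-in for serializeText):

#include <iostream>
#include <string>

// Hypothetical stand-in for serializeText(): renders a two-element tuple as "(a,b)".
std::string tupleToText(const std::string & a, const std::string & b)
{
    return "(" + a + "," + b + ")";
}

// Generic CSV quoting: wrap in double quotes and double embedded quotes, so commas
// inside the tuple no longer collide with the CSV delimiter.
std::string writeCSVField(const std::string & s)
{
    std::string out = "\"";
    for (char c : s)
    {
        out += c;
        if (c == '"')
            out += '"';
    }
    out += '"';
    return out;
}

int main()
{
    std::cout << writeCSVField(tupleToText("1", "'a,b'")) << '\n';  // prints "(1,'a,b')"
}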

View File

@ -1,11 +1,16 @@
#include <Databases/DatabaseOnDisk.h> #include <Databases/DatabaseOnDisk.h>
#include <filesystem>
#include <iterator>
#include <span>
#include <Databases/DatabaseAtomic.h>
#include <Databases/DatabaseOrdinary.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h> #include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Interpreters/ApplyWithSubqueryVisitor.h> #include <Interpreters/ApplyWithSubqueryVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h> #include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/InterpreterCreateQuery.h> #include <Interpreters/InterpreterCreateQuery.h>
#include <Parsers/ASTCreateQuery.h> #include <Parsers/ASTCreateQuery.h>
@ -16,14 +21,11 @@
#include <Storages/IStorage.h> #include <Storages/IStorage.h>
#include <Storages/StorageFactory.h> #include <Storages/StorageFactory.h>
#include <TableFunctions/TableFunctionFactory.h> #include <TableFunctions/TableFunctionFactory.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include <Common/filesystemHelpers.h>
#include <Common/CurrentMetrics.h> #include <Common/CurrentMetrics.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Databases/DatabaseOrdinary.h> #include <Common/escapeForFileName.h>
#include <Databases/DatabaseAtomic.h> #include <Common/filesystemHelpers.h>
#include <filesystem> #include <Common/logger_useful.h>
namespace fs = std::filesystem; namespace fs = std::filesystem;
@ -613,7 +615,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
}; };
/// Metadata files to load: name and flag for .tmp_drop files /// Metadata files to load: name and flag for .tmp_drop files
std::set<std::pair<String, bool>> metadata_files; std::vector<std::pair<String, bool>> metadata_files;
fs::directory_iterator dir_end; fs::directory_iterator dir_end;
for (fs::directory_iterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it) for (fs::directory_iterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it)
@ -634,7 +636,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
if (endsWith(file_name, ".sql.tmp_drop")) if (endsWith(file_name, ".sql.tmp_drop"))
{ {
/// There are files that we tried to delete previously /// There are files that we tried to delete previously
metadata_files.emplace(file_name, false); metadata_files.emplace_back(file_name, false);
} }
else if (endsWith(file_name, ".sql.tmp")) else if (endsWith(file_name, ".sql.tmp"))
{ {
@ -645,23 +647,30 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
else if (endsWith(file_name, ".sql")) else if (endsWith(file_name, ".sql"))
{ {
/// The required files have names like `table_name.sql` /// The required files have names like `table_name.sql`
metadata_files.emplace(file_name, true); metadata_files.emplace_back(file_name, true);
} }
else else
throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Incorrect file extension: {} in metadata directory {}", file_name, getMetadataPath()); throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Incorrect file extension: {} in metadata directory {}", file_name, getMetadataPath());
} }
std::sort(metadata_files.begin(), metadata_files.end());
metadata_files.erase(std::unique(metadata_files.begin(), metadata_files.end()), metadata_files.end());
/// Read and parse metadata in parallel /// Read and parse metadata in parallel
ThreadPool pool(CurrentMetrics::DatabaseOnDiskThreads, CurrentMetrics::DatabaseOnDiskThreadsActive, CurrentMetrics::DatabaseOnDiskThreadsScheduled); ThreadPool pool(CurrentMetrics::DatabaseOnDiskThreads, CurrentMetrics::DatabaseOnDiskThreadsActive, CurrentMetrics::DatabaseOnDiskThreadsScheduled);
for (const auto & file : metadata_files) const auto batch_size = metadata_files.size() / pool.getMaxThreads() + 1;
for (auto it = metadata_files.begin(); it < metadata_files.end(); std::advance(it, batch_size))
{ {
pool.scheduleOrThrowOnError([&]() std::span batch{it, std::min(std::next(it, batch_size), metadata_files.end())};
{ pool.scheduleOrThrowOnError(
if (file.second) [batch, &process_metadata_file, &process_tmp_drop_metadata_file]() mutable
process_metadata_file(file.first); {
else for (const auto & file : batch)
process_tmp_drop_metadata_file(file.first); if (file.second)
}); process_metadata_file(file.first);
else
process_tmp_drop_metadata_file(file.first);
});
} }
pool.wait(); pool.wait();
} }
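
Instead of scheduling one pool task per metadata file, the loop now carves the sorted, deduplicated vector into contiguous batches, one task per batch. A minimal sketch of the same scheme over std::thread (the real code uses ClickHouse's ThreadPool):

#include <algorithm>
#include <cstddef>
#include <functional>
#include <span>
#include <string>
#include <thread>
#include <vector>

void processInBatches(std::vector<std::string> & files,
                      std::size_t max_threads,
                      const std::function<void(std::span<const std::string>)> & process)
{
    // Sort + unique replaces the old std::set: same dedup, contiguous storage.
    std::sort(files.begin(), files.end());
    files.erase(std::unique(files.begin(), files.end()), files.end());

    const std::size_t batch_size = files.size() / max_threads + 1;
    std::vector<std::thread> workers;
    for (std::size_t begin = 0; begin < files.size(); begin += batch_size)
    {
        const std::size_t end = std::min(begin + batch_size, files.size());
        workers.emplace_back([&, begin, end] { process({files.data() + begin, end - begin}); });
    }
    for (auto & w : workers)
        w.join();
}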

View File

@ -71,6 +71,17 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo
query->replace(ast_create_query.refresh_strategy, metadata.refresh); query->replace(ast_create_query.refresh_strategy, metadata.refresh);
} }
if (metadata.sql_security_type)
{
auto new_sql_security = std::make_shared<ASTSQLSecurity>();
new_sql_security->type = metadata.sql_security_type;
if (metadata.definer)
new_sql_security->definer = std::make_shared<ASTUserNameWithHost>(*metadata.definer);
ast_create_query.sql_security = std::move(new_sql_security);
}
/// MaterializedView, Dictionary are types of CREATE query without storage. /// MaterializedView, Dictionary are types of CREATE query without storage.
if (ast_create_query.storage) if (ast_create_query.storage)
{ {

View File

@ -303,8 +303,8 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
/// Try to determine the type of value inside quotes /// Try to determine the type of value inside quotes
auto type = tryInferDataTypeForSingleField(data, format_settings); auto type = tryInferDataTypeForSingleField(data, format_settings);
/// If we couldn't infer any type or it's tuple in quotes or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string. /// If we couldn't infer any type or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string.
if (!type || isTuple(type) || (isNumber(type) && !format_settings.csv.try_infer_numbers_from_strings)) if (!type || (isNumber(type) && !format_settings.csv.try_infer_numbers_from_strings))
return std::make_shared<DataTypeString>(); return std::make_shared<DataTypeString>();
return type; return type;

View File

@ -56,6 +56,10 @@ public:
if (!IntervalKind::tryParseString(datepart_param, datepart_kind)) if (!IntervalKind::tryParseString(datepart_param, datepart_kind))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} doesn't look like a datepart name in {}", datepart_param, getName()); throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} doesn't look like a datepart name in {}", datepart_param, getName());
if (datepart_kind == IntervalKind::Kind::Nanosecond || datepart_kind == IntervalKind::Kind::Microsecond
|| datepart_kind == IntervalKind::Kind::Millisecond)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} doesn't support {}", getName(), datepart_param);
result_type_is_date = (datepart_kind == IntervalKind::Kind::Year) result_type_is_date = (datepart_kind == IntervalKind::Kind::Year)
|| (datepart_kind == IntervalKind::Kind::Quarter) || (datepart_kind == IntervalKind::Kind::Month) || (datepart_kind == IntervalKind::Kind::Quarter) || (datepart_kind == IntervalKind::Kind::Month)
|| (datepart_kind == IntervalKind::Kind::Week); || (datepart_kind == IntervalKind::Kind::Week);

View File

@ -7,7 +7,6 @@
#include <Functions/PerformanceAdaptors.h> #include <Functions/PerformanceAdaptors.h>
#include <Interpreters/castColumn.h> #include <Interpreters/castColumn.h>
#include <Common/TargetSpecific.h> #include <Common/TargetSpecific.h>
#include <base/range.h>
#include <cmath> #include <cmath>
#include <numbers> #include <numbers>
@ -42,121 +41,6 @@ namespace ErrorCodes
namespace namespace
{ {
constexpr double PI = std::numbers::pi_v<double>;
constexpr float PI_F = std::numbers::pi_v<float>;
constexpr float RAD_IN_DEG = static_cast<float>(PI / 180.0);
constexpr float RAD_IN_DEG_HALF = static_cast<float>(PI / 360.0);
constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063%
constexpr float COS_LUT_SIZE_F = 1024.0f; // maxerr 0.00063%
constexpr size_t ASIN_SQRT_LUT_SIZE = 512;
constexpr size_t METRIC_LUT_SIZE = 1024;
/** Earth radius in meters using WGS84 authalic radius.
* We use this value to be consistent with the H3 library.
*/
constexpr float EARTH_RADIUS = 6371007.180918475f;
constexpr float EARTH_DIAMETER = 2 * EARTH_RADIUS;
float cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table
float asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table
float sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude
float sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude
float wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude
inline double sqr(double v)
{
return v * v;
}
inline float sqrf(float v)
{
return v * v;
}
void geodistInit()
{
for (size_t i = 0; i <= COS_LUT_SIZE; ++i)
cos_lut[i] = static_cast<float>(cos(2 * PI * i / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE]
for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i)
asin_sqrt_lut[i] = static_cast<float>(asin(
sqrt(static_cast<double>(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE]
for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i)
{
double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE]
/// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees),
/// depending on the latitude (in radians).
/// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67
wgs84_metric_meters_lut[i * 2] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * latitude) + 1.20 * cos(4 * latitude)));
wgs84_metric_meters_lut[i * 2 + 1] = static_cast<float>(sqr(111415.13 * cos(latitude) - 94.55 * cos(3 * latitude) + 0.12 * cos(5 * latitude)));
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
sphere_metric_lut[i] = static_cast<float>(sqr(cos(latitude)));
}
}
inline NO_SANITIZE_UNDEFINED size_t floatToIndex(float x)
{
/// Implementation-specific behaviour on overflow or infinite values.
return static_cast<size_t>(x);
}
inline float geodistDegDiff(float f)
{
f = fabsf(f);
if (f > 180)
f = 360 - f;
return f;
}
inline float geodistFastCos(float x)
{
float y = fabsf(x) * (COS_LUT_SIZE_F / PI_F / 2.0f);
size_t i = floatToIndex(y);
y -= i;
i &= (COS_LUT_SIZE - 1);
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
}
inline float geodistFastSin(float x)
{
float y = fabsf(x) * (COS_LUT_SIZE_F / PI_F / 2.0f);
size_t i = floatToIndex(y);
y -= i;
i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
}
/// fast implementation of asin(sqrt(x))
/// max error in floats 0.00369%, in doubles 0.00072%
inline float geodistFastAsinSqrt(float x)
{
if (x < 0.122f)
{
// distance under 4546 km, Taylor error under 0.00072%
float y = sqrtf(x);
return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f;
}
if (x < 0.948f)
{
// distance under 17083 km, 512-entry LUT error under 0.00072%
x *= ASIN_SQRT_LUT_SIZE;
size_t i = floatToIndex(x);
return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i);
}
return asinf(sqrtf(x)); // distance over 17083 km, just compute exact
}
enum class Method enum class Method
{ {
SPHERE_DEGREES, SPHERE_DEGREES,
@ -164,18 +48,117 @@ enum class Method
WGS84_METERS, WGS84_METERS,
}; };
} constexpr size_t ASIN_SQRT_LUT_SIZE = 512;
constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063%
constexpr size_t METRIC_LUT_SIZE = 1024;
/// Earth radius in meters using WGS84 authalic radius.
/// We use this value to be consistent with the H3 library.
constexpr double EARTH_RADIUS = 6371007.180918475;
constexpr double EARTH_DIAMETER = 2.0 * EARTH_RADIUS;
constexpr double PI = std::numbers::pi_v<double>;
template <typename T>
T sqr(T v) { return v * v; }
template <typename T>
struct Impl
{
T cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table
T asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table
T sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude
T sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude
T wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude
Impl()
{
for (size_t i = 0; i <= COS_LUT_SIZE; ++i)
cos_lut[i] = T(std::cos(2 * PI * static_cast<double>(i) / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE]
for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i)
asin_sqrt_lut[i] = T(std::asin(std::sqrt(static_cast<double>(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE]
for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i)
{
double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE]
/// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees),
/// depending on the latitude (in radians).
/// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67
wgs84_metric_meters_lut[i * 2] = T(sqr(111132.09 - 566.05 * std::cos(2.0 * latitude) + 1.20 * std::cos(4.0 * latitude)));
wgs84_metric_meters_lut[i * 2 + 1] = T(sqr(111415.13 * std::cos(latitude) - 94.55 * std::cos(3.0 * latitude) + 0.12 * std::cos(5.0 * latitude)));
sphere_metric_meters_lut[i] = T(sqr((EARTH_DIAMETER * PI / 360) * std::cos(latitude)));
sphere_metric_lut[i] = T(sqr(std::cos(latitude)));
}
}
static inline NO_SANITIZE_UNDEFINED size_t toIndex(T x)
{
/// Implementation-specific behaviour on overflow or infinite values.
return static_cast<size_t>(x);
}
static inline T degDiff(T f)
{
f = std::abs(f);
if (f > 180)
f = 360 - f;
return f;
}
inline T fastCos(T x)
{
T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0));
size_t i = toIndex(y);
y -= i;
i &= (COS_LUT_SIZE - 1);
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
}
inline T fastSin(T x)
{
T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0));
size_t i = toIndex(y);
y -= i;
i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
}
/// fast implementation of asin(sqrt(x))
/// max error in floats 0.00369%, in doubles 0.00072%
inline T fastAsinSqrt(T x)
{
if (x < T(0.122))
{
// distance under 4546 km, Taylor error under 0.00072%
T y = std::sqrt(x);
return y + x * y * T(0.166666666666666) + x * x * y * T(0.075) + x * x * x * y * T(0.044642857142857);
}
if (x < T(0.948))
{
// distance under 17083 km, 512-entry LUT error under 0.00072%
x *= ASIN_SQRT_LUT_SIZE;
size_t i = toIndex(x);
return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i);
}
return std::asin(std::sqrt(x)); /// distance is over 17083 km, just compute exact
}
};
template <typename T> Impl<T> impl;
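
fastCos/fastSin below are the classic table-plus-linear-interpolation trick: precompute one period of cos, then blend two neighbouring entries. A standalone sketch of the idea:

#include <cmath>
#include <cstddef>

constexpr std::size_t LUT_SIZE = 1024;         // power of two, so i & (LUT_SIZE - 1) wraps
constexpr double PI = 3.14159265358979323846;
static double lut[LUT_SIZE + 1];               // one extra slot so lut[i + 1] stays in bounds

void initLut()
{
    for (std::size_t i = 0; i <= LUT_SIZE; ++i)
        lut[i] = std::cos(2 * PI * i / LUT_SIZE);
}

double fastCosSketch(double x)
{
    double y = std::abs(x) * (LUT_SIZE / (2 * PI));  // map one period onto [0, LUT_SIZE)
    std::size_t i = static_cast<std::size_t>(y);     // implementation-specific on overflow, as above
    y -= i;                                          // fractional part drives the blend
    i &= (LUT_SIZE - 1);
    return lut[i] + (lut[i + 1] - lut[i]) * y;       // linear interpolation of neighbours
}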
DECLARE_MULTITARGET_CODE( DECLARE_MULTITARGET_CODE(
namespace namespace
{ {
template <Method method> template <Method method, typename T>
float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg) T distance(T lon1deg, T lat1deg, T lon2deg, T lat2deg)
{ {
float lat_diff = geodistDegDiff(lat1deg - lat2deg); T lat_diff = impl<T>.degDiff(lat1deg - lat2deg);
float lon_diff = geodistDegDiff(lon1deg - lon2deg); T lon_diff = impl<T>.degDiff(lon1deg - lon2deg);
if (lon_diff < 13) if (lon_diff < 13)
{ {
@ -187,51 +170,54 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
/// (Remember how a plane flies from Amsterdam to New York) /// (Remember how a plane flies from Amsterdam to New York)
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line. /// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes T latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1); size_t latitude_midpoint_index = impl<T>.toIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1". /// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
float k_lat{}; T k_lat{};
float k_lon{}; T k_lon{};
if constexpr (method == Method::SPHERE_DEGREES) if constexpr (method == Method::SPHERE_DEGREES)
{ {
k_lat = 1; k_lat = 1;
k_lon = sphere_metric_lut[latitude_midpoint_index] k_lon = impl<T>.sphere_metric_lut[latitude_midpoint_index]
+ (sphere_metric_lut[latitude_midpoint_index + 1] - sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index); + (impl<T>.sphere_metric_lut[latitude_midpoint_index + 1] - impl<T>.sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
} }
else if constexpr (method == Method::SPHERE_METERS) else if constexpr (method == Method::SPHERE_METERS)
{ {
k_lat = sqrf(EARTH_DIAMETER * PI_F / 360.0f); k_lat = sqr(T(EARTH_DIAMETER) * T(PI) / T(360.0));
k_lon = sphere_metric_meters_lut[latitude_midpoint_index] k_lon = impl<T>.sphere_metric_meters_lut[latitude_midpoint_index]
+ (sphere_metric_meters_lut[latitude_midpoint_index + 1] - sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index); + (impl<T>.sphere_metric_meters_lut[latitude_midpoint_index + 1] - impl<T>.sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
} }
else if constexpr (method == Method::WGS84_METERS) else if constexpr (method == Method::WGS84_METERS)
{ {
k_lat = wgs84_metric_meters_lut[latitude_midpoint_index * 2] k_lat = impl<T>.wgs84_metric_meters_lut[latitude_midpoint_index * 2]
+ (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index); + (impl<T>.wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - impl<T>.wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index);
k_lon = wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1] k_lon = impl<T>.wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]
+ (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index); + (impl<T>.wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - impl<T>.wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index);
} }
/// Metric on a tangent plane: it differs from Euclidean metric only by scale of coordinates. /// Metric on a tangent plane: it differs from Euclidean metric only by scale of coordinates.
return sqrtf(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff); return std::sqrt(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff);
} }
else else
{ {
// points too far away; use haversine /// Points are too far away: use Haversine.
float a = sqrf(geodistFastSin(lat_diff * RAD_IN_DEG_HALF)) static constexpr T RAD_IN_DEG = T(PI / 180.0);
+ geodistFastCos(lat1deg * RAD_IN_DEG) * geodistFastCos(lat2deg * RAD_IN_DEG) * sqrf(geodistFastSin(lon_diff * RAD_IN_DEG_HALF)); static constexpr T RAD_IN_DEG_HALF = T(PI / 360.0);
T a = sqr(impl<T>.fastSin(lat_diff * RAD_IN_DEG_HALF))
+ impl<T>.fastCos(lat1deg * RAD_IN_DEG) * impl<T>.fastCos(lat2deg * RAD_IN_DEG) * sqr(impl<T>.fastSin(lon_diff * RAD_IN_DEG_HALF));
if constexpr (method == Method::SPHERE_DEGREES) if constexpr (method == Method::SPHERE_DEGREES)
return (360.0f / PI_F) * geodistFastAsinSqrt(a); return (T(360.0) / T(PI)) * impl<T>.fastAsinSqrt(a);
else else
return EARTH_DIAMETER * geodistFastAsinSqrt(a); return T(EARTH_DIAMETER) * impl<T>.fastAsinSqrt(a);
} }
} }
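
For far-apart points the code falls back to the Haversine formula, evaluated above with the LUT helpers. For reference, the exact version with plain std:: math and the same WGS84 authalic radius:

#include <cmath>

template <typename T>
T haversineMeters(T lon1deg, T lat1deg, T lon2deg, T lat2deg)
{
    constexpr double PI = 3.14159265358979323846;
    constexpr double EARTH_DIAMETER = 2.0 * 6371007.180918475;  // WGS84 authalic, as above

    const T rad = T(PI / 180.0);
    const T half_rad = T(PI / 360.0);  // degrees -> half the angle, in radians
    T s_lat = std::sin((lat1deg - lat2deg) * half_rad);
    T s_lon = std::sin((lon1deg - lon2deg) * half_rad);
    T a = s_lat * s_lat + std::cos(lat1deg * rad) * std::cos(lat2deg * rad) * s_lon * s_lon;
    return T(EARTH_DIAMETER) * std::asin(std::sqrt(a));
}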
@ -241,13 +227,24 @@ template <Method method>
class FunctionGeoDistance : public IFunction class FunctionGeoDistance : public IFunction
{ {
public: public:
static constexpr auto name = explicit FunctionGeoDistance(ContextPtr context)
(method == Method::SPHERE_DEGREES) ? "greatCircleAngle" {
: ((method == Method::SPHERE_METERS) ? "greatCircleDistance" always_float32 = !context->getSettingsRef().geo_distance_returns_float64_on_float64_arguments;
: "geoDistance"); }
private: private:
String getName() const override { return name; } bool always_float32;
String getName() const override
{
if constexpr (method == Method::SPHERE_DEGREES)
return "greatCircleAngle";
if constexpr (method == Method::SPHERE_METERS)
return "greatCircleDistance";
else
return "geoDistance";
}
size_t getNumberOfArguments() const override { return 4; } size_t getNumberOfArguments() const override { return 4; }
bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; }
@ -255,22 +252,31 @@ private:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{ {
for (const auto arg_idx : collections::range(0, arguments.size())) bool has_float64 = false;
for (size_t arg_idx = 0; arg_idx < 4; ++arg_idx)
{ {
const auto * arg = arguments[arg_idx].get(); WhichDataType which(arguments[arg_idx]);
if (!isNumber(WhichDataType(arg)))
if (!isNumber(which))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. " throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. "
"Must be numeric", arg->getName(), std::to_string(arg_idx + 1), getName()); "Must be numeric", arguments[arg_idx]->getName(), std::to_string(arg_idx + 1), getName());
if (which.isFloat64())
has_float64 = true;
} }
return std::make_shared<DataTypeFloat32>(); if (has_float64 && !always_float32)
return std::make_shared<DataTypeFloat64>();
else
return std::make_shared<DataTypeFloat32>();
} }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{ {
auto dst = ColumnVector<Float32>::create(); bool returns_float64 = WhichDataType(result_type).isFloat64();
auto & dst_data = dst->getData();
dst_data.resize(input_rows_count); auto dst = result_type->createColumn();
auto arguments_copy = arguments; auto arguments_copy = arguments;
for (auto & argument : arguments_copy) for (auto & argument : arguments_copy)
@ -280,10 +286,24 @@ private:
argument.type = result_type; argument.type = result_type;
} }
const auto * col_lon1 = convertArgumentColumnToFloat32(arguments_copy, 0); if (returns_float64)
const auto * col_lat1 = convertArgumentColumnToFloat32(arguments_copy, 1); run<Float64>(arguments_copy, dst, input_rows_count);
const auto * col_lon2 = convertArgumentColumnToFloat32(arguments_copy, 2); else
const auto * col_lat2 = convertArgumentColumnToFloat32(arguments_copy, 3); run<Float32>(arguments_copy, dst, input_rows_count);
return dst;
}
template <typename T>
void run(const ColumnsWithTypeAndName & arguments, MutableColumnPtr & dst, size_t input_rows_count) const
{
const auto * col_lon1 = convertArgumentColumn<T>(arguments, 0);
const auto * col_lat1 = convertArgumentColumn<T>(arguments, 1);
const auto * col_lon2 = convertArgumentColumn<T>(arguments, 2);
const auto * col_lat2 = convertArgumentColumn<T>(arguments, 3);
auto & dst_data = assert_cast<ColumnVector<T> &>(*dst).getData();
dst_data.resize(input_rows_count);
for (size_t row_num = 0; row_num < input_rows_count; ++row_num) for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
{ {
@ -291,20 +311,20 @@ private:
col_lon1->getData()[row_num], col_lat1->getData()[row_num], col_lon1->getData()[row_num], col_lat1->getData()[row_num],
col_lon2->getData()[row_num], col_lat2->getData()[row_num]); col_lon2->getData()[row_num], col_lat2->getData()[row_num]);
} }
return dst;
} }
const ColumnFloat32 * convertArgumentColumnToFloat32(const ColumnsWithTypeAndName & arguments, size_t argument_index) const template <typename T>
const ColumnVector<T> * convertArgumentColumn(const ColumnsWithTypeAndName & arguments, size_t argument_index) const
{ {
const auto * column_typed = checkAndGetColumn<ColumnFloat32>(arguments[argument_index].column.get()); const auto * column_typed = checkAndGetColumn<ColumnVector<T>>(arguments[argument_index].column.get());
if (!column_typed) if (!column_typed)
throw Exception( throw Exception(
ErrorCodes::ILLEGAL_COLUMN, ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float32.", "Illegal type {} of argument {} of function {}. Must be {}.",
arguments[argument_index].type->getName(), arguments[argument_index].type->getName(),
argument_index + 1, argument_index + 1,
getName()); getName(),
TypeName<T>);
return column_typed; return column_typed;
} }
@ -316,18 +336,19 @@ template <Method method>
class FunctionGeoDistance : public TargetSpecific::Default::FunctionGeoDistance<method> class FunctionGeoDistance : public TargetSpecific::Default::FunctionGeoDistance<method>
{ {
public: public:
explicit FunctionGeoDistance(ContextPtr context) : selector(context) explicit FunctionGeoDistance(ContextPtr context)
: TargetSpecific::Default::FunctionGeoDistance<method>(context), selector(context)
{ {
selector.registerImplementation<TargetArch::Default, selector.registerImplementation<TargetArch::Default,
TargetSpecific::Default::FunctionGeoDistance<method>>(); TargetSpecific::Default::FunctionGeoDistance<method>>(context);
#if USE_MULTITARGET_CODE #if USE_MULTITARGET_CODE
selector.registerImplementation<TargetArch::AVX, selector.registerImplementation<TargetArch::AVX,
TargetSpecific::AVX::FunctionGeoDistance<method>>(); TargetSpecific::AVX::FunctionGeoDistance<method>>(context);
selector.registerImplementation<TargetArch::AVX2, selector.registerImplementation<TargetArch::AVX2,
TargetSpecific::AVX2::FunctionGeoDistance<method>>(); TargetSpecific::AVX2::FunctionGeoDistance<method>>(context);
selector.registerImplementation<TargetArch::AVX512F, selector.registerImplementation<TargetArch::AVX512F,
TargetSpecific::AVX512F::FunctionGeoDistance<method>>(); TargetSpecific::AVX512F::FunctionGeoDistance<method>>(context);
#endif #endif
} }
@ -345,12 +366,13 @@ private:
ImplementationSelector<IFunction> selector; ImplementationSelector<IFunction> selector;
}; };
}
REGISTER_FUNCTION(GeoDistance) REGISTER_FUNCTION(GeoDistance)
{ {
geodistInit(); factory.registerFunction("greatCircleAngle", [](ContextPtr context) { return std::make_shared<FunctionGeoDistance<Method::SPHERE_DEGREES>>(std::move(context)); });
factory.registerFunction<FunctionGeoDistance<Method::SPHERE_DEGREES>>(); factory.registerFunction("greatCircleDistance", [](ContextPtr context) { return std::make_shared<FunctionGeoDistance<Method::SPHERE_METERS>>(std::move(context)); });
factory.registerFunction<FunctionGeoDistance<Method::SPHERE_METERS>>(); factory.registerFunction("geoDistance", [](ContextPtr context) { return std::make_shared<FunctionGeoDistance<Method::WGS84_METERS>>(std::move(context)); });
factory.registerFunction<FunctionGeoDistance<Method::WGS84_METERS>>();
} }
} }

View File

@ -14,4 +14,9 @@ REGISTER_FUNCTION(ScalarSubqueryResult)
factory.registerFunction<FunctionScalarSubqueryResult>(); factory.registerFunction<FunctionScalarSubqueryResult>();
} }
REGISTER_FUNCTION(ActionName)
{
factory.registerFunction<FunctionActionName>();
}
} }

View File

@ -42,4 +42,18 @@ struct ScalarSubqueryResultName
using FunctionIdentity = FunctionIdentityBase<IdentityName>; using FunctionIdentity = FunctionIdentityBase<IdentityName>;
using FunctionScalarSubqueryResult = FunctionIdentityBase<ScalarSubqueryResultName>; using FunctionScalarSubqueryResult = FunctionIdentityBase<ScalarSubqueryResultName>;
struct ActionNameName
{
static constexpr auto name = "__actionName";
};
class FunctionActionName : public FunctionIdentityBase<ActionNameName>
{
public:
using FunctionIdentityBase::FunctionIdentityBase;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionActionName>(); }
size_t getNumberOfArguments() const override { return 2; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
};
} }

View File

@ -79,7 +79,7 @@ inline char * writeVarInt(Int64 x, char * ostr)
return writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr); return writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
} }
namespace impl namespace varint_impl
{ {
template <bool check_eof> template <bool check_eof>
@ -106,8 +106,8 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
{ {
if (istr.buffer().end() - istr.position() >= 10) if (istr.buffer().end() - istr.position() >= 10)
return impl::readVarUInt<false>(x, istr); return varint_impl::readVarUInt<false>(x, istr);
return impl::readVarUInt<true>(x, istr); return varint_impl::readVarUInt<true>(x, istr);
} }
inline void readVarUInt(UInt64 & x, std::istream & istr) inline void readVarUInt(UInt64 & x, std::istream & istr)
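
The `>= 10` threshold exists because a 64-bit value occupies at most ten 7-bit groups, so once that many bytes are buffered the per-byte bounds checks can be skipped. A sketch of the underlying decoding (a hypothetical helper, not the library's readVarUInt):

#include <cstddef>
#include <cstdint>

// Decodes a LEB128-style varint: 7 data bits per byte, high bit set means "more follows".
// Returns the new read position, or nullptr if the input ended mid-value.
inline const char * decodeVarUInt(std::uint64_t & x, const char * pos, const char * end)
{
    x = 0;
    for (std::size_t i = 0; i < 10; ++i)  // 10 groups suffice for any 64-bit value
    {
        if (pos == end)
            return nullptr;
        const auto byte = static_cast<std::uint8_t>(*pos++);
        x |= static_cast<std::uint64_t>(byte & 0x7F) << (7 * i);
        if (!(byte & 0x80))
            return pos;                   // continuation bit clear: last group
    }
    return pos;                           // overlong encoding; real code may reject it
}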

View File

@ -741,7 +741,7 @@ Block ActionsDAG::updateHeader(Block header) const
catch (Exception & e) catch (Exception & e)
{ {
if (e.code() == ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK) if (e.code() == ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK)
e.addMessage(" in block {}", header.dumpStructure()); e.addMessage("in block {}", header.dumpStructure());
throw; throw;
} }

View File

@ -1219,7 +1219,7 @@ void Context::addWarningMessageAboutDatabaseOrdinary(const String & database_nam
/// We don't use getFlagsPath method, because it takes a shared lock. /// We don't use getFlagsPath method, because it takes a shared lock.
auto convert_databases_flag = fs::path(shared->flags_path) / "convert_ordinary_to_atomic"; auto convert_databases_flag = fs::path(shared->flags_path) / "convert_ordinary_to_atomic";
auto message = fmt::format("Server has databases (for example `{}`) with Ordinary engine, which was deprecated. " auto message = fmt::format("Server has databases (for example `{}`) with Ordinary engine, which was deprecated. "
"To convert this database to a new Atomic engine, please create a forcing flag {} and make sure that ClickHouse has write permission for it. " "To convert this database to the new Atomic engine, create a flag {} and make sure that ClickHouse has write permission for it. "
"Example: sudo touch '{}' && sudo chmod 666 '{}'", "Example: sudo touch '{}' && sudo chmod 666 '{}'",
database_name, database_name,
convert_databases_flag.string(), convert_databases_flag.string(), convert_databases_flag.string()); convert_databases_flag.string(), convert_databases_flag.string(), convert_databases_flag.string());

View File

@ -1143,7 +1143,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id)
TableMarkedAsDropped dropped_table; TableMarkedAsDropped dropped_table;
{ {
std::lock_guard lock(tables_marked_dropped_mutex); std::lock_guard lock(tables_marked_dropped_mutex);
time_t latest_drop_time = std::numeric_limits<time_t>::min(); auto latest_drop_time = std::numeric_limits<time_t>::min();
auto it_dropped_table = tables_marked_dropped.end(); auto it_dropped_table = tables_marked_dropped.end();
for (auto it = tables_marked_dropped.begin(); it != tables_marked_dropped.end(); ++it) for (auto it = tables_marked_dropped.begin(); it != tables_marked_dropped.end(); ++it)
{ {
@ -1168,7 +1168,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id)
} }
if (it_dropped_table == tables_marked_dropped.end()) if (it_dropped_table == tables_marked_dropped.end())
throw Exception(ErrorCodes::UNKNOWN_TABLE, throw Exception(ErrorCodes::UNKNOWN_TABLE,
"The drop task of table {} is in progress, has been dropped or the database engine doesn't support it", "Table {} is being dropped, has been dropped, or the database engine does not support UNDROP",
table_id.getNameForLogs()); table_id.getNameForLogs());
latest_metadata_dropped_path = it_dropped_table->metadata_path; latest_metadata_dropped_path = it_dropped_table->metadata_path;
String table_metadata_path = getPathForMetadata(it_dropped_table->table_id); String table_metadata_path = getPathForMetadata(it_dropped_table->table_id);

View File

@ -1881,7 +1881,7 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr
void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_attach, bool is_materialized_view) void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_attach, bool is_materialized_view)
{ {
/// If no SQL security is specified, apply default from default_*_view_sql_security setting. /// If no SQL security is specified, apply default from default_*_view_sql_security setting.
if (!sql_security.type.has_value()) if (!sql_security.type)
{ {
SQLSecurityType default_security; SQLSecurityType default_security;

View File

@ -18,14 +18,16 @@ namespace ErrorCodes
extern const int SUPPORT_IS_DISABLED; extern const int SUPPORT_IS_DISABLED;
} }
InterpreterUndropQuery::InterpreterUndropQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) InterpreterUndropQuery::InterpreterUndropQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_)
: WithMutableContext(context_)
, query_ptr(query_ptr_)
{ {
} }
BlockIO InterpreterUndropQuery::execute() BlockIO InterpreterUndropQuery::execute()
{ {
getContext()->checkAccess(AccessType::UNDROP_TABLE); getContext()->checkAccess(AccessType::UNDROP_TABLE);
auto & undrop = query_ptr->as<ASTUndropQuery &>(); auto & undrop = query_ptr->as<ASTUndropQuery &>();
if (!undrop.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext())) if (!undrop.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext()))
{ {

View File

@ -341,6 +341,11 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const
return part && part->hasProjection(name); return part && part->hasProjection(name);
} }
bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const
{
return part && part->hasBrokenProjection(name);
}
bool MutationsInterpreter::Source::isCompactPart() const bool MutationsInterpreter::Source::isCompactPart() const
{ {
return part && part->getType() == MergeTreeDataPartType::Compact; return part && part->getType() == MergeTreeDataPartType::Compact;
@ -802,7 +807,7 @@ void MutationsInterpreter::prepare(bool dry_run)
{ {
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
const auto & projection = projections_desc.get(command.projection_name); const auto & projection = projections_desc.get(command.projection_name);
if (!source.hasProjection(projection.name)) if (!source.hasProjection(projection.name) || source.hasBrokenProjection(projection.name))
{ {
for (const auto & column : projection.required_columns) for (const auto & column : projection.required_columns)
dependencies.emplace(column, ColumnDependency::PROJECTION); dependencies.emplace(column, ColumnDependency::PROJECTION);
@ -989,6 +994,13 @@ void MutationsInterpreter::prepare(bool dry_run)
if (!source.hasProjection(projection.name)) if (!source.hasProjection(projection.name))
continue; continue;
/// Always rebuild broken projections.
if (source.hasBrokenProjection(projection.name))
{
materialized_projections.insert(projection.name);
continue;
}
if (need_rebuild_projections) if (need_rebuild_projections)
{ {
materialized_projections.insert(projection.name); materialized_projections.insert(projection.name);
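
The rebuild decision in the second hunk condenses to a small predicate: broken projections are always rebuilt, everything else follows the mutation-wide flag. A simplified sketch, not the interpreter's code:

bool shouldMaterializeProjection(bool part_has_projection, bool is_broken, bool need_rebuild_all)
{
    if (!part_has_projection)
        return false;            // the part never had this projection; nothing to rebuild
    if (is_broken)
        return true;             // broken projections are always rebuilt
    return need_rebuild_all;     // otherwise obey the mutation-wide rebuild flag
}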

View File

@ -126,6 +126,7 @@ public:
bool materializeTTLRecalculateOnly() const; bool materializeTTLRecalculateOnly() const;
bool hasSecondaryIndex(const String & name) const; bool hasSecondaryIndex(const String & name) const;
bool hasProjection(const String & name) const; bool hasProjection(const String & name) const;
bool hasBrokenProjection(const String & name) const;
bool isCompactPart() const; bool isCompactPart() const;
void read( void read(

View File

@ -13,22 +13,26 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
void TableStatus::write(WriteBuffer & out) const void TableStatus::write(WriteBuffer & out, UInt64 client_protocol_revision) const
{ {
writeBinary(is_replicated, out); writeBinary(is_replicated, out);
if (is_replicated) if (is_replicated)
{ {
writeVarUInt(absolute_delay, out); writeVarUInt(absolute_delay, out);
if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK)
writeVarUInt(is_readonly, out);
} }
} }
void TableStatus::read(ReadBuffer & in) void TableStatus::read(ReadBuffer & in, UInt64 server_protocol_revision)
{ {
absolute_delay = 0; absolute_delay = 0;
readBinary(is_replicated, in); readBinary(is_replicated, in);
if (is_replicated) if (is_replicated)
{ {
readVarUInt(absolute_delay, in); readVarUInt(absolute_delay, in);
if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK)
readVarUInt(is_readonly, in);
} }
} }
@ -71,14 +75,14 @@ void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revis
throw Exception(ErrorCodes::LOGICAL_ERROR, "method TablesStatusResponse::write is called for unsupported client revision"); throw Exception(ErrorCodes::LOGICAL_ERROR, "method TablesStatusResponse::write is called for unsupported client revision");
writeVarUInt(table_states_by_id.size(), out); writeVarUInt(table_states_by_id.size(), out);
for (const auto & kv: table_states_by_id) for (const auto & kv : table_states_by_id)
{ {
const QualifiedTableName & table_name = kv.first; const QualifiedTableName & table_name = kv.first;
writeBinary(table_name.database, out); writeBinary(table_name.database, out);
writeBinary(table_name.table, out); writeBinary(table_name.table, out);
const TableStatus & status = kv.second; const TableStatus & status = kv.second;
status.write(out); status.write(out, client_protocol_revision);
} }
} }
@ -100,7 +104,7 @@ void TablesStatusResponse::read(ReadBuffer & in, UInt64 server_protocol_revision
readBinary(table_name.table, in); readBinary(table_name.table, in);
TableStatus status; TableStatus status;
status.read(in); status.read(in, server_protocol_revision);
table_states_by_id.emplace(std::move(table_name), std::move(status)); table_states_by_id.emplace(std::move(table_name), std::move(status));
} }
} }
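
The revision checks keep the wire format backward compatible: the new field is written only when the peer is recent enough to read it, and older peers stop right after absolute_delay. A condensed sketch of the pattern (the revision constant is an assumed value for illustration; varint encoding elided):

#include <cstdint>
#include <vector>

constexpr std::uint64_t MIN_REVISION_WITH_READ_ONLY_FLAG = 54465;  // assumed, for illustration only

struct StatusSketch
{
    bool is_replicated = false;
    std::uint32_t absolute_delay = 0;
    bool is_readonly = false;

    void write(std::vector<std::uint8_t> & out, std::uint64_t peer_revision) const
    {
        out.push_back(is_replicated);
        if (is_replicated)
        {
            out.push_back(static_cast<std::uint8_t>(absolute_delay));  // varint elided
            if (peer_revision >= MIN_REVISION_WITH_READ_ONLY_FLAG)
                out.push_back(is_readonly);  // only peers that know the field receive it
        }
    }
};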

View File

@ -28,9 +28,11 @@ struct TableStatus
{ {
bool is_replicated = false; bool is_replicated = false;
UInt32 absolute_delay = 0; UInt32 absolute_delay = 0;
/// Used to filter out read-only replicas for INSERTs
bool is_readonly = false;
void write(WriteBuffer & out) const; void write(WriteBuffer & out, UInt64 client_protocol_revision) const;
void read(ReadBuffer & in); void read(ReadBuffer & in, UInt64 server_protocol_revision);
}; };
struct TablesStatusRequest struct TablesStatusRequest

View File

@ -14,7 +14,7 @@ namespace DB
void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{ {
if (!type.has_value()) if (!type)
return; return;
if (definer || is_definer_current_user) if (definer || is_definer_current_user)

View File

@ -299,6 +299,7 @@ namespace DB
MR_MACROS(MOD, "MOD") \ MR_MACROS(MOD, "MOD") \
MR_MACROS(MODIFY_COLUMN, "MODIFY COLUMN") \ MR_MACROS(MODIFY_COLUMN, "MODIFY COLUMN") \
MR_MACROS(MODIFY_COMMENT, "MODIFY COMMENT") \ MR_MACROS(MODIFY_COMMENT, "MODIFY COMMENT") \
MR_MACROS(MODIFY_DEFINER, "MODIFY DEFINER") \
MR_MACROS(MODIFY_ORDER_BY, "MODIFY ORDER BY") \ MR_MACROS(MODIFY_ORDER_BY, "MODIFY ORDER BY") \
MR_MACROS(MODIFY_QUERY, "MODIFY QUERY") \ MR_MACROS(MODIFY_QUERY, "MODIFY QUERY") \
MR_MACROS(MODIFY_REFRESH, "MODIFY REFRESH") \ MR_MACROS(MODIFY_REFRESH, "MODIFY REFRESH") \
@ -445,6 +446,7 @@ namespace DB
MR_MACROS(SPATIAL, "SPATIAL") \ MR_MACROS(SPATIAL, "SPATIAL") \
MR_MACROS(SQL_SECURITY, "SQL SECURITY") \ MR_MACROS(SQL_SECURITY, "SQL SECURITY") \
MR_MACROS(SS, "SS") \ MR_MACROS(SS, "SS") \
MR_MACROS(START_TRANSACTION, "START TRANSACTION") \
MR_MACROS(STATISTIC, "STATISTIC") \ MR_MACROS(STATISTIC, "STATISTIC") \
MR_MACROS(STEP, "STEP") \ MR_MACROS(STEP, "STEP") \
MR_MACROS(STORAGE, "STORAGE") \ MR_MACROS(STORAGE, "STORAGE") \
@ -554,7 +556,7 @@ namespace DB
MR_MACROS(SSH_KEY, "SSH_KEY") \ MR_MACROS(SSH_KEY, "SSH_KEY") \
MR_MACROS(SSL_CERTIFICATE, "SSL_CERTIFICATE") \ MR_MACROS(SSL_CERTIFICATE, "SSL_CERTIFICATE") \
MR_MACROS(STRICTLY_ASCENDING, "STRICTLY_ASCENDING") \ MR_MACROS(STRICTLY_ASCENDING, "STRICTLY_ASCENDING") \
MR_MACROS(WITH_ITEMINDEX, "with_itemindex") \ MR_MACROS(WITH_ITEMINDEX, "WITH_ITEMINDEX") \
enum class Keyword : size_t enum class Keyword : size_t
{ {

View File

@ -41,6 +41,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_reset_setting(Keyword::RESET_SETTING); ParserKeyword s_reset_setting(Keyword::RESET_SETTING);
ParserKeyword s_modify_query(Keyword::MODIFY_QUERY); ParserKeyword s_modify_query(Keyword::MODIFY_QUERY);
ParserKeyword s_modify_sql_security(Keyword::MODIFY_SQL_SECURITY); ParserKeyword s_modify_sql_security(Keyword::MODIFY_SQL_SECURITY);
ParserKeyword s_modify_definer(Keyword::MODIFY_DEFINER);
ParserKeyword s_modify_refresh(Keyword::MODIFY_REFRESH); ParserKeyword s_modify_refresh(Keyword::MODIFY_REFRESH);
ParserKeyword s_add_index(Keyword::ADD_INDEX); ParserKeyword s_add_index(Keyword::ADD_INDEX);
@ -862,11 +863,16 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
return false; return false;
command->type = ASTAlterCommand::MODIFY_QUERY; command->type = ASTAlterCommand::MODIFY_QUERY;
} }
else if (s_modify_sql_security.ignore(pos, expected)) else if (s_modify_sql_security.checkWithoutMoving(pos, expected))
{ {
/// This is a hack so we can reuse parser from create and don't have to write `MODIFY SQL SECURITY SQL SECURITY INVOKER` s_modify.ignore(pos, expected);
--pos; if (!sql_security_p.parse(pos, command_sql_security, expected))
--pos; return false;
command->type = ASTAlterCommand::MODIFY_SQL_SECURITY;
}
else if (s_modify_definer.checkWithoutMoving(pos, expected))
{
s_modify.ignore(pos, expected);
if (!sql_security_p.parse(pos, command_sql_security, expected)) if (!sql_security_p.parse(pos, command_sql_security, expected))
return false; return false;
command->type = ASTAlterCommand::MODIFY_SQL_SECURITY; command->type = ASTAlterCommand::MODIFY_SQL_SECURITY;
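
checkWithoutMoving replaces the old rewind hack: peek at the whole keyword sequence without consuming it, eat only MODIFY, and let the shared SQL SECURITY sub-parser re-read the rest. A sketch of the pattern over a plain token vector (hypothetical helpers, not the real parser API):

#include <cstddef>
#include <string>
#include <vector>

using Tokens = std::vector<std::string>;

// Peek: do the tokens at position i start with kw? Does not advance i.
bool startsWithKeywords(const Tokens & t, std::size_t i, const Tokens & kw)
{
    for (std::size_t k = 0; k < kw.size(); ++k)
        if (i + k >= t.size() || t[i + k] != kw[k])
            return false;
    return true;
}

// Consume a single keyword, advancing i on success.
bool consumeKeyword(const Tokens & t, std::size_t & i, const std::string & kw)
{
    if (i < t.size() && t[i] == kw) { ++i; return true; }
    return false;
}

bool parseModifySqlSecurity(const Tokens & t, std::size_t & i)
{
    if (!startsWithKeywords(t, i, {"MODIFY", "SQL", "SECURITY"}))
        return false;
    consumeKeyword(t, i, "MODIFY");  // eat MODIFY only; the shared sub-parser then
    return true;                     // re-reads "SQL SECURITY <type>" itself
}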

View File

@ -14,6 +14,8 @@ bool ParserTransactionControl::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
if (ParserKeyword(Keyword::BEGIN_TRANSACTION).ignore(pos, expected)) if (ParserKeyword(Keyword::BEGIN_TRANSACTION).ignore(pos, expected))
action = ASTTransactionControl::BEGIN; action = ASTTransactionControl::BEGIN;
else if (ParserKeyword(Keyword::START_TRANSACTION).ignore(pos, expected))
action = ASTTransactionControl::BEGIN;
else if (ParserKeyword(Keyword::COMMIT).ignore(pos, expected)) else if (ParserKeyword(Keyword::COMMIT).ignore(pos, expected))
action = ASTTransactionControl::COMMIT; action = ASTTransactionControl::COMMIT;
else if (ParserKeyword(Keyword::ROLLBACK).ignore(pos, expected)) else if (ParserKeyword(Keyword::ROLLBACK).ignore(pos, expected))

View File

@ -157,6 +157,12 @@ public:
case QueryTreeNodeType::FUNCTION: case QueryTreeNodeType::FUNCTION:
{ {
const auto & function_node = node->as<FunctionNode &>(); const auto & function_node = node->as<FunctionNode &>();
if (function_node.getFunctionName() == "__actionName")
{
result = toString(function_node.getArguments().getNodes().at(1)->as<ConstantNode>()->getValue());
break;
}
String in_function_second_argument_node_name; String in_function_second_argument_node_name;
if (isNameOfInFunction(function_node.getFunctionName())) if (isNameOfInFunction(function_node.getFunctionName()))

View File

@ -223,7 +223,7 @@ bool analyzeProjectionCandidate(
{ {
const auto & created_projections = part_with_ranges.data_part->getProjectionParts(); const auto & created_projections = part_with_ranges.data_part->getProjectionParts();
auto it = created_projections.find(candidate.projection->name); auto it = created_projections.find(candidate.projection->name);
if (it != created_projections.end()) if (it != created_projections.end() && !it->second->is_broken)
{ {
projection_parts.push_back(it->second); projection_parts.push_back(it->second);
} }

View File

@ -237,10 +237,21 @@ FillingTransform::FillingTransform(
} }
logDebug("fill description", dumpSortDescription(fill_description)); logDebug("fill description", dumpSortDescription(fill_description));
std::set<size_t> unique_positions; std::unordered_set<size_t> ordinary_sort_positions;
for (const auto & desc : sort_description)
{
if (!desc.with_fill)
ordinary_sort_positions.insert(header_.getPositionByName(desc.column_name));
}
std::unordered_set<size_t> unique_positions;
for (auto pos : fill_column_positions) for (auto pos : fill_column_positions)
{
if (!unique_positions.insert(pos).second) if (!unique_positions.insert(pos).second)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Multiple WITH FILL for identical expressions is not supported in ORDER BY"); throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Multiple WITH FILL for identical expressions is not supported in ORDER BY");
if (ordinary_sort_positions.contains(pos))
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "ORDER BY containing the same expression with and without WITH FILL modifier is not supported");
}
if (use_with_fill_by_sorting_prefix) if (use_with_fill_by_sorting_prefix)
{ {

View File

@ -22,7 +22,6 @@ int callSetCertificate(SSL * ssl, [[maybe_unused]] void * arg)
} }
/// This is callback for OpenSSL. It will be called on every connection to obtain a certificate and private key. /// This is callback for OpenSSL. It will be called on every connection to obtain a certificate and private key.
int CertificateReloader::setCertificate(SSL * ssl) int CertificateReloader::setCertificate(SSL * ssl)
{ {
@ -30,17 +29,37 @@ int CertificateReloader::setCertificate(SSL * ssl)
if (!current) if (!current)
return -1; return -1;
SSL_use_certificate(ssl, const_cast<X509 *>(current->cert.certificate())); if (current->certs_chain.empty())
SSL_use_PrivateKey(ssl, const_cast<EVP_PKEY *>(static_cast<const EVP_PKEY *>(current->key))); return -1;
int err = SSL_check_private_key(ssl); if (auto err = SSL_clear_chain_certs(ssl); err != 1)
if (err != 1)
{ {
std::string msg = Poco::Net::Utility::getLastError(); LOG_ERROR(log, "Clear certificates {}", Poco::Net::Utility::getLastError());
LOG_ERROR(log, "Unusable key-pair {}", msg); return -1;
}
if (auto err = SSL_use_certificate(ssl, const_cast<X509 *>(current->certs_chain[0].certificate())); err != 1)
{
LOG_ERROR(log, "Use certificate {}", Poco::Net::Utility::getLastError());
return -1;
}
for (auto cert = current->certs_chain.begin() + 1; cert != current->certs_chain.end(); cert++)
{
if (auto err = SSL_add1_chain_cert(ssl, const_cast<X509 *>(cert->certificate())); err != 1)
{
LOG_ERROR(log, "Add certificate to chain {}", Poco::Net::Utility::getLastError());
return -1;
}
}
if (auto err = SSL_use_PrivateKey(ssl, const_cast<EVP_PKEY *>(static_cast<const EVP_PKEY *>(current->key))); err != 1)
{
LOG_ERROR(log, "Use private key {}", Poco::Net::Utility::getLastError());
return -1;
}
if (auto err = SSL_check_private_key(ssl); err != 1)
{
LOG_ERROR(log, "Unusable key-pair {}", Poco::Net::Utility::getLastError());
return -1; return -1;
} }
return 1; return 1;
} }
@ -100,7 +119,7 @@ void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & conf
CertificateReloader::Data::Data(std::string cert_path, std::string key_path, std::string pass_phrase) CertificateReloader::Data::Data(std::string cert_path, std::string key_path, std::string pass_phrase)
: cert(cert_path), key(/* public key */ "", /* private key */ key_path, pass_phrase) : certs_chain(Poco::Crypto::X509Certificate::readPEM(cert_path)), key(/* public key */ "", /* private key */ key_path, pass_phrase)
{ {
} }
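
The call order matters here: clear any previously installed extra chain, install the leaf certificate, append the intermediates, set the key, then verify the pair. Condensed into one function against raw OpenSSL (logging elided):

#include <openssl/ssl.h>
#include <cstddef>
#include <vector>

bool installChain(SSL * ssl, const std::vector<X509 *> & chain, EVP_PKEY * key)
{
    if (chain.empty())
        return false;
    if (SSL_clear_chain_certs(ssl) != 1)           // drop previously set extra certs
        return false;
    if (SSL_use_certificate(ssl, chain[0]) != 1)   // the leaf certificate goes first
        return false;
    for (std::size_t i = 1; i < chain.size(); ++i)
        if (SSL_add1_chain_cert(ssl, chain[i]) != 1)  // add1: the chain keeps its own reference
            return false;
    if (SSL_use_PrivateKey(ssl, key) != 1)
        return false;
    return SSL_check_private_key(ssl) == 1;        // leaf and key must match
}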

View File

@ -70,7 +70,7 @@ private:
struct Data struct Data
{ {
Poco::Crypto::X509Certificate cert; Poco::Crypto::X509Certificate::List certs_chain;
Poco::Crypto::EVPPKey key; Poco::Crypto::EVPPKey key;
Data(std::string cert_path, std::string key_path, std::string pass_phrase); Data(std::string cert_path, std::string key_path, std::string pass_phrase);

View File

@@ -1124,6 +1124,7 @@ void TCPHandler::processTablesStatusRequest()
         {
             status.is_replicated = true;
             status.absolute_delay = static_cast<UInt32>(replicated_table->getAbsoluteDelay());
+            status.is_readonly = replicated_table->isTableReadOnly();
         }
         else
             status.is_replicated = false;
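
With `is_readonly` now reported alongside the replication delay, a connection pool on the client side can filter out replicas that cannot accept writes before ranking the rest by delay. A hedged sketch of that idea — `TableStatus` below is a stand-in, not the real protocol struct:

```cpp
#include <cstdint>

struct TableStatus
{
    bool is_replicated = false;
    uint32_t absolute_delay = 0;
    bool is_readonly = false;   // newly reported by processTablesStatusRequest()
};

bool usableForInsert(const TableStatus & status, uint32_t max_delay)
{
    if (status.is_readonly)
        return false;   // a read-only replica cannot accept INSERTs at all
    return !status.is_replicated || status.absolute_delay <= max_delay;
}
```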

View File

@@ -232,7 +232,8 @@ void DistributedAsyncInsertBatch::sendBatch(const SettingsChanges & settings_cha
     insert_settings.applyChanges(settings_changes);
     auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings);
-    connection = parent.pool->get(timeouts);
+    auto result = parent.pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, parent.storage.remote_storage.getQualifiedName());
+    connection = std::move(result.front().entry);
     compression_expected = connection->getCompression() == Protocol::Compression::Enable;
     LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).",
@@ -289,7 +290,8 @@ void DistributedAsyncInsertBatch::sendSeparateFiles(const SettingsChanges & sett
         parent.storage.getContext()->getOpenTelemetrySpanLog());
     auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings);
-    auto connection = parent.pool->get(timeouts);
+    auto result = parent.pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, parent.storage.remote_storage.getQualifiedName());
+    auto connection = std::move(result.front().entry);
     bool compression_expected = connection->getCompression() == Protocol::Compression::Enable;
     RemoteInserter remote(*connection, timeouts,
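
Both senders now ask the failover pool for a *checked* connection instead of taking whatever connection happens to be pooled, so stale or unhealthy replicas can be skipped before the INSERT is sent. A simplified sketch of the selection idea — `Replica` and `Connection` are illustrative stand-ins, not the real ClickHouse classes:

```cpp
#include <memory>
#include <stdexcept>
#include <vector>

struct Connection { /* ... */ };

struct Replica
{
    std::shared_ptr<Connection> connection;
    unsigned absolute_delay_seconds = 0;
};

std::shared_ptr<Connection> getCheckedForInsert(
    const std::vector<Replica> & replicas, unsigned max_delay_seconds)
{
    // PoolMode::GET_ONE analogue: return the first sufficiently fresh replica.
    for (const auto & replica : replicas)
        if (replica.absolute_delay_seconds <= max_delay_seconds)
            return replica.connection;
    // fallback_to_stale_replicas analogue: better a stale replica than none.
    if (!replicas.empty())
        return replicas.front().connection;
    throw std::runtime_error("No replicas available for INSERT");
}
```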
RemoteInserter remote(*connection, timeouts, RemoteInserter remote(*connection, timeouts,

View File

@@ -101,7 +101,7 @@ DistributedAsyncInsertDirectoryQueue::DistributedAsyncInsertDirectoryQueue(
     StorageDistributed & storage_,
     const DiskPtr & disk_,
     const std::string & relative_path_,
-    ConnectionPoolPtr pool_,
+    ConnectionPoolWithFailoverPtr pool_,
     ActionBlocker & monitor_blocker_,
     BackgroundSchedulePool & bg_pool)
     : storage(storage_)
@@ -237,7 +237,7 @@ void DistributedAsyncInsertDirectoryQueue::run()
 }
-ConnectionPoolPtr DistributedAsyncInsertDirectoryQueue::createPool(const Cluster::Addresses & addresses, const StorageDistributed & storage)
+ConnectionPoolWithFailoverPtr DistributedAsyncInsertDirectoryQueue::createPool(const Cluster::Addresses & addresses, const StorageDistributed & storage)
 {
     const auto pool_factory = [&storage] (const Cluster::Address & address) -> ConnectionPoolPtr
     {
@@ -284,7 +284,7 @@ ConnectionPoolPtr DistributedAsyncInsertDirectoryQueue::createPool(const Cluster
     auto pools = createPoolsForAddresses(addresses, pool_factory, storage.log);
     const auto settings = storage.getContext()->getSettings();
-    return pools.size() == 1 ? pools.front() : std::make_shared<ConnectionPoolWithFailover>(pools,
+    return std::make_shared<ConnectionPoolWithFailover>(std::move(pools),
         settings.load_balancing,
         settings.distributed_replica_error_half_life.totalSeconds(),
         settings.distributed_replica_error_cap);
@@ -412,7 +412,9 @@ void DistributedAsyncInsertDirectoryQueue::processFile(std::string & file_path,
     insert_settings.applyChanges(settings_changes);
     auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings);
-    auto connection = pool->get(timeouts, insert_settings);
+    auto result = pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName());
+    auto connection = std::move(result.front().entry);
     LOG_DEBUG(log, "Sending `{}` to {} ({} rows, {} bytes)",
         file_path,
         connection->getDescription(),

View File

@@ -50,13 +50,13 @@ public:
         StorageDistributed & storage_,
         const DiskPtr & disk_,
         const std::string & relative_path_,
-        ConnectionPoolPtr pool_,
+        ConnectionPoolWithFailoverPtr pool_,
         ActionBlocker & monitor_blocker_,
         BackgroundSchedulePool & bg_pool);
     ~DistributedAsyncInsertDirectoryQueue();
-    static ConnectionPoolPtr createPool(const Cluster::Addresses & addresses, const StorageDistributed & storage);
+    static ConnectionPoolWithFailoverPtr createPool(const Cluster::Addresses & addresses, const StorageDistributed & storage);
     void updatePath(const std::string & new_relative_path);
@@ -111,7 +111,7 @@ private:
     std::string getLoggerName() const;
     StorageDistributed & storage;
-    const ConnectionPoolPtr pool;
+    const ConnectionPoolWithFailoverPtr pool;
     DiskPtr disk;
     std::string relative_path;

View File

@@ -112,19 +112,17 @@ DistributedSink::DistributedSink(
     const ClusterPtr & cluster_,
     bool insert_sync_,
     UInt64 insert_timeout_,
-    StorageID main_table_,
     const Names & columns_to_send_)
     : SinkToStorage(metadata_snapshot_->getSampleBlock())
     , context(Context::createCopy(context_))
     , storage(storage_)
     , metadata_snapshot(metadata_snapshot_)
-    , query_ast(createInsertToRemoteTableQuery(main_table_.database_name, main_table_.table_name, columns_to_send_))
+    , query_ast(createInsertToRemoteTableQuery(storage.remote_storage.database_name, storage.remote_storage.table_name, columns_to_send_))
     , query_string(queryToString(query_ast))
     , cluster(cluster_)
     , insert_sync(insert_sync_)
     , allow_materialized(context->getSettingsRef().insert_allow_materialized_columns)
     , insert_timeout(insert_timeout_)
-    , main_table(main_table_)
     , columns_to_send(columns_to_send_.begin(), columns_to_send_.end())
     , log(getLogger("DistributedSink"))
 {
@@ -372,10 +370,9 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "There are several writing job for an automatically replicated shard");
             /// TODO: it make sense to rewrite skip_unavailable_shards and max_parallel_replicas here
-            auto results = shard_info.pool->getManyChecked(timeouts, settings, PoolMode::GET_ONE, main_table.getQualifiedName());
-            if (results.empty() || results.front().entry.isNull())
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected exactly one connection for shard {}", toString(job.shard_index));
+            /// NOTE: INSERT will also take into account max_replica_delay_for_distributed_queries
+            /// (anyway fallback_to_stale_replicas_for_distributed_queries=true by default)
+            auto results = shard_info.pool->getManyCheckedForInsert(timeouts, settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName());
             job.connection_entry = std::move(results.front().entry);
         }
         else

View File

@@ -46,7 +46,6 @@ public:
         const ClusterPtr & cluster_,
         bool insert_sync_,
         UInt64 insert_timeout_,
-        StorageID main_table_,
         const Names & columns_to_send_);
     String getName() const override { return "DistributedSink"; }
@@ -108,7 +107,6 @@ private:
     /// Sync-related stuff
     UInt64 insert_timeout; // in seconds
-    StorageID main_table;
     NameSet columns_to_send;
     Stopwatch watch;
     Stopwatch watch_current_block;

View File

@@ -335,7 +335,9 @@ void DataPartStorageOnDiskBase::backup(
     const ReadSettings & read_settings,
     bool make_temporary_hard_links,
     BackupEntries & backup_entries,
-    TemporaryFilesOnDisks * temp_dirs) const
+    TemporaryFilesOnDisks * temp_dirs,
+    bool is_projection_part,
+    bool allow_backup_broken_projection) const
 {
     fs::path part_path_on_disk = fs::path{root_path} / part_dir;
     fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir;
@@ -377,7 +379,7 @@ void DataPartStorageOnDiskBase::backup(
     bool copy_encrypted = !backup_settings.decrypt_files_from_encrypted_disks;
-    for (const auto & filepath : files_to_backup)
+    auto backup_file = [&](const String & filepath)
     {
         auto filepath_on_disk = part_path_on_disk / filepath;
         auto filepath_in_backup = part_path_in_backup / filepath;
@@ -385,8 +387,10 @@ void DataPartStorageOnDiskBase::backup(
         if (files_without_checksums.contains(filepath))
         {
             backup_entries.emplace_back(filepath_in_backup, std::make_unique<BackupEntryFromSmallFile>(disk, filepath_on_disk, read_settings, copy_encrypted));
-            continue;
+            return;
         }
+        else if (is_projection_part && allow_backup_broken_projection && !disk->exists(filepath_on_disk))
+            return;
         if (make_temporary_hard_links)
         {
@@ -411,6 +415,31 @@ void DataPartStorageOnDiskBase::backup(
             backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner);
         backup_entries.emplace_back(filepath_in_backup, std::move(backup_entry));
+    };
+
+    auto * log = &Poco::Logger::get("DataPartStorageOnDiskBase::backup");
+
+    for (const auto & filepath : files_to_backup)
+    {
+        if (is_projection_part && allow_backup_broken_projection)
+        {
+            try
+            {
+                backup_file(filepath);
+            }
+            catch (Exception & e)
+            {
+                if (e.code() != ErrorCodes::FILE_DOESNT_EXIST)
+                    throw;
+
+                LOG_ERROR(log, "Cannot backup file {} of projection part {}. Will try to ignore it", filepath, part_dir);
+                continue;
+            }
+        }
+        else
+        {
+            backup_file(filepath);
+        }
     }
 }
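
The control flow added here — per-file work wrapped in one lambda, with missing files tolerated only for projection parts already known to be broken — can be rendered as a small standalone program. `MissingFile` and the fake `backup_file` body are illustrative, not the real backup machinery:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct MissingFile : std::runtime_error { using std::runtime_error::runtime_error; };

void backupFiles(const std::vector<std::string> & files, bool is_projection_part, bool allow_broken)
{
    auto backup_file = [&](const std::string & file)
    {
        if (file.empty())   // stand-in for "file vanished from disk"
            throw MissingFile("no such file");
        std::cout << "backed up " << file << '\n';
    };

    for (const auto & file : files)
    {
        try
        {
            backup_file(file);
        }
        catch (const MissingFile & e)
        {
            // Matches the new behaviour: tolerate a missing file only for
            // broken projection parts; rethrow in every other case.
            if (!(is_projection_part && allow_broken))
                throw;
            std::cerr << "ignored: " << e.what() << '\n';
        }
    }
}

int main()
{
    backupFiles({"a.bin", "", "b.bin"}, /*is_projection_part=*/true, /*allow_broken=*/true);
}
```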

View File

@@ -58,7 +58,9 @@ public:
         const ReadSettings & read_settings,
         bool make_temporary_hard_links,
         BackupEntries & backup_entries,
-        TemporaryFilesOnDisks * temp_dirs) const override;
+        TemporaryFilesOnDisks * temp_dirs,
+        bool is_projection_part,
+        bool allow_backup_broken_projection) const override;
     MutableDataPartStoragePtr freeze(
         const std::string & to,

View File

@@ -223,7 +223,9 @@ public:
         const ReadSettings & read_settings,
         bool make_temporary_hard_links,
         BackupEntries & backup_entries,
-        TemporaryFilesOnDisks * temp_dirs) const = 0;
+        TemporaryFilesOnDisks * temp_dirs,
+        bool is_projection_part,
+        bool allow_backup_broken_projection) const = 0;
     /// Creates hardlinks into 'to/dir_path' for every file in data part.
     /// Callback is called after hardlinks are created, but before 'delete-on-destroy.txt' marker is removed.

View File

@@ -705,13 +705,14 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
     loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`.
     loadExistingRowsCount(); /// Must be called after loadRowsCount() as it uses the value of `rows_count`.
     loadPartitionAndMinMaxIndex();
+    bool has_broken_projections = false;
     if (!parent_part)
     {
         loadTTLInfos();
-        loadProjections(require_columns_checksums, check_consistency, false /* if_not_loaded */);
+        loadProjections(require_columns_checksums, check_consistency, has_broken_projections, false /* if_not_loaded */);
     }
-    if (check_consistency)
+    if (check_consistency && !has_broken_projections)
         checkConsistency(require_columns_checksums);
     loadDefaultCompressionCodec();
@@ -776,7 +777,7 @@ void IMergeTreeDataPart::addProjectionPart(
     projection_parts[projection_name] = std::move(projection_part);
 }
-void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded)
+void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded)
 {
     auto metadata_snapshot = storage.getInMemoryMetadataPtr();
     for (const auto & projection : metadata_snapshot->projections)
@@ -793,10 +794,36 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch
             else
             {
                 auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build();
-                part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency);
+
+                try
+                {
+                    part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency);
+                }
+                catch (...)
+                {
+                    if (isRetryableException(std::current_exception()))
+                        throw;
+
+                    auto message = getCurrentExceptionMessage(true);
+                    LOG_ERROR(&Poco::Logger::get("IMergeTreeDataPart"),
+                        "Cannot load projection {}, will consider it broken. Reason: {}", projection.name, message);
+
+                    has_broken_projection = true;
+                    part->setBrokenReason(message, getCurrentExceptionCode());
+                }
+
                 addProjectionPart(projection.name, std::move(part));
             }
         }
+        else if (check_consistency && checksums.has(path))
+        {
+            auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build();
+            part->setBrokenReason(
+                "Projection directory " + path + " does not exist while loading projections. Stacktrace: " + StackTrace().toString(),
+                ErrorCodes::NO_FILE_IN_DATA_PART);
+            addProjectionPart(projection.name, std::move(part));
+            has_broken_projection = true;
+        }
     }
 }
@@ -1216,7 +1243,8 @@ void IMergeTreeDataPart::loadChecksums(bool require)
         /// Check the data while we are at it.
         LOG_WARNING(storage.log, "Checksums for part {} not found. Will calculate them from data on disk.", name);
-        checksums = checkDataPart(shared_from_this(), false);
+        bool noop;
+        checksums = checkDataPart(shared_from_this(), false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */false);
         writeChecksums(checksums, {});
         bytes_on_disk = checksums.getTotalSizeOnDisk();
@@ -1352,8 +1380,9 @@ void IMergeTreeDataPart::loadExistingRowsCount()
     if (existing_rows_count.has_value())
         return;
-    if (!rows_count || !storage.getSettings()->load_existing_rows_count_for_old_parts || !supportLightweightDeleteMutate()
-        || !hasLightweightDelete())
+    if (!rows_count || !supportLightweightDeleteMutate() || !hasLightweightDelete()
+        || !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge
+        || !storage.getSettings()->load_existing_rows_count_for_old_parts)
         existing_rows_count = rows_count;
     else
         existing_rows_count = readExistingRowsCount();
@@ -2337,6 +2366,32 @@ std::optional<String> IMergeTreeDataPart::getStreamNameForColumn(
     return getStreamNameOrHash(stream_name, extension, storage_);
 }
+void IMergeTreeDataPart::markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const
+{
+    auto it = projection_parts.find(projection_name);
+    if (it == projection_parts.end())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no projection part '{}'", projection_name);
+    it->second->setBrokenReason(message, code);
+}
+
+bool IMergeTreeDataPart::hasBrokenProjection(const String & projection_name) const
+{
+    auto it = projection_parts.find(projection_name);
+    if (it == projection_parts.end())
+        return false;
+    return it->second->is_broken;
+}
+
+void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const
+{
+    std::lock_guard lock(broken_reason_mutex);
+    if (is_broken)
+        return;
+    is_broken = true;
+    exception = message;
+    exception_code = code;
+}
+
 bool isCompactPart(const MergeTreeDataPartPtr & data_part)
 {
     return (data_part && data_part->getType() == MergeTreeDataPartType::Compact);
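
`setBrokenReason()` above is a set-once guard: the first caller records the failure, later callers keep the original reason. Reduced to a standalone idiom (illustrative names, not the real class):

```cpp
#include <mutex>
#include <string>

class BrokenState
{
public:
    void setBrokenReason(const std::string & message, int code)
    {
        std::lock_guard lock(mutex);
        if (is_broken)
            return;   // keep the first recorded reason
        is_broken = true;
        exception = message;
        exception_code = code;
    }

    bool broken() const
    {
        std::lock_guard lock(mutex);
        return is_broken;
    }

private:
    mutable std::mutex mutex;
    bool is_broken = false;
    std::string exception;
    int exception_code = 0;
};
```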

View File

@@ -265,6 +265,12 @@ public:
     /// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table.
     mutable std::atomic<bool> is_frozen {false};
+    /// If it is a projection part, it can be broken sometimes.
+    mutable std::atomic<bool> is_broken {false};
+    mutable std::string exception;
+    mutable int exception_code = 0;
+    mutable std::mutex broken_reason_mutex;
     /// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper
     mutable bool is_unexpected_local_part = false;
@@ -428,9 +434,16 @@ public:
     void addProjectionPart(const String & projection_name, std::shared_ptr<IMergeTreeDataPart> && projection_part);
+    void markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const;
     bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); }
-    void loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded = false);
+    bool hasBrokenProjection(const String & projection_name) const;
+
+    /// Return true, if all projections were loaded successfully and none was marked as broken.
+    void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false);
+
+    void setBrokenReason(const String & message, int code) const;
     /// Return set of metadata file names without checksums. For example,
     /// columns.txt or checksums.txt itself.
@@ -593,7 +606,7 @@ protected:
     const IMergeTreeDataPart * parent_part;
     String parent_part_name;
-    std::map<String, std::shared_ptr<IMergeTreeDataPart>> projection_parts;
+    mutable std::map<String, std::shared_ptr<IMergeTreeDataPart>> projection_parts;
     mutable PartMetadataManagerPtr metadata_manager;
@@ -673,7 +686,8 @@ private:
     /// For the older format version calculates rows count from the size of a column with a fixed size.
     void loadRowsCount();
-    /// Load existing rows count from _row_exists column if load_existing_rows_count_for_old_parts is true.
+    /// Load existing rows count from _row_exists column
+    /// if load_existing_rows_count_for_old_parts and exclude_deleted_rows_for_part_size_in_merge are both enabled.
     void loadExistingRowsCount();
     static void appendFilesOfRowsCount(Strings & files);

View File

@@ -730,8 +730,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
         MergeTreeData::DataPartsVector projection_parts;
         for (const auto & part : global_ctx->future_part->parts)
         {
-            auto it = part->getProjectionParts().find(projection.name);
-            if (it != part->getProjectionParts().end())
+            auto actual_projection_parts = part->getProjectionParts();
+            auto it = actual_projection_parts.find(projection.name);
+            if (it != actual_projection_parts.end() && !it->second->is_broken)
                 projection_parts.push_back(it->second);
         }
         if (projection_parts.size() < global_ctx->future_part->parts.size())
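
Besides skipping broken projections, materializing `getProjectionParts()` once matters on its own: if a getter returns the map by value (a snapshot), calling it twice yields two distinct temporaries, and comparing `find()` of one against `end()` of the other is undefined behaviour. An illustrative stand-in:

```cpp
#include <map>
#include <string>

std::map<std::string, int> getProjectionPartsSnapshot()
{
    return {{"proj_a", 1}};   // imagine: a copy taken under a lock
}

bool hasProjection(const std::string & name)
{
    // Wrong: getProjectionPartsSnapshot().find(name) != getProjectionPartsSnapshot().end()
    // compares iterators of two different temporary maps.
    auto snapshot = getProjectionPartsSnapshot();   // one snapshot for both calls
    return snapshot.find(name) != snapshot.end();
}
```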

View File

@@ -5302,7 +5302,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
         if (hold_table_lock && !table_lock)
             table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);
-        if (backup_settings.check_parts)
+        if (backup_settings.check_projection_parts)
             part->checkConsistencyWithProjections(/* require_part_metadata= */ true);
         BackupEntries backup_entries_from_part;
@@ -5314,7 +5314,8 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
             read_settings,
             make_temporary_hard_links,
             backup_entries_from_part,
-            &temp_dirs);
+            &temp_dirs,
+            false, false);
         auto projection_parts = part->getProjectionParts();
         for (const auto & [projection_name, projection_part] : projection_parts)
@@ -5327,7 +5328,9 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
                 read_settings,
                 make_temporary_hard_links,
                 backup_entries_from_part,
-                &temp_dirs);
+                &temp_dirs,
+                projection_part->is_broken,
+                backup_settings.allow_backup_broken_projections);
         }
         if (hold_storage_and_part_ptrs)
@@ -7786,21 +7789,39 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr &
 bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason)
 {
-    if (left->getProjectionParts().size() != right->getProjectionParts().size())
+    auto remove_broken_parts_from_consideration = [](auto & parts)
+    {
+        std::set<String> broken_projection_parts;
+        for (const auto & [name, part] : parts)
+        {
+            if (part->is_broken)
+                broken_projection_parts.emplace(name);
+        }
+        for (const auto & name : broken_projection_parts)
+            parts.erase(name);
+    };
+
+    auto left_projection_parts = left->getProjectionParts();
+    auto right_projection_parts = right->getProjectionParts();
+
+    remove_broken_parts_from_consideration(left_projection_parts);
+    remove_broken_parts_from_consideration(right_projection_parts);
+
+    if (left_projection_parts.size() != right_projection_parts.size())
     {
         out_reason = fmt::format(
             "Parts have different number of projections: {} in part '{}' and {} in part '{}'",
-            left->getProjectionParts().size(),
+            left_projection_parts.size(),
             left->name,
-            right->getProjectionParts().size(),
+            right_projection_parts.size(),
             right->name
         );
         return false;
     }
-    for (const auto & [name, _] : left->getProjectionParts())
+    for (const auto & [name, _] : left_projection_parts)
     {
-        if (!right->hasProjection(name))
+        if (!right_projection_parts.contains(name))
         {
             out_reason = fmt::format(
                 "The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name

View File

@@ -464,8 +464,13 @@ public:
     struct ProjectionPartsVector
     {
-        DataPartsVector projection_parts;
         DataPartsVector data_parts;
+        DataPartsVector projection_parts;
+        DataPartStateVector projection_parts_states;
+        DataPartsVector broken_projection_parts;
+        DataPartStateVector broken_projection_parts_states;
     };
     /// Returns a copy of the list so that the caller shouldn't worry about locks.
@@ -480,7 +485,7 @@ public:
         const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const;
     /// Same as above but only returns projection parts
     ProjectionPartsVector getProjectionPartsVectorForInternalUsage(
-        const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const;
+        const DataPartStates & affordable_states, MergeTreeData::DataPartStateVector * out_states) const;
     /// Returns absolutely all parts (and snapshot of their states)

View File

@@ -54,6 +54,8 @@ struct MergeTreeDataPartChecksums
     bool has(const String & file_name) const { return files.find(file_name) != files.end(); }
+    bool remove(const String & file_name) { return files.erase(file_name); }
     bool empty() const { return files.empty(); }
     /// Checks that the set of columns and their checksums are the same. If not, throws an exception.

View File

@@ -592,7 +592,9 @@ static std::set<ProjectionDescriptionRawPtr> getProjectionsToRecalculate(
     {
         bool need_recalculate =
             materialized_projections.contains(projection.name)
-            || (!is_full_part_storage && source_part->hasProjection(projection.name));
+            || (!is_full_part_storage
+                && source_part->hasProjection(projection.name)
+                && !source_part->hasBrokenProjection(projection.name));
         if (need_recalculate)
             projections_to_recalc.insert(&projection);
@@ -936,7 +938,8 @@ void finalizeMutatedPart(
     new_data_part->modification_time = time(nullptr);
     /// Load rest projections which are hardlinked
-    new_data_part->loadProjections(false, false, true /* if_not_loaded */);
+    bool noop;
+    new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
     /// All information about sizes is stored in checksums.
     /// It doesn't make sense to touch filesystem for sizes.
@@ -1534,7 +1537,9 @@ private:
             bool need_recalculate =
                 ctx->materialized_projections.contains(projection.name)
-                || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name));
+                || (!is_full_part_storage
+                    && ctx->source_part->hasProjection(projection.name)
+                    && !ctx->source_part->hasBrokenProjection(projection.name));
             if (need_recalculate)
             {
@@ -1671,8 +1676,9 @@ private:
     void finalize()
     {
+        bool noop;
         ctx->new_data_part->minmax_idx = std::move(ctx->minmax_idx);
-        ctx->new_data_part->loadProjections(false, false, true /* if_not_loaded */);
+        ctx->new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
         ctx->mutating_executor.reset();
         ctx->mutating_pipeline.reset();

View File

@@ -93,6 +93,7 @@ struct ReplicatedMergeTreeLogEntryData
     MergeTreeDataPartFormat new_part_format;
     String block_id; /// For parts of level zero, the block identifier for deduplication (node name in /blocks/).
     mutable String actual_new_part_name; /// GET_PART could actually fetch a part covering 'new_part_name'.
+    mutable std::unordered_set<String> replace_range_actual_new_part_names; /// Same as above, but for REPLACE_RANGE
     UUID new_part_uuid = UUIDHelpers::Nil;
     Strings source_parts;

View File

@@ -63,7 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t
     if (parts_set.contains(name))
         return;
-    LOG_TRACE(log, "Enqueueing {} for check after after {}s", name, delay_to_check_seconds);
+    LOG_TRACE(log, "Enqueueing {} for check after {}s", name, delay_to_check_seconds);
     parts_queue.emplace_back(name, std::chrono::steady_clock::now() + std::chrono::seconds(delay_to_check_seconds));
     parts_set.insert(name);
     task->schedule();
@@ -274,7 +274,7 @@ std::pair<bool, MergeTreeDataPartPtr> ReplicatedMergeTreePartCheckThread::findLo
     return std::make_pair(exists_in_zookeeper, part);
 }
-ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name)
+ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name, bool throw_on_broken_projection)
 {
     ReplicatedCheckResult result;
     auto [exists_in_zookeeper, part] = findLocalPart(part_name);
@@ -341,6 +341,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
     /// before the ReplicatedMergeTreePartHeader was introduced.
     String part_path = storage.replica_path + "/parts/" + part_name;
     String part_znode = zookeeper->get(part_path);
+    bool is_broken_projection = false;
     try
     {
@@ -362,8 +363,10 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
         checkDataPart(
             part,
-            true,
-            [this] { return need_stop.load(); });
+            /* require_checksums */true,
+            is_broken_projection,
+            [this] { return need_stop.load(); },
+            throw_on_broken_projection);
         if (need_stop)
         {
@@ -382,14 +385,27 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
         if (isRetryableException(std::current_exception()))
             throw;
-        tryLogCurrentException(log, __PRETTY_FUNCTION__);
+        PreformattedMessage message;
+        if (is_broken_projection)
+        {
+            WriteBufferFromOwnString wb;
+            message = PreformattedMessage::create(
+                "Part {} has a broken projections. It will be ignored. Broken projections info: {}",
+                part_name, getCurrentExceptionMessage(false));
+            LOG_DEBUG(log, message);
+            result.action = ReplicatedCheckResult::DoNothing;
+        }
+        else
+        {
+            tryLogCurrentException(log, __PRETTY_FUNCTION__);
-        auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name);
-        LOG_ERROR(log, message);
+            message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name);
+            LOG_ERROR(log, message);
+            result.action = ReplicatedCheckResult::TryFetchMissing;
+        }
         /// Part is broken, let's try to find it and fetch.
         result.status = {part_name, false, message};
-        result.action = ReplicatedCheckResult::TryFetchMissing;
         return result;
     }
@@ -419,12 +435,12 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
 }
-CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after)
+CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after, bool throw_on_broken_projection)
 {
     LOG_INFO(log, "Checking part {}", part_name);
     ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
-    ReplicatedCheckResult result = checkPartImpl(part_name);
+    ReplicatedCheckResult result = checkPartImpl(part_name, throw_on_broken_projection);
     switch (result.action)
     {
         case ReplicatedCheckResult::None: UNREACHABLE();
@@ -577,7 +593,7 @@ void ReplicatedMergeTreePartCheckThread::run()
         }
         std::optional<time_t> recheck_after;
-        checkPartAndFix(selected->name, &recheck_after);
+        checkPartAndFix(selected->name, &recheck_after, /* throw_on_broken_projection */false);
         if (need_stop)
             return;

View File

@@ -65,9 +65,9 @@ public:
     size_t size() const;
     /// Check part by name
-    CheckResult checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after = nullptr);
+    CheckResult checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after = nullptr, bool throw_on_broken_projection = true);
-    ReplicatedCheckResult checkPartImpl(const String & part_name);
+    ReplicatedCheckResult checkPartImpl(const String & part_name, bool throw_on_broken_projection);
     std::unique_lock<std::mutex> pausePartsCheck();

View File

@@ -342,6 +342,11 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval(
         /// NOTE actual_new_part_name is very confusing and error-prone. This approach must be fixed.
         removeCoveredPartsFromMutations(entry->actual_new_part_name, /*remove_part = */ false, /*remove_covered_parts = */ true);
     }
+    for (const auto & actual_part : entry->replace_range_actual_new_part_names)
+    {
+        LOG_TEST(log, "Entry {} has actual new part name {}, removing it from mutations", entry->znode_name, actual_part);
+        removeCoveredPartsFromMutations(actual_part, /*remove_part = */ false, /*remove_covered_parts = */ true);
+    }
     LOG_TEST(log, "Adding parts [{}] to current parts", fmt::join(entry_virtual_parts, ", "));
@@ -1180,9 +1185,9 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry
     if (entry_for_same_part_it != future_parts.end())
     {
         const LogEntry & another_entry = *entry_for_same_part_it->second;
-        constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} "
+        constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} (actual part {})"
                                     "because another log entry {} of type {} for the same part ({}) is being processed.";
-        LOG_INFO(LogToStr(out_reason, log), fmt_string, entry.znode_name, entry.type, entry.new_part_name,
+        LOG_INFO(LogToStr(out_reason, log), fmt_string, entry.znode_name, entry.type, entry.new_part_name, new_part_name,
                  another_entry.znode_name, another_entry.type, another_entry.new_part_name);
         return true;
@@ -1198,6 +1203,7 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry
     auto result_part = MergeTreePartInfo::fromPartName(new_part_name, format_version);
     /// It can slow down when the size of `future_parts` is large. But it can not be large, since background pool is limited.
+    /// (well, it can actually, thanks to REPLACE_RANGE, but it's a rare case)
     for (const auto & future_part_elem : future_parts)
     {
         auto future_part = MergeTreePartInfo::fromPartName(future_part_elem.first, format_version);
@@ -1608,26 +1614,39 @@ void ReplicatedMergeTreeQueue::CurrentlyExecuting::setActualPartName(
     std::unique_lock<std::mutex> & state_lock,
     std::vector<LogEntryPtr> & covered_entries_to_wait)
 {
-    if (!entry.actual_new_part_name.empty())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry actual part isn't empty yet. This is a bug.");
+    if (actual_part_name.empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Actual part name is empty");
-    entry.actual_new_part_name = actual_part_name;
+    if (!entry.actual_new_part_name.empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} actual part isn't empty yet: '{}'. This is a bug.",
+                        entry.znode_name, entry.actual_new_part_name);
+
+    auto actual_part_info = MergeTreePartInfo::fromPartName(actual_part_name, queue.format_version);
+    for (const auto & other_part_name : entry.replace_range_actual_new_part_names)
+        if (!MergeTreePartInfo::fromPartName(other_part_name, queue.format_version).isDisjoint(actual_part_info))
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} already has actual part {} non-disjoint with {}. This is a bug.",
+                            entry.actual_new_part_name, other_part_name, actual_part_name);
     /// Check if it is the same (and already added) part.
-    if (entry.actual_new_part_name == entry.new_part_name)
+    if (actual_part_name == entry.new_part_name)
         return;
-    if (!queue.future_parts.emplace(entry.actual_new_part_name, entry.shared_from_this()).second)
+    if (!queue.future_parts.emplace(actual_part_name, entry.shared_from_this()).second)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Attaching already existing future part {}. This is a bug. "
                                                    "It happened on attempt to execute {}: {}",
-                        entry.actual_new_part_name, entry.znode_name, entry.toString());
+                        actual_part_name, entry.znode_name, entry.toString());
+
+    if (entry.type == LogEntry::REPLACE_RANGE)
+        entry.replace_range_actual_new_part_names.insert(actual_part_name);
+    else
+        entry.actual_new_part_name = actual_part_name;
     for (LogEntryPtr & covered_entry : covered_entries_to_wait)
     {
         if (&entry == covered_entry.get())
             continue;
-        LOG_TRACE(queue.log, "Waiting for {} producing {} to finish before executing {} producing not disjoint part {}",
-                  covered_entry->znode_name, covered_entry->new_part_name, entry.znode_name, entry.new_part_name);
+        LOG_TRACE(queue.log, "Waiting for {} producing {} to finish before executing {} producing not disjoint part {} (actual part {})",
+                  covered_entry->znode_name, covered_entry->new_part_name, entry.znode_name, entry.new_part_name, actual_part_name);
         covered_entry->execution_complete.wait(state_lock, [&covered_entry] { return !covered_entry->currently_executing; });
     }
 }
@@ -1646,25 +1665,27 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::~CurrentlyExecuting()
     entry->currently_executing = false;
     entry->execution_complete.notify_all();
-    for (const String & new_part_name : entry->getVirtualPartNames(queue.format_version))
+    auto erase_and_check = [this](const String & part_name)
     {
-        if (!queue.future_parts.erase(new_part_name))
+        if (!queue.future_parts.erase(part_name))
         {
-            LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", new_part_name);
+            LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", part_name);
             assert(false);
         }
-    }
+    };
+
+    for (const String & new_part_name : entry->getVirtualPartNames(queue.format_version))
+        erase_and_check(new_part_name);
     if (!entry->actual_new_part_name.empty())
-    {
-        if (entry->actual_new_part_name != entry->new_part_name && !queue.future_parts.erase(entry->actual_new_part_name))
-        {
-            LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", entry->actual_new_part_name);
-            assert(false);
-        }
+        erase_and_check(entry->actual_new_part_name);
     entry->actual_new_part_name.clear();
-    }
+
+    for (const auto & actual_part : entry->replace_range_actual_new_part_names)
+        erase_and_check(actual_part);
+
+    entry->replace_range_actual_new_part_names.clear();
 }
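
The invariant enforced in `setActualPartName()` is that every actual part recorded for one REPLACE_RANGE entry must cover a block range disjoint from all previously recorded ones. A simplified stand-in for `MergeTreePartInfo::isDisjoint` and that check:

```cpp
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>

struct BlockRange
{
    std::string partition_id;
    int64_t min_block = 0;
    int64_t max_block = 0;

    // Two parts are disjoint if they live in different partitions or their
    // [min_block, max_block] intervals do not intersect.
    bool isDisjoint(const BlockRange & rhs) const
    {
        return partition_id != rhs.partition_id
            || max_block < rhs.min_block
            || rhs.max_block < min_block;
    }
};

void addActualPart(std::vector<BlockRange> & recorded, const BlockRange & part)
{
    for (const auto & other : recorded)
        if (!other.isDisjoint(part))
            throw std::logic_error("non-disjoint actual parts for one REPLACE_RANGE entry");
    recorded.push_back(part);
}
```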

View File

@@ -42,6 +42,7 @@ namespace ErrorCodes
     extern const int NO_FILE_IN_DATA_PART;
     extern const int NETWORK_ERROR;
    extern const int SOCKET_TIMEOUT;
+    extern const int BROKEN_PROJECTION;
 }
@@ -116,7 +117,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
     const NameSet & files_without_checksums,
     const ReadSettings & read_settings,
     bool require_checksums,
-    std::function<bool()> is_cancelled)
+    std::function<bool()> is_cancelled,
+    bool & is_broken_projection,
+    bool throw_on_broken_projection)
 {
     /** Responsibility:
       * - read list of columns from columns.txt;
@@ -125,6 +128,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
       */
     CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedChecks};
+    Poco::Logger * log = &Poco::Logger::get("checkDataPart");
     NamesAndTypesList columns_txt;
@@ -274,17 +278,55 @@ static IMergeTreeDataPart::Checksums checkDataPart(
         }
     }
+    std::string broken_projections_message;
     for (const auto & [name, projection] : data_part->getProjectionParts())
     {
         if (is_cancelled())
             return {};
         auto projection_file = name + ".proj";
-        auto projection_checksums = checkDataPart(
-            projection, *data_part_storage.getProjection(projection_file),
-            projection->getColumns(), projection->getType(),
-            projection->getFileNamesWithoutChecksums(),
-            read_settings, require_checksums, is_cancelled);
+        if (!throw_on_broken_projection && projection->is_broken)
+        {
+            projections_on_disk.erase(projection_file);
+            checksums_txt.remove(projection_file);
+        }
+
+        IMergeTreeDataPart::Checksums projection_checksums;
+        try
+        {
+            bool noop;
+            projection_checksums = checkDataPart(
+                projection, *data_part_storage.getProjection(projection_file),
+                projection->getColumns(), projection->getType(),
+                projection->getFileNamesWithoutChecksums(),
+                read_settings, require_checksums, is_cancelled, noop, /* throw_on_broken_projection */false);
+        }
+        catch (...)
+        {
+            if (isRetryableException(std::current_exception()))
+                throw;
+
+            if (!projection->is_broken)
+            {
+                LOG_TEST(log, "Marking projection {} as broken ({})", name, projection_file);
+                projection->setBrokenReason(getCurrentExceptionMessage(false), getCurrentExceptionCode());
+            }
+
+            is_broken_projection = true;
+            if (throw_on_broken_projection)
+            {
+                if (!broken_projections_message.empty())
+                    broken_projections_message += "\n";
+
+                broken_projections_message += fmt::format(
+                    "Part {} has a broken projection {} (error: {})",
+                    data_part->name, name, getCurrentExceptionMessage(false));
+                continue;
+            }
+
+            projections_on_disk.erase(projection_file);
+            checksums_txt.remove(projection_file);
+        }
         checksums_data.files[projection_file] = IMergeTreeDataPart::Checksums::Checksum(
             projection_checksums.getTotalSizeOnDisk(),
@@ -293,6 +335,11 @@ static IMergeTreeDataPart::Checksums checkDataPart(
         projections_on_disk.erase(projection_file);
     }
+    if (throw_on_broken_projection && !broken_projections_message.empty())
+    {
+        throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
+    }
+
     if (require_checksums && !projections_on_disk.empty())
     {
         throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
@@ -312,7 +359,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
 IMergeTreeDataPart::Checksums checkDataPart(
     MergeTreeData::DataPartPtr data_part,
     bool require_checksums,
-    std::function<bool()> is_cancelled)
+    bool & is_broken_projection,
+    std::function<bool()> is_cancelled,
+    bool throw_on_broken_projection)
 {
     /// If check of part has failed and it is stored on disk with cache
     /// try to drop cache and check it once again because maybe the cache
@@ -351,7 +400,9 @@ IMergeTreeDataPart::Checksums checkDataPart(
             data_part->getFileNamesWithoutChecksums(),
             read_settings,
             require_checksums,
-            is_cancelled);
+            is_cancelled,
+            is_broken_projection,
+            throw_on_broken_projection);
     };
     try
@@ -365,7 +416,9 @@ IMergeTreeDataPart::Checksums checkDataPart(
             data_part->getFileNamesWithoutChecksums(),
             read_settings,
             require_checksums,
-            is_cancelled);
+            is_cancelled,
+            is_broken_projection,
+            throw_on_broken_projection);
     }
     catch (...)
     {

View File

@@ -10,7 +10,9 @@ namespace DB
 IMergeTreeDataPart::Checksums checkDataPart(
     MergeTreeData::DataPartPtr data_part,
     bool require_checksums,
-    std::function<bool()> is_cancelled = []{ return false; });
+    bool & is_broken_projection,
+    std::function<bool()> is_cancelled = []{ return false; },
+    bool throw_on_broken_projection = false);
 bool isNotEnoughMemoryErrorCode(int code);
 bool isRetryableException(std::exception_ptr exception_ptr);
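
The reshaped signature follows a common pattern: report a condition through an out-parameter, and let the caller decide whether that condition is fatal. A toy self-contained version (`validatePart` and its payload are illustrative, not the real check):

```cpp
#include <stdexcept>
#include <string>

bool validatePart(const std::string & payload, bool & is_broken_projection, bool throw_on_broken_projection)
{
    is_broken_projection = payload.empty();   // stand-in for "a projection failed its check"
    if (is_broken_projection && throw_on_broken_projection)
        throw std::runtime_error("broken projection");   // analogue of ErrorCodes::BROKEN_PROJECTION
    return !is_broken_projection;
}

int main()
{
    bool is_broken = false;
    validatePart("", is_broken, /*throw_on_broken_projection=*/false);   // quiet mode: only records the flag
    // validatePart("", is_broken, true);                                // strict mode: would throw
}
```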

View File

@@ -334,6 +334,7 @@ StorageDistributed::StorageDistributed(
     , remote_database(remote_database_)
     , remote_table(remote_table_)
     , remote_table_function_ptr(remote_table_function_ptr_)
+    , remote_storage(remote_table_function_ptr ? StorageID::createEmpty() : StorageID{remote_database, remote_table})
     , log(getLogger("StorageDistributed (" + id_.table_name + ")"))
     , owned_cluster(std::move(owned_cluster_))
     , cluster_name(getContext()->getMacros()->expand(cluster_name_))
@@ -896,10 +897,6 @@ void StorageDistributed::read(
         return;
     }
-    StorageID main_table = StorageID::createEmpty();
-    if (!remote_table_function_ptr)
-        main_table = StorageID{remote_database, remote_table};
-
     const auto & snapshot_data = assert_cast<const SnapshotData &>(*storage_snapshot->data);
     ClusterProxy::SelectStreamFactory select_stream_factory =
         ClusterProxy::SelectStreamFactory(
@@ -932,7 +929,7 @@ void StorageDistributed::read(
         query_plan,
         header,
         processed_stage,
-        main_table,
+        remote_storage,
         remote_table_function_ptr,
         select_stream_factory,
         log,
@@ -978,10 +975,8 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata
     else
         columns_to_send = metadata_snapshot->getSampleBlockNonMaterialized().getNames();
-    /// DistributedSink will not own cluster
-    return std::make_shared<DistributedSink>(
-        local_context, *this, metadata_snapshot, cluster, insert_sync, timeout,
-        StorageID{remote_database, remote_table}, columns_to_send);
+    /// DistributedSink will not own cluster, but will own ConnectionPools of the cluster
+    return std::make_shared<DistributedSink>(local_context, *this, metadata_snapshot, cluster, insert_sync, timeout, columns_to_send);
 }

View File

@@ -241,6 +241,7 @@ private:
     String remote_database;
     String remote_table;
     ASTPtr remote_table_function_ptr;
+    StorageID remote_storage;
     LoggerPtr log;
@@ -275,7 +276,7 @@ private:
     struct ClusterNodeData
     {
         std::shared_ptr<DistributedAsyncInsertDirectoryQueue> directory_queue;
-        ConnectionPoolPtr connection_pool;
+        ConnectionPoolWithFailoverPtr connection_pool;
         Cluster::Addresses addresses;
         size_t clusters_version;
     };

View File

@@ -94,6 +94,8 @@ void StorageInMemoryMetadata::setSQLSecurity(const ASTSQLSecurity & sql_security
 {
     if (sql_security.definer)
         definer = sql_security.definer->toString();
+    else
+        definer = std::nullopt;
     sql_security_type = sql_security.type;
 }
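
The fix in miniature: when settings are re-applied, an optional field must be cleared if the new statement omits it, otherwise a stale value from a previous ALTER leaks through. Illustrative stand-in types below, not the real AST classes:

```cpp
#include <optional>
#include <string>

struct SqlSecurity { const std::string * definer = nullptr; };

void apply(std::optional<std::string> & stored_definer, const SqlSecurity & s)
{
    if (s.definer)
        stored_definer = *s.definer;
    else
        stored_definer = std::nullopt;   // reset; do not keep the old definer
}
```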

Some files were not shown because too many files have changed in this diff.