Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-10 01:25:21 +00:00.

Commit f12e4cfc64: Merge branch 'master' into ci-fuzzer-enable

.github/workflows/pull_request.yml (vendored, 11 lines changed)
@@ -6,6 +6,7 @@ env:
  PYTHONUNBUFFERED: 1

on: # yamllint disable-line rule:truthy
  merge_group:
  pull_request:
    types:
      - synchronize

@@ -29,6 +30,7 @@ jobs:
          fetch-depth: 0 # to get version
          filter: tree:0
      - name: Labels check
        if: ${{ github.event_name != 'merge_group' }}
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 run_check.py

@@ -56,16 +58,9 @@ jobs:
            echo 'EOF'
          } >> "$GITHUB_OUTPUT"
      - name: Re-create GH statuses for skipped jobs if any
        if: ${{ github.event_name != 'merge_group' }}
        run: |
          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses
      - name: Style check early
        # hack to run style check before the docker build job if possible (style-check image not changed)
        if: contains(fromJson(steps.runconfig.outputs.CI_DATA).jobs_data.jobs_to_do, 'Style check early')
        run: |
          DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n')
          export DOCKER_TAG=$DOCKER_TAG
          python3 ./tests/ci/style_check.py --no-push
          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check'
  BuildDockers:
    needs: [RunConfig]
    if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }}
CHANGELOG.md (174 lines changed)

@@ -1,10 +1,184 @@
### Table of Contents
**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**<br/>
**[ClickHouse release v24.2, 2024-02-29](#242)**<br/>
**[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**<br/>

# 2024 Changelog

### <a id="243"></a> ClickHouse release 24.3 LTS, 2024-03-26

#### Upgrade Notes

* The setting `allow_experimental_analyzer` is enabled by default and it switches the query analysis to a new implementation, which has better compatibility and feature completeness. The "analyzer" feature is now considered beta instead of experimental. You can return to the old behavior by setting `compatibility` to `24.2` or by disabling the `allow_experimental_analyzer` setting (see the example after this list). Watch the [video on YouTube](https://www.youtube.com/watch?v=zhrOYQpgvkk).
* ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow data type to use for the ClickHouse String data type - String or Binary. This is controlled by the settings `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`, and `output_format_arrow_string_as_string`. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases. Parquet/ORC/Arrow supports many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools lack support for the faster `lz4` compression method, that's why we set `zstd` by default. This is controlled by the settings `output_format_parquet_compression_method`, `output_format_orc_compression_method`, and `output_format_arrow_compression_method`. We changed the default to `zstd` for Parquet and ORC, but not Arrow (it is emphasized for low-level usages). [#61817](https://github.com/ClickHouse/ClickHouse/pull/61817) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* In the new ClickHouse version, the functions `geoDistance`, `greatCircleDistance`, and `greatCircleAngle` will use the 64-bit double precision floating point data type for internal calculations and the return type if all the arguments are Float64. This closes [#58476](https://github.com/ClickHouse/ClickHouse/issues/58476). In previous versions, the functions always used Float32. You can switch to the old behavior by setting `geo_distance_returns_float64_on_float64_arguments` to `false` or setting `compatibility` to `24.2` or earlier. [#61848](https://github.com/ClickHouse/ClickHouse/pull/61848) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Co-authored with [Geet Patel](https://github.com/geetptl).
* The obsolete in-memory data parts have been deprecated since version 23.5 and have not been supported since version 23.10. Now the remaining code is removed. Continuation of [#55186](https://github.com/ClickHouse/ClickHouse/issues/55186) and [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). It is unlikely that you have used in-memory data parts because they were available only before version 23.5 and only when you enabled them manually by specifying the corresponding SETTINGS for a MergeTree table. To check if you have in-memory data parts, run the following query: `SELECT part_type, count() FROM system.parts GROUP BY part_type ORDER BY part_type`. To disable the usage of in-memory data parts, do `ALTER TABLE ... MODIFY SETTING min_bytes_for_compact_part = DEFAULT, min_rows_for_compact_part = DEFAULT`. Before upgrading from old ClickHouse releases, first check that you don't have in-memory data parts. If there are in-memory data parts, disable them first, then wait until there are no in-memory data parts, and continue the upgrade. [#61127](https://github.com/ClickHouse/ClickHouse/pull/61127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Changed the column name from `duration_ms` to `duration_microseconds` in the `system.zookeeper` table to reflect the reality that the duration is in microsecond resolution. [#60774](https://github.com/ClickHouse/ClickHouse/pull/60774) ([Duc Canh Le](https://github.com/canhld94)).
* Reject incoming INSERT queries when the query-level settings `async_insert` and `deduplicate_blocks_in_dependent_materialized_views` are enabled together. This behaviour is controlled by the setting `throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert` and enabled by default. This is a continuation of https://github.com/ClickHouse/ClickHouse/pull/59699 needed to unblock https://github.com/ClickHouse/ClickHouse/pull/59915. [#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Utility `clickhouse-copier` is moved to a separate repository on GitHub: https://github.com/ClickHouse/copier. It is no longer included in the bundle but is still available as a separate download. This closes: [#60734](https://github.com/ClickHouse/ClickHouse/issues/60734) This closes: [#60540](https://github.com/ClickHouse/ClickHouse/issues/60540) This closes: [#60250](https://github.com/ClickHouse/ClickHouse/issues/60250) This closes: [#52917](https://github.com/ClickHouse/ClickHouse/issues/52917) This closes: [#51140](https://github.com/ClickHouse/ClickHouse/issues/51140) This closes: [#47517](https://github.com/ClickHouse/ClickHouse/issues/47517) This closes: [#47189](https://github.com/ClickHouse/ClickHouse/issues/47189) This closes: [#46598](https://github.com/ClickHouse/ClickHouse/issues/46598) This closes: [#40257](https://github.com/ClickHouse/ClickHouse/issues/40257) This closes: [#36504](https://github.com/ClickHouse/ClickHouse/issues/36504) This closes: [#35485](https://github.com/ClickHouse/ClickHouse/issues/35485) This closes: [#33702](https://github.com/ClickHouse/ClickHouse/issues/33702) This closes: [#26702](https://github.com/ClickHouse/ClickHouse/issues/26702).
* To increase compatibility with MySQL, the compatibility alias `locate` now accepts arguments `(needle, haystack[, start_pos])` by default. The previous behavior `(haystack, needle[, start_pos])` can be restored by setting `function_locate_has_mysql_compatible_argument_order = 0` (see the example after this list). [#61092](https://github.com/ClickHouse/ClickHouse/pull/61092) ([Robert Schulze](https://github.com/rschu1ze)).
* Forbid `SimpleAggregateFunction` in `ORDER BY` of `MergeTree` tables by default, in the same way as `AggregateFunction` is forbidden (they are forbidden because they are not comparable). Use the setting `allow_suspicious_primary_key` to allow them. [#61399](https://github.com/ClickHouse/ClickHouse/pull/61399) ([Azat Khuzhin](https://github.com/azat)).
* The `Ordinary` database engine is deprecated. You will receive a warning in clickhouse-client if your server is using it. This closes [#52229](https://github.com/ClickHouse/ClickHouse/issues/52229). [#56942](https://github.com/ClickHouse/ClickHouse/pull/56942) ([shabroo](https://github.com/shabroo)).
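A minimal, hedged sketch of the two entries above about reverting the analyzer change and the new `locate` argument order; the string literals are illustrative and only the setting names come from the entries themselves:

```sql
-- Revert to the pre-24.3 query analysis behavior for the current session
-- (either of the two settings below is sufficient).
SET compatibility = '24.2';
SET allow_experimental_analyzer = 0;

-- MySQL-compatible argument order of locate(): (needle, haystack[, start_pos]).
SELECT locate('World', 'Hello World');   -- expected to return 7
-- Restore the previous (haystack, needle[, start_pos]) order if needed:
SET function_locate_has_mysql_compatible_argument_order = 0;
```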

#### New Feature

* Support reading and writing backups as `tar` (in addition to `zip`). [#59535](https://github.com/ClickHouse/ClickHouse/pull/59535) ([josh-hildred](https://github.com/josh-hildred)).
* Implemented support for S3 Express buckets. [#59965](https://github.com/ClickHouse/ClickHouse/pull/59965) ([Nikita Taranov](https://github.com/nickitat)).
* Allow attaching parts from a different disk (using a copy instead of a hard link). [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112) ([Unalian](https://github.com/Unalian)).
* Size-capped `Memory` tables, controlled by the table settings `min_bytes_to_keep`, `max_bytes_to_keep`, `min_rows_to_keep`, and `max_rows_to_keep` (see the example after this list). [#60612](https://github.com/ClickHouse/ClickHouse/pull/60612) ([Jake Bamrah](https://github.com/JakeBamrah)).
* Separate limits on the number of waiting and executing queries. Added a new server setting `max_waiting_queries` that limits the number of queries waiting due to `async_load_databases`. Existing limits on the number of executing queries no longer count waiting queries. [#61053](https://github.com/ClickHouse/ClickHouse/pull/61053) ([Sergei Trifonov](https://github.com/serxa)).
* Added a table `system.keywords` which contains all the keywords from the parser. It is mostly needed, and will be used, for better fuzzing and syntax highlighting. [#51808](https://github.com/ClickHouse/ClickHouse/pull/51808) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Add support for `ATTACH PARTITION ALL`. [#61107](https://github.com/ClickHouse/ClickHouse/pull/61107) ([Kirill Nikiforov](https://github.com/allmazz)).
* Add a new function, `getClientHTTPHeader`. This closes [#54665](https://github.com/ClickHouse/ClickHouse/issues/54665). Co-authored with @lingtaolf. [#61820](https://github.com/ClickHouse/ClickHouse/pull/61820) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `generate_series` as a table function (a PostgreSQL-compatibility alias for the existing `numbers` function). This function generates a table with an arithmetic progression of natural numbers (see the example after this list). [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([divanik](https://github.com/divanik)).
* A new mode for `topK`/`topKWeighted` that returns the count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)).
* Added function `toMillisecond`, which returns the millisecond component for values of type `DateTime` or `DateTime64` (see the example after this list). [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)).
* Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. [#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
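A hedged sketch of three of the features above (size-capped `Memory` tables, `toMillisecond`, and the `generate_series` table function); table names and thresholds are illustrative, and the exact result column name of `generate_series` may differ:

```sql
-- A Memory table that keeps roughly the most recent 1 MiB / 10,000 rows
-- (illustrative thresholds).
CREATE TABLE recent_events
(
    ts DateTime64(3),
    message String
)
ENGINE = Memory
SETTINGS min_bytes_to_keep = 65536, max_bytes_to_keep = 1048576,
         min_rows_to_keep = 1000, max_rows_to_keep = 10000;

-- Millisecond component of a DateTime64 value.
SELECT toMillisecond(toDateTime64('2024-03-26 12:34:56.789', 3));  -- expected: 789

-- PostgreSQL-style arithmetic progression as a table function.
SELECT * FROM generate_series(1, 5);
```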

#### Performance Improvement

* Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)).
* 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)).
* If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value for at least 0.9 of all the times, the next columns after it will not be loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve the performance of the serialized aggregation method when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
* Lazily build JSON output to improve the performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
* Make HTTP/HTTPS connections with external services, such as AWS S3, reusable for all use cases, even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
* Improvements to aggregate functions `argMin` / `argMax` / `any` / `anyLast` / `anyHeavy`, as well as `ORDER BY {u8/u16/u32/u64/i8/i16/i32/i64} LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)).
* Trivial optimization for column filtering. Peak memory can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)).
* Execute the `multiIf` function in a columnar fashion when the result type's underlying type is a number (see the example after this list). [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)).
* Faster (almost 2x) mutexes. [#60823](https://github.com/ClickHouse/ClickHouse/pull/60823) ([Azat Khuzhin](https://github.com/azat)).
* Drain multiple connections in parallel when a distributed query is finishing. [#60845](https://github.com/ClickHouse/ClickHouse/pull/60845) ([lizhuoyu5](https://github.com/lzydmxy)).
* Optimize data movement between columns of a Nullable number or a Nullable string, which improves some micro-benchmarks. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)).
* Operations with the filesystem cache will suffer less from lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Optimize array join and other JOINs by preventing a wrong compiler optimization. Closes [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074). [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)).
* If a query with a syntax error contained the `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put into the AST. But the letter A in AST means "abstract", which means it should not contain heavyweight objects. Parts of the AST can be created and discarded during parsing, including during a large number of backtracking steps. This leads to slowness on the parsing side and consequently allows DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a new analyzer pass to optimize the IN operator for a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)).
* DNSResolver shuffles the set of resolved IPs, which is needed to uniformly utilize multiple endpoints of AWS S3. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)).
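A small sketch of the `multiIf` call shape mentioned above; the columnar fast path applies when the result is numeric, and the query below is table-free and purely illustrative:

```sql
-- multiIf(cond1, then1, cond2, then2, ..., else): the first matching branch wins.
SELECT multiIf(number < 3, -1, number < 7, 0, 1) AS bucket
FROM numbers(10);
```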

#### Experimental Feature

* Support parallel reading for Azure blob storage. This improves the performance of the experimental Azure object storage. [#61503](https://github.com/ClickHouse/ClickHouse/pull/61503) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Add an asynchronous WriteBuffer for Azure blob storage, similar to S3. This improves the performance of the experimental Azure object storage. [#59929](https://github.com/ClickHouse/ClickHouse/pull/59929) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Use managed identity for backups IO when using Azure Blob Storage. Add a setting to prevent ClickHouse from attempting to create a non-existent container, which requires permissions at the storage account level. [#61785](https://github.com/ClickHouse/ClickHouse/pull/61785) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Add a setting `parallel_replicas_allow_in_with_subquery = 1` which allows subqueries for IN to work with parallel replicas (see the example after this list). [#60950](https://github.com/ClickHouse/ClickHouse/pull/60950) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* A change for "zero-copy" replication: all zero-copy locks related to a table have to be dropped when the table is dropped. The directory that contains these locks also has to be removed. [#57575](https://github.com/ClickHouse/ClickHouse/pull/57575) ([Sema Checherinda](https://github.com/CheSema)).
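A hedged sketch of the parallel-replicas setting above. The prerequisite settings and table names below are assumptions for illustration (a configured cluster and the experimental parallel-replicas mode are required); only `parallel_replicas_allow_in_with_subquery` comes from the entry itself:

```sql
-- Assumed prerequisites for parallel replicas (illustrative, not exhaustive).
SET allow_experimental_parallel_reading_from_replicas = 1;
SET max_parallel_replicas = 3;

-- With the new setting, an IN subquery no longer blocks parallel replicas.
SET parallel_replicas_allow_in_with_subquery = 1;
SELECT count()
FROM orders
WHERE user_id IN (SELECT user_id FROM vip_users);
```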

#### Improvement

* Use `MergeTree` as a default table engine. [#60524](https://github.com/ClickHouse/ClickHouse/pull/60524) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enable `output_format_pretty_row_numbers` by default. It is better for usability. [#61791](https://github.com/ClickHouse/ClickHouse/pull/61791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* In the previous version, some numbers in Pretty formats were not pretty enough. [#61794](https://github.com/ClickHouse/ClickHouse/pull/61794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A long value in Pretty formats won't be cut if it is the single value in the result set, such as in the result of the `SHOW CREATE TABLE` query. [#61795](https://github.com/ClickHouse/ClickHouse/pull/61795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Similarly to `clickhouse-local`, `clickhouse-client` will accept the `--output-format` option as a synonym for the `--format` option. This closes [#59848](https://github.com/ClickHouse/ClickHouse/issues/59848). [#61797](https://github.com/ClickHouse/ClickHouse/pull/61797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* If stdout is a terminal and the output format is not specified, `clickhouse-client` and similar tools will use `PrettyCompact` by default, similarly to the interactive mode. `clickhouse-client` and `clickhouse-local` will handle command line arguments for input and output formats in a unified fashion. This closes [#61272](https://github.com/ClickHouse/ClickHouse/issues/61272). [#61800](https://github.com/ClickHouse/ClickHouse/pull/61800) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Underscore digit groups in Pretty formats for better readability. This is controlled by a new setting, `output_format_pretty_highlight_digit_groups`. [#61802](https://github.com/ClickHouse/ClickHouse/pull/61802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add the ability to override initial INSERT settings via `SYSTEM FLUSH DISTRIBUTED`. [#61832](https://github.com/ClickHouse/ClickHouse/pull/61832) ([Azat Khuzhin](https://github.com/azat)).
* Enable processors profiling (time spent and in/out bytes for sorting, aggregation, ...) by default. [#61096](https://github.com/ClickHouse/ClickHouse/pull/61096) ([Azat Khuzhin](https://github.com/azat)).
* Support files without a format extension in the Filesystem database. [#60795](https://github.com/ClickHouse/ClickHouse/pull/60795) ([Kruglov Pavel](https://github.com/Avogar)).
* Make all format names case insensitive, like Tsv, or TSV, or tsv, or even rowbinary. [#60420](https://github.com/ClickHouse/ClickHouse/pull/60420) ([豪肥肥](https://github.com/HowePa)). We would appreciate it if you continue to write them correctly, e.g., `JSON` 😇, not `Json` 🤮, but we don't mind if you spell them as you prefer (see the example after this list).
* Added `none_only_active` mode for the `distributed_ddl_output_mode` setting. [#60340](https://github.com/ClickHouse/ClickHouse/pull/60340) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The advanced dashboard has slightly better colors for multi-line graphs. [#60391](https://github.com/ClickHouse/ClickHouse/pull/60391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The Advanced dashboard now has controls always visible on scrolling. This allows you to add a new chart without scrolling up. [#60692](https://github.com/ClickHouse/ClickHouse/pull/60692) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* While running the `MODIFY COLUMN` query for materialized views, check the inner table's structure to ensure every column exists. [#47427](https://github.com/ClickHouse/ClickHouse/pull/47427) ([sunny](https://github.com/sunny19930321)).
* String types and Enums can be used in the same context, such as arrays, UNION queries, and conditional expressions. This closes [#60726](https://github.com/ClickHouse/ClickHouse/issues/60726). [#60727](https://github.com/ClickHouse/ClickHouse/pull/60727) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow declaring Enums in the structure of external data for query processing (this is an immediate temporary table that you can provide for your query). [#57857](https://github.com/ClickHouse/ClickHouse/pull/57857) ([Duc Canh Le](https://github.com/canhld94)).
* Consider lightweight deleted rows when selecting parts to merge, so the disk size of the resulting part will be estimated better. [#58223](https://github.com/ClickHouse/ClickHouse/pull/58223) ([Zhuo Qiu](https://github.com/jewelzqiu)).
* Added comments for columns for more system tables. Continuation of https://github.com/ClickHouse/ClickHouse/pull/58356. [#59016](https://github.com/ClickHouse/ClickHouse/pull/59016) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Virtual columns can now be used in `PREWHERE`, which is worthwhile for non-constant virtual columns like `_part_offset`. [#59033](https://github.com/ClickHouse/ClickHouse/pull/59033) ([Amos Bird](https://github.com/amosbird)). Improved overall usability of virtual columns: built-in documentation is now available for virtual columns as a column comment in the `DESCRIBE` query when the setting `describe_include_virtual_columns` is enabled. [#60205](https://github.com/ClickHouse/ClickHouse/pull/60205) ([Anton Popov](https://github.com/CurtizJ)).
* Instead of using a constant key, object storage now generates a key for determining the remove-objects capability. [#59495](https://github.com/ClickHouse/ClickHouse/pull/59495) ([Sema Checherinda](https://github.com/CheSema)).
* Allow "local" as the object storage type instead of "local_blob_storage". [#60165](https://github.com/ClickHouse/ClickHouse/pull/60165) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Parallel flush of pending INSERT blocks of the Distributed engine on `DETACH`/server shutdown and `SYSTEM FLUSH DISTRIBUTED` (parallelism will work only if you have a multi-disk policy for the table, like everything in the Distributed engine right now). [#60225](https://github.com/ClickHouse/ClickHouse/pull/60225) ([Azat Khuzhin](https://github.com/azat)).
* Add a setting to force read-through cache for merges. [#60308](https://github.com/ClickHouse/ClickHouse/pull/60308) ([Kseniia Sumarokova](https://github.com/kssenii)).
* An improvement for the MySQL compatibility protocol. The issue [#57598](https://github.com/ClickHouse/ClickHouse/issues/57598) mentions a variant behaviour regarding transaction handling: an issued COMMIT/ROLLBACK when no transaction is active is reported as an error, contrary to MySQL behaviour. [#60338](https://github.com/ClickHouse/ClickHouse/pull/60338) ([PapaToemmsn](https://github.com/PapaToemmsn)).
* Function `substring` now has a new alias `byteSlice`. [#60494](https://github.com/ClickHouse/ClickHouse/pull/60494) ([Robert Schulze](https://github.com/rschu1ze)).
* Renamed server setting `dns_cache_max_size` to `dns_cache_max_entries` to reduce ambiguity. [#60500](https://github.com/ClickHouse/ClickHouse/pull/60500) ([Kirill Nikiforov](https://github.com/allmazz)).
* `SHOW INDEX | INDEXES | INDICES | KEYS` no longer sorts by the primary key columns (which was unintuitive). [#60514](https://github.com/ClickHouse/ClickHouse/pull/60514) ([Robert Schulze](https://github.com/rschu1ze)).
* Keeper improvement: abort during startup if an invalid snapshot is detected to avoid data loss. [#60537](https://github.com/ClickHouse/ClickHouse/pull/60537) ([Antonio Andelic](https://github.com/antonio2368)).
* Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
* Keeper improvement: support `leadership_expiry_ms` in Keeper's settings. [#60806](https://github.com/ClickHouse/ClickHouse/pull/60806) ([Brokenice0415](https://github.com/Brokenice0415)).
* Always infer exponential numbers in JSON formats regardless of the setting `input_format_try_infer_exponent_floats`. Add a setting `input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects` that allows using the String type for ambiguous paths instead of throwing an exception during named Tuple inference from JSON objects. [#60808](https://github.com/ClickHouse/ClickHouse/pull/60808) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for the `START TRANSACTION` syntax typically used in MySQL, resolving https://github.com/ClickHouse/ClickHouse/discussions/60865. [#60886](https://github.com/ClickHouse/ClickHouse/pull/60886) ([Zach Naimon](https://github.com/ArctypeZach)).
* Add a flag for the full-sorting merge join algorithm to treat null as biggest/smallest, so the behavior can be compatible with other SQL systems, like Apache Spark. [#60896](https://github.com/ClickHouse/ClickHouse/pull/60896) ([loudongfeng](https://github.com/loudongfeng)).
* Support detecting the output format by file extension in `clickhouse-client` and `clickhouse-local`. [#61036](https://github.com/ClickHouse/ClickHouse/pull/61036) ([豪肥肥](https://github.com/HowePa)).
* Update the memory limit at runtime when the Linux cgroups value changes. [#61049](https://github.com/ClickHouse/ClickHouse/pull/61049) ([Han Fei](https://github.com/hanfei1991)).
* Add the function `toUInt128OrZero`, which was missed by mistake (the mistake is related to https://github.com/ClickHouse/ClickHouse/pull/945). The compatibility aliases `FROM_UNIXTIME` and `DATE_FORMAT` (they are not ClickHouse-native and only exist for MySQL compatibility) have been made case insensitive, as expected for SQL-compatibility aliases. [#61114](https://github.com/ClickHouse/ClickHouse/pull/61114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improvements for the access checks, allowing revocation of unpossessed rights in case the target user doesn't have the revoking grants either. Example: `GRANT SELECT ON *.* TO user1; REVOKE SELECT ON system.* FROM user1;` (see the example after this list). [#61115](https://github.com/ClickHouse/ClickHouse/pull/61115) ([pufit](https://github.com/pufit)).
* Fix the `has()` function with a `Nullable` column (fixes [#60214](https://github.com/ClickHouse/ClickHouse/issues/60214)). [#61249](https://github.com/ClickHouse/ClickHouse/pull/61249) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Now it's possible to specify the attribute `merge="true"` in config substitutions for subtrees `<include from_zk="/path" merge="true">`. If this attribute is specified, ClickHouse will merge the subtree with the existing configuration; otherwise, the default behavior is to append the new content to the configuration. [#61299](https://github.com/ClickHouse/ClickHouse/pull/61299) ([alesapin](https://github.com/alesapin)).
* Add async metrics for virtual memory mappings: `VMMaxMapCount` & `VMNumMaps`. Closes [#60662](https://github.com/ClickHouse/ClickHouse/issues/60662). [#61354](https://github.com/ClickHouse/ClickHouse/pull/61354) ([Tuan Pham Anh](https://github.com/tuanpavn)).
* Use the `temporary_files_codec` setting in all places where we create temporary data, for example external memory sorting and external memory GROUP BY. Previously, it worked only for the `partial_merge` JOIN algorithm. [#61456](https://github.com/ClickHouse/ClickHouse/pull/61456) ([Maksim Kita](https://github.com/kitaisreal)).
* Add a new setting `max_parser_backtracks` which allows limiting the complexity of query parsing. [#61502](https://github.com/ClickHouse/ClickHouse/pull/61502) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Less contention during dynamic resize of the filesystem cache. [#61524](https://github.com/ClickHouse/ClickHouse/pull/61524) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disallow sharded mode of StorageS3 queue, because it will be rewritten. [#61537](https://github.com/ClickHouse/ClickHouse/pull/61537) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed typo: from `use_leagcy_max_level` to `use_legacy_max_level`. [#61545](https://github.com/ClickHouse/ClickHouse/pull/61545) ([William Schoeffel](https://github.com/wiledusc)).
* Remove some duplicate entries in `system.blob_storage_log`. [#61622](https://github.com/ClickHouse/ClickHouse/pull/61622) ([YenchangChan](https://github.com/YenchangChan)).
* Added the `current_user` function as a compatibility alias for MySQL. [#61770](https://github.com/ClickHouse/ClickHouse/pull/61770) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix inconsistent floating point aggregate function states in mixed x86-64 / ARM clusters. [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)).
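Two short sketches of the entries above on case-insensitive format names and on revoking unpossessed rights; `user1` is taken from the entry itself and the queries are otherwise illustrative:

```sql
-- Format names are now case-insensitive.
SELECT 1 FORMAT tsv;
SELECT 1 FORMAT JSONEachRow;

-- Revoking part of a broader grant is allowed even though user1 has no
-- separate grant on system.*; the remaining grant is narrowed accordingly.
GRANT SELECT ON *.* TO user1;
REVOKE SELECT ON system.* FROM user1;
SHOW GRANTS FOR user1;
```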

#### Build/Testing/Packaging Improvement

* The real-time query profiler now works on AArch64. In previous versions, it worked only when a program didn't spend time inside a syscall. [#60807](https://github.com/ClickHouse/ClickHouse/pull/60807) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ClickHouse version has been added to docker labels. Closes [#54224](https://github.com/ClickHouse/ClickHouse/issues/54224). [#60949](https://github.com/ClickHouse/ClickHouse/pull/60949) ([Nikolay Monkov](https://github.com/nikmonkov)).
* Upgrade `prqlc` to 0.11.3. [#60616](https://github.com/ClickHouse/ClickHouse/pull/60616) ([Maximilian Roos](https://github.com/max-sixty)).
* Add generic query text fuzzer in `clickhouse-local`. [#61508](https://github.com/ClickHouse/ClickHouse/pull/61508) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix finished_mutations_to_keep=0 for MergeTree (as the docs say, 0 means keep everything) [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)).
* Something was wrong with the FINAL optimization, here is how the author describes it: "PartsSplitter invalid ranges for the same part". [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
* Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)).
* An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)).
* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Don't allow setting max_parallel_replicas to 0, as it doesn't make sense [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)).
* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)).
* Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Something was wrong with experimental KQL (Kusto) support: fix `max_query_size_for_kql_compound_operator`: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Keeper fix: add timeouts when waiting for commit logs [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)).
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
* Fix reading from MergeTree with non-deterministic functions in filter [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix logical error on bad compatibility setting value type [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)).
* fix(prql): Robust panic handler [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)).
* Fix `intDiv` for decimal and date arguments [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix: expand CTE in alter modify query [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory) [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)).
* Fix "Invalid storage definition in metadata file" for parameterized views [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove wrong assertion in aggregate function quantileGK [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)).
* Fix insert-select + insert_deduplication_token bug by setting streams to 1 [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix toStartOfInterval [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix a possible hang on error in HashedDictionaryParallelLoader [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)).
* Fix async RESTORE with Replicated database (experimental feature) [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix deadlock in async inserts to `Log` tables via native protocol [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)).
* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix usage of session_token in S3 engine [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
* Fix bugs in show database [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)).
* Fix logical error in RabbitMQ storage with MATERIALIZED columns [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)).
* Fix CREATE OR REPLACE DICTIONARY [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
* Fix an issue with actions DAG split [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)).
* Fix finishing a failed RESTORE [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)).
* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)).
* Allow queuing in restore pool [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)).
* Fix an inconsistency when reading system.parts using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)).
* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)).
* Fix a crash in window view (experimental feature) [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `repeat` with non-native integers [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix client's `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix crash in arrayPartialReverseSort [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `addDays` causing an error when used with DateTime64 [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)).
* Disallow LowCardinality input type for JSONExtract [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)).
* Fix `system.part_log` for async insert with deduplication [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix a `Non-ready set` exception for system.parts. [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix actual_part_name for REPLACE_RANGE (`Entry actual part isn't empty yet`) [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a sanitizer report in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
* Fix an observation that the RANGE frame is not supported for Nullable columns. [#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)).
### <a id="242"></a> ClickHouse release 24.2, 2024-02-29
|
||||
|
||||
#### Backward Incompatible Change
|
||||
|
@ -28,7 +28,6 @@ curl https://clickhouse.com/ | sh
|
||||
* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
|
||||
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
|
||||
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev.
|
||||
* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
|
||||
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
|
||||
|
||||
## Monthly Release & Community Call
|
||||
|
docs/changelogs/v23.12.6.19-stable.md (new file, 24 lines)

@@ -0,0 +1,24 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v23.12.6.19-stable (40080a3c2a4) FIXME as compared to v23.12.5.81-stable (a0fbe3ae813)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).

#### CI Fix or Improvement (changelog entry is not required)

* Backported in [#61429](https://github.com/ClickHouse/ClickHouse/issues/61429):. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61486](https://github.com/ClickHouse/ClickHouse/issues/61486): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)).
* Backported in [#61641](https://github.com/ClickHouse/ClickHouse/issues/61641):. [#61592](https://github.com/ClickHouse/ClickHouse/pull/61592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61811](https://github.com/ClickHouse/ClickHouse/issues/61811): ![Screenshot_20240323_025055](https://github.com/ClickHouse/ClickHouse/assets/18581488/ccaab212-a1d3-4dfb-8d56-b1991760b6bf). [#61801](https://github.com/ClickHouse/ClickHouse/pull/61801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
docs/changelogs/v23.3.22.3-lts.md (new file, 13 lines)

@@ -0,0 +1,13 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v23.3.22.3-lts (04075bf96a1) FIXME as compared to v23.3.21.26-lts (d9672a3731f)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
docs/changelogs/v23.8.12.13-lts.md (new file, 20 lines)

@@ -0,0 +1,20 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v23.8.12.13-lts (bdbd0d87e5d) FIXME as compared to v23.8.11.28-lts (31879d2ab4c)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).

#### CI Fix or Improvement (changelog entry is not required)

* Backported in [#61428](https://github.com/ClickHouse/ClickHouse/issues/61428):. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61484](https://github.com/ClickHouse/ClickHouse/issues/61484): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)).
docs/changelogs/v24.1.8.22-stable.md (new file, 32 lines)

@@ -0,0 +1,32 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.1.8.22-stable (7fb8f96d3da) FIXME as compared to v24.1.7.18-stable (90925babd78)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
* Fix bug when reading system.parts using UUID (issue 61220). [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)).
* Fix client `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).

#### CI Fix or Improvement (changelog entry is not required)

* Backported in [#61431](https://github.com/ClickHouse/ClickHouse/issues/61431):. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61488](https://github.com/ClickHouse/ClickHouse/issues/61488): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)).
* Backported in [#61642](https://github.com/ClickHouse/ClickHouse/issues/61642):. [#61592](https://github.com/ClickHouse/ClickHouse/pull/61592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### NO CL ENTRY

* NO CL ENTRY: 'Revert "Backport [#61479](https://github.com/ClickHouse/ClickHouse/issues/61479) to 24.1: Fix bug when reading system.parts using UUID (issue 61220)."'. [#61775](https://github.com/ClickHouse/ClickHouse/pull/61775) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
@@ -55,9 +55,7 @@ To build using Homebrew's vanilla Clang compiler (the only **recommended** way):
    cd ClickHouse
    mkdir build
    export PATH=$(brew --prefix llvm)/bin:$PATH
    export CC=$(brew --prefix llvm)/bin/clang
    export CXX=$(brew --prefix llvm)/bin/clang++
    cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -S . -B build
    cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -S . -B build
    cmake --build build
    # The resulting binary will be created at: build/programs/clickhouse
    ```
@@ -867,3 +867,31 @@ Default value: `Never`
Persists virtual column `_block_number` on merges.

Default value: false.

## exclude_deleted_rows_for_part_size_in_merge {#exclude_deleted_rows_for_part_size_in_merge}

If enabled, the estimated actual size of data parts (i.e., excluding those rows that have been deleted through `DELETE FROM`) will be used when selecting parts to merge. Note that this behavior is only triggered for data parts affected by `DELETE FROM` executed after this setting is enabled.

Possible values:

- true, false

Default value: false

**See Also**

- [load_existing_rows_count_for_old_parts](#load_existing_rows_count_for_old_parts) setting

## load_existing_rows_count_for_old_parts {#load_existing_rows_count_for_old_parts}

If enabled along with [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge), the deleted rows count for existing data parts will be calculated during table startup. Note that it may slow down table loading on startup.

Possible values:

- true, false

Default value: false

**See Also**

- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
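A minimal, hedged sketch of enabling the two settings documented above on an existing MergeTree table; the table name `events` is illustrative:

```sql
-- Take lightweight-deleted rows into account when estimating part sizes for merges,
-- and count deleted rows of existing parts when the table loads at startup.
ALTER TABLE events
    MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1,
                   load_existing_rows_count_for_old_parts = 1;
```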
@@ -127,7 +127,7 @@ See the [deployment](docs/en/deployment-guides/terminology.md) documentation for

#### Verify that experimental transactions are enabled

Issue a `BEGIN TRANSACTION` followed by a `ROLLBACK` to verify that experimental transactions are enabled, and that ClickHouse Keeper is enabled as it is used to track transactions.
Issue a `BEGIN TRANSACTION` or `START TRANSACTION` followed by a `ROLLBACK` to verify that experimental transactions are enabled, and that ClickHouse Keeper is enabled as it is used to track transactions.

```sql
BEGIN TRANSACTION
@@ -36,7 +36,7 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
    fi

    /bin/systemctl daemon-reload
    /bin/systemctl enable --now clickhouse-server
    /bin/systemctl enable clickhouse-server
else
    # If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server
    if [ -x "/etc/init.d/clickhouse-server" ]; then
@@ -777,7 +777,13 @@ struct IdentifierResolveScope
    std::unordered_map<QueryTreeNodePtr, TableExpressionData> table_expression_node_to_data;

    QueryTreeNodePtrWithHashSet nullable_group_by_keys;
    QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> nullable_join_columns;

    /** It's possible that after a JOIN, a column in the projection has a type different from the column in the source table.
      * (For example, after join_use_nulls or USING column casted to supertype)
      * However, the column in the projection still refers to the table as its source.
      * This map is used to revert these columns back to their original columns in the source table.
      */
    QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> join_columns_with_changed_types;

    /// Use identifier lookup to result cache
    bool use_identifier_lookup_to_result_cache = true;
@@ -1308,7 +1314,8 @@ private:
            if (!resolved_expression->getResultType()->equals(*new_result_type))
                resolved_expression = buildCastFunction(resolved_expression, new_result_type, scope.context, true);
        }
        scope.nullable_join_columns[nullable_resolved_identifier] = resolved_identifier;
        if (!nullable_resolved_identifier->isEqual(*resolved_identifier))
            scope.join_columns_with_changed_types[nullable_resolved_identifier] = resolved_identifier;
        return nullable_resolved_identifier;
    }
    return nullptr;
@@ -1401,6 +1408,8 @@ private:
        const NamesAndTypes & matched_columns,
        const IdentifierResolveScope & scope);

    void updateMatchedColumnsFromJoinUsing(QueryTreeNodesWithNames & result_matched_column_nodes_with_names, const QueryTreeNodePtr & source_table_expression, IdentifierResolveScope & scope);

    QueryTreeNodesWithNames resolveQualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);

    QueryTreeNodesWithNames resolveUnqualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);
@@ -2168,10 +2177,13 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
        !nearest_query_scope)
    {
        auto constant_value = std::make_shared<ConstantValue>(std::move(scalar_value), scalar_type);
        auto constant_node = std::make_shared<ConstantNode>(std::move(constant_value), node);
        auto constant_node = std::make_shared<ConstantNode>(constant_value, node);

        if (constant_node->getValue().isNull())
        {
            node = buildCastFunction(constant_node, constant_node->getResultType(), context);
            node = std::make_shared<ConstantNode>(std::move(constant_value), node);
        }
        else
            node = std::move(constant_node);
@ -3309,6 +3321,78 @@ QueryTreeNodePtr checkIsMissedObjectJSONSubcolumn(const QueryTreeNodePtr & left_
|
||||
return {};
|
||||
}

/// Used to replace columns that changed type because of JOIN to their original type
class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
{
public:
explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_, const ContextPtr & context_)
: replacement_map(replacement_map_)
, context(context_)
{}

/// Apply the replacement transitively, because a column may change its type twice: once to a supertype and then because of `join_use_nulls`
static QueryTreeNodePtr findTransitiveReplacement(QueryTreeNodePtr node, const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_)
{
auto it = replacement_map_.find(node);
QueryTreeNodePtr result_node = nullptr;
for (; it != replacement_map_.end(); it = replacement_map_.find(result_node))
{
if (result_node && result_node->isEqual(*it->second))
{
Strings map_dump;
for (const auto & [k, v]: replacement_map_)
map_dump.push_back(fmt::format("{} -> {} (is_equals: {}, is_same: {})",
k.node->dumpTree(), v->dumpTree(), k.node->isEqual(*v), k.node == v));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Infinite loop in query tree replacement map: {}", fmt::join(map_dump, "; "));
}
chassert(it->second);

result_node = it->second;
}
return result_node;
}

void visitImpl(QueryTreeNodePtr & node)
{
if (auto replacement_node = findTransitiveReplacement(node, replacement_map))
node = replacement_node;

if (auto * function_node = node->as<FunctionNode>(); function_node && function_node->isResolved())
rerunFunctionResolve(function_node, context);
}

/// We want to re-run resolve for function _after_ its arguments are replaced
bool shouldTraverseTopToBottom() const { return false; }

bool needChildVisit(QueryTreeNodePtr & /* parent */, QueryTreeNodePtr & child)
{
/// Visit only expressions, but not subqueries
return child->getNodeType() == QueryTreeNodeType::IDENTIFIER
|| child->getNodeType() == QueryTreeNodeType::LIST
|| child->getNodeType() == QueryTreeNodeType::FUNCTION
|| child->getNodeType() == QueryTreeNodeType::COLUMN;
}

private:
const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map;
const ContextPtr & context;
};
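
The visitor above relies on findTransitiveReplacement chasing the replacement map until no further entry exists, and throwing if the chain loops. Below is a minimal standalone sketch of that idea (not part of this commit), using plain std::string keys instead of query tree nodes and a simple step counter instead of the isEqual-based cycle check; all names in it are illustrative.

```cpp
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <unordered_map>

using ReplacementMap = std::unordered_map<std::string, std::string>;

std::optional<std::string> findTransitiveReplacement(const std::string & node, const ReplacementMap & map)
{
    std::optional<std::string> result;
    size_t steps = 0;
    for (auto it = map.find(node); it != map.end(); it = map.find(*result))
    {
        /// A chain longer than the map itself must contain a cycle.
        if (++steps > map.size())
            throw std::runtime_error("Infinite loop in replacement map");
        result = it->second;
    }
    return result;
}

int main()
{
    /// a -> b -> c: the transitive result of "a" is "c".
    ReplacementMap map{{"a", "b"}, {"b", "c"}};
    std::cout << findTransitiveReplacement("a", map).value_or("<none>") << '\n'; // prints "c"
}
```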
|
||||
|
||||
/// Compare resolved identifiers considering columns that become nullable after JOIN
|
||||
bool resolvedIdenfiersFromJoinAreEquals(
|
||||
const QueryTreeNodePtr & left_resolved_identifier,
|
||||
const QueryTreeNodePtr & right_resolved_identifier,
|
||||
const IdentifierResolveScope & scope)
|
||||
{
|
||||
auto left_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(left_resolved_identifier, scope.join_columns_with_changed_types);
|
||||
const auto & left_resolved_to_compare = left_original_node ? left_original_node : left_resolved_identifier;
|
||||
|
||||
auto right_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(right_resolved_identifier, scope.join_columns_with_changed_types);
|
||||
const auto & right_resolved_to_compare = right_original_node ? right_original_node : right_resolved_identifier;
|
||||
|
||||
return left_resolved_to_compare->isEqual(*right_resolved_to_compare, IQueryTreeNode::CompareOptions{.compare_aliases = false});
|
||||
}
|
||||
|
||||
QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
|
||||
const QueryTreeNodePtr & table_expression_node,
|
||||
IdentifierResolveScope & scope)
|
||||
@ -3443,9 +3527,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
auto & result_column = result_column_node->as<ColumnNode &>();
|
||||
result_column.setColumnType(using_column_node.getColumnType());
|
||||
|
||||
const auto & join_using_left_column = using_expression_list.getNodes().at(0);
|
||||
if (!result_column_node->isEqual(*join_using_left_column))
|
||||
scope.join_columns_with_changed_types[result_column_node] = join_using_left_column;
|
||||
|
||||
resolved_identifier = std::move(result_column_node);
|
||||
}
|
||||
else if (left_resolved_identifier->isEqual(*right_resolved_identifier, IQueryTreeNode::CompareOptions{.compare_aliases = false}))
|
||||
else if (resolvedIdenfiersFromJoinAreEquals(left_resolved_identifier, right_resolved_identifier, scope))
|
||||
{
|
||||
const auto & identifier_path_part = identifier_lookup.identifier.front();
|
||||
auto * left_resolved_identifier_column = left_resolved_identifier->as<ColumnNode>();
|
||||
@ -3521,6 +3609,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
auto left_resolved_column_clone = std::static_pointer_cast<ColumnNode>(left_resolved_column.clone());
|
||||
left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
|
||||
resolved_identifier = std::move(left_resolved_column_clone);
|
||||
|
||||
if (!resolved_identifier->isEqual(*using_column_node_it->second))
|
||||
scope.join_columns_with_changed_types[resolved_identifier] = using_column_node_it->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3543,6 +3634,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
auto right_resolved_column_clone = std::static_pointer_cast<ColumnNode>(right_resolved_column.clone());
|
||||
right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
|
||||
resolved_identifier = std::move(right_resolved_column_clone);
|
||||
if (!resolved_identifier->isEqual(*using_column_node_it->second))
|
||||
scope.join_columns_with_changed_types[resolved_identifier] = using_column_node_it->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3552,9 +3645,17 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
|
||||
|
||||
if (scope.join_use_nulls)
|
||||
{
|
||||
auto projection_name_it = node_to_projection_name.find(resolved_identifier);
|
||||
auto nullable_resolved_identifier = convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side, scope);
|
||||
if (nullable_resolved_identifier)
|
||||
{
|
||||
resolved_identifier = nullable_resolved_identifier;
|
||||
/// Set the same projection name for new nullable node
|
||||
if (projection_name_it != node_to_projection_name.end())
|
||||
{
|
||||
node_to_projection_name.emplace(resolved_identifier, projection_name_it->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return resolved_identifier;
|
||||
@ -4213,6 +4314,95 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::getMatchedColumnNodesWithN
|
||||
return matched_column_nodes_with_names;
|
||||
}

bool hasTableExpressionInJoinTree(const QueryTreeNodePtr & join_tree_node, const QueryTreeNodePtr & table_expression)
{
QueryTreeNodes nodes_to_process;
nodes_to_process.push_back(join_tree_node);

while (!nodes_to_process.empty())
{
auto node_to_process = std::move(nodes_to_process.back());
nodes_to_process.pop_back();
if (node_to_process == table_expression)
return true;

if (node_to_process->getNodeType() == QueryTreeNodeType::JOIN)
{
const auto & join_node = node_to_process->as<JoinNode &>();
nodes_to_process.push_back(join_node.getLeftTableExpression());
nodes_to_process.push_back(join_node.getRightTableExpression());
}
}
return false;
}
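
hasTableExpressionInJoinTree above walks the join tree with an explicit stack rather than recursion, stopping as soon as the searched table expression is found. A minimal standalone sketch of the same pattern (not part of this commit), with a purely illustrative Node type:

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct Node
{
    int id;
    std::shared_ptr<Node> left;
    std::shared_ptr<Node> right;
};

bool hasNode(const std::shared_ptr<Node> & root, int id_to_find)
{
    std::vector<std::shared_ptr<Node>> nodes_to_process{root};
    while (!nodes_to_process.empty())
    {
        auto node = std::move(nodes_to_process.back());
        nodes_to_process.pop_back();
        if (!node)
            continue;
        if (node->id == id_to_find)
            return true;
        nodes_to_process.push_back(node->left);
        nodes_to_process.push_back(node->right);
    }
    return false;
}

int main()
{
    auto leaf1 = std::make_shared<Node>(Node{1, nullptr, nullptr});
    auto leaf2 = std::make_shared<Node>(Node{2, nullptr, nullptr});
    auto root = std::make_shared<Node>(Node{0, leaf1, leaf2});
    std::cout << hasNode(root, 2) << ' ' << hasNode(root, 5) << '\n'; // 1 0
}
```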

/// Columns that are resolved from a matcher can also match columns from JOIN USING.
/// In that case we update the type to the type of the column in the USING section.
/// TODO: It's not completely correct for qualified matchers, so t1.* should be resolved to the left table column type.
/// But in planner we do not distinguish such cases.
void QueryAnalyzer::updateMatchedColumnsFromJoinUsing(
|
||||
QueryTreeNodesWithNames & result_matched_column_nodes_with_names,
|
||||
const QueryTreeNodePtr & source_table_expression,
|
||||
IdentifierResolveScope & scope)
|
||||
{
|
||||
auto * nearest_query_scope = scope.getNearestQueryScope();
|
||||
auto * nearest_query_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as<QueryNode>() : nullptr;
|
||||
|
||||
/// If there are no parent query scope or query scope does not have join tree
|
||||
if (!nearest_query_scope_query_node || !nearest_query_scope_query_node->getJoinTree())
|
||||
{
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"There are no table sources. In scope {}",
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
const auto & join_tree = nearest_query_scope_query_node->getJoinTree();
|
||||
|
||||
const auto * join_node = join_tree->as<JoinNode>();
|
||||
if (join_node && join_node->isUsingJoinExpression())
|
||||
{
|
||||
const auto & join_using_list = join_node->getJoinExpression()->as<ListNode &>();
|
||||
const auto & join_using_nodes = join_using_list.getNodes();
|
||||
|
||||
for (auto & [matched_column_node, _] : result_matched_column_nodes_with_names)
|
||||
{
|
||||
auto & matched_column_node_typed = matched_column_node->as<ColumnNode &>();
|
||||
const auto & matched_column_name = matched_column_node_typed.getColumnName();
|
||||
|
||||
for (const auto & join_using_node : join_using_nodes)
|
||||
{
|
||||
auto & join_using_column_node = join_using_node->as<ColumnNode &>();
|
||||
const auto & join_using_column_name = join_using_column_node.getColumnName();
|
||||
|
||||
if (matched_column_name != join_using_column_name)
|
||||
continue;
|
||||
|
||||
const auto & join_using_column_nodes_list = join_using_column_node.getExpressionOrThrow()->as<ListNode &>();
|
||||
const auto & join_using_column_nodes = join_using_column_nodes_list.getNodes();
|
||||
|
||||
auto it = node_to_projection_name.find(matched_column_node);
|
||||
|
||||
if (hasTableExpressionInJoinTree(join_node->getLeftTableExpression(), source_table_expression))
|
||||
matched_column_node = join_using_column_nodes.at(0);
|
||||
else if (hasTableExpressionInJoinTree(join_node->getRightTableExpression(), source_table_expression))
|
||||
matched_column_node = join_using_column_nodes.at(1);
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot find column {} in JOIN USING section {}",
|
||||
matched_column_node->dumpTree(), join_node->dumpTree());
|
||||
|
||||
matched_column_node = matched_column_node->clone();
|
||||
if (it != node_to_projection_name.end())
|
||||
node_to_projection_name.emplace(matched_column_node, it->second);
|
||||
|
||||
matched_column_node->as<ColumnNode &>().setColumnType(join_using_column_node.getResultType());
|
||||
if (!matched_column_node->isEqual(*join_using_column_nodes.at(0)))
|
||||
scope.join_columns_with_changed_types[matched_column_node] = join_using_column_nodes.at(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Resolve qualified tree matcher.
|
||||
*
|
||||
* First try to match qualified identifier to expression. If qualified identifier matched expression node then
|
||||
@ -4330,6 +4520,8 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
|
||||
matched_columns,
|
||||
scope);
|
||||
|
||||
updateMatchedColumnsFromJoinUsing(result_matched_column_nodes_with_names, table_expression_node, scope);
|
||||
|
||||
return result_matched_column_nodes_with_names;
|
||||
}
|
||||
|
||||
@ -4465,6 +4657,8 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(
|
||||
|
||||
matched_column_node = matched_column_node->clone();
|
||||
matched_column_node->as<ColumnNode &>().setColumnType(join_using_column_node.getResultType());
|
||||
if (!matched_column_node->isEqual(*join_using_column_nodes.at(0)))
|
||||
scope.join_columns_with_changed_types[matched_column_node] = join_using_column_nodes.at(0);
|
||||
|
||||
table_expression_column_names_to_skip.insert(join_using_column_name);
|
||||
matched_expression_nodes_with_column_names.emplace_back(std::move(matched_column_node), join_using_column_name);
|
||||
@ -4584,7 +4778,9 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
|
||||
node = nullable_node;
|
||||
/// Set the same projection name for new nullable node
|
||||
if (projection_name_it != node_to_projection_name.end())
|
||||
{
|
||||
node_to_projection_name.emplace(node, projection_name_it->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -7609,29 +7805,6 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
|
||||
scope.table_expressions_in_resolve_process.erase(join_tree_node.get());
|
||||
}
|
||||
|
||||
class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
|
||||
{
|
||||
public:
|
||||
explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_, const ContextPtr & context_)
|
||||
: replacement_map(replacement_map_)
|
||||
, context(context_)
|
||||
{}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (auto it = replacement_map.find(node); it != replacement_map.end())
|
||||
node = it->second;
|
||||
if (auto * function_node = node->as<FunctionNode>())
|
||||
rerunFunctionResolve(function_node, context);
|
||||
}
|
||||
|
||||
bool shouldTraverseTopToBottom() const { return false; }
|
||||
|
||||
private:
|
||||
const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map;
|
||||
const ContextPtr & context;
|
||||
};
|
||||
|
||||
/** Resolve query.
|
||||
* This function modifies query node during resolve. It is caller responsibility to clone query node before resolve
|
||||
* if it is needed for later use.
|
||||
@ -7823,19 +7996,17 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
|
||||
{
|
||||
resolveExpressionNode(prewhere_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
|
||||
|
||||
if (scope.join_use_nulls)
|
||||
{
|
||||
/** Expression in PREWHERE with JOIN should not be modified by join_use_nulls.
|
||||
* Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE b = 1
|
||||
* Column `a` should be resolved from table and should not change its type to Nullable.
|
||||
* More complicated example when column is somewhere inside an expression:
|
||||
* SELECT a + 1 as b FROM t1 JOIN t2 USING (id) PREWHERE b = 1
|
||||
* expression `a + 1 as b` in projection and in PREWHERE should have different `a`.
|
||||
*/
|
||||
prewhere_node = prewhere_node->clone();
|
||||
ReplaceColumnsVisitor replace_visitor(scope.nullable_join_columns, scope.context);
|
||||
replace_visitor.visit(prewhere_node);
|
||||
}
|
||||
/** Expressions in PREWHERE with JOIN should not change their type.
|
||||
* Example: SELECT * FROM t1 JOIN t2 USING (a) PREWHERE a = 1
|
||||
* Column `a` in PREWHERE should be resolved from the left table
|
||||
* and should not change its type to Nullable or to the supertype of `a` from t1 and t2.
|
||||
* Here's a more complicated example where the column is somewhere inside an expression:
|
||||
* SELECT a + 1 as b FROM t1 JOIN t2 USING (id) PREWHERE b = 1
|
||||
* The expression `a + 1 as b` in the projection and in PREWHERE should have different `a`.
|
||||
*/
|
||||
prewhere_node = prewhere_node->clone();
|
||||
ReplaceColumnsVisitor replace_visitor(scope.join_columns_with_changed_types, scope.context);
|
||||
replace_visitor.visit(prewhere_node);
|
||||
}
|
||||
|
||||
if (query_node_typed.getWhere())
|
||||
|
@ -33,6 +33,8 @@ namespace ErrorCodes
|
||||
M(UInt64, shard_num) \
|
||||
M(UInt64, replica_num) \
|
||||
M(Bool, check_parts) \
|
||||
M(Bool, check_projection_parts) \
|
||||
M(Bool, allow_backup_broken_projections) \
|
||||
M(Bool, internal) \
|
||||
M(String, host_id) \
|
||||
M(OptionalUUID, backup_uuid)
|
||||
|
@ -65,6 +65,12 @@ struct BackupSettings
|
||||
/// Check checksums of the data parts before writing them to a backup.
|
||||
bool check_parts = true;
|
||||
|
||||
/// Check checksums of the projection data parts before writing them to a backup.
|
||||
bool check_projection_parts = true;
|
||||
|
||||
/// Allow to create backup with broken projections.
|
||||
bool allow_backup_broken_projections = false;
|
||||
|
||||
/// Internal, should not be specified by user.
|
||||
/// Whether this backup is a part of a distributed backup created by BACKUP ON CLUSTER.
|
||||
bool internal = false;
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/Macros.h>
|
||||
|
@ -591,6 +591,7 @@
|
||||
M(710, FAULT_INJECTED) \
|
||||
M(711, FILECACHE_ACCESS_DENIED) \
|
||||
M(712, TOO_MANY_MATERIALIZED_VIEWS) \
|
||||
M(713, BROKEN_PROJECTION) \
|
||||
M(714, UNEXPECTED_CLUSTER) \
|
||||
M(715, CANNOT_DETECT_FORMAT) \
|
||||
M(716, CANNOT_FORGET_PARTITION) \
|
||||
|
@ -1,11 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <type_traits>
|
||||
#include <variant>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <condition_variable>
|
||||
#include <Poco/Timespan.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/Exception.h>
|
||||
@ -17,6 +15,14 @@ namespace ProfileEvents
|
||||
extern const Event ConnectionPoolIsFullMicroseconds;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/** A class from which you can inherit and get a pool of something. Used for database connection pools.
|
||||
* Descendant class must provide a method for creating a new object to place in the pool.
|
||||
*/
|
||||
@ -29,22 +35,6 @@ public:
|
||||
using ObjectPtr = std::shared_ptr<Object>;
|
||||
using Ptr = std::shared_ptr<PoolBase<TObject>>;

enum class BehaviourOnLimit
{
/**
* Default behaviour - when limit on pool size is reached, callers will wait until object will be returned back in pool.
*/
Wait,

/**
* If no free objects in pool - allocate a new object, but not store it in pool.
* This behaviour is needed when we simply don't want to waste time waiting or if we cannot guarantee that query could be processed using fixed amount of connections.
* For example, when we read from table on s3, one GetObject request corresponds to the whole FileSystemCache segment. This segments are shared between different
* reading tasks, so in general case connection could be taken from pool by one task and returned back by another one. And these tasks are processed completely independently.
*/
AllocateNewBypassingPool,
};
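
The removed AllocateNewBypassingPool mode described in the comments above hands out a freshly allocated object that is never stored once the pool is full. A minimal single-threaded sketch of that behaviour (not part of this commit; PoolBase itself also synchronizes with a mutex and condition variable for the Wait mode, which is omitted here):

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct Connection { int id; };

class TinyPool
{
public:
    explicit TinyPool(size_t max_items_) : max_items(max_items_) {}

    std::shared_ptr<Connection> get()
    {
        /// Reuse a pooled object that nobody else currently holds (use_count == 1).
        for (auto & item : items)
            if (item.use_count() == 1)
                return item;

        /// Pool is not full yet: create and remember a new object.
        if (items.size() < max_items)
        {
            items.push_back(std::make_shared<Connection>(Connection{next_id++}));
            return items.back();
        }

        /// Pool is full: hand out a temporary object that is never stored,
        /// i.e. the AllocateNewBypassingPool behaviour.
        return std::make_shared<Connection>(Connection{next_id++});
    }

private:
    size_t max_items;
    int next_id = 0;
    std::vector<std::shared_ptr<Connection>> items;
};

int main()
{
    TinyPool pool(1);
    auto a = pool.get(); // stored in the pool
    auto b = pool.get(); // pool is full and `a` is still in use, so this one bypasses the pool
    std::cout << a->id << ' ' << b->id << '\n'; // 0 1
}
```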
|
||||
|
||||
private:
|
||||
|
||||
/** The object with the flag, whether it is currently used. */
|
||||
@ -99,53 +89,37 @@ public:
|
||||
Object & operator*() && = delete;
|
||||
const Object & operator*() const && = delete;
|
||||
|
||||
Object * operator->() & { return castToObjectPtr(); }
|
||||
const Object * operator->() const & { return castToObjectPtr(); }
|
||||
Object & operator*() & { return *castToObjectPtr(); }
|
||||
const Object & operator*() const & { return *castToObjectPtr(); }
|
||||
Object * operator->() & { return &*data->data.object; }
|
||||
const Object * operator->() const & { return &*data->data.object; }
|
||||
Object & operator*() & { return *data->data.object; }
|
||||
const Object & operator*() const & { return *data->data.object; }
|
||||
|
||||
/**
|
||||
* Expire an object to make it reallocated later.
|
||||
*/
|
||||
void expire()
|
||||
{
|
||||
if (data.index() == 1)
|
||||
std::get<1>(data)->data.is_expired = true;
|
||||
data->data.is_expired = true;
|
||||
}
|
||||
|
||||
bool isNull() const { return data.index() == 0 ? !std::get<0>(data) : !std::get<1>(data); }
|
||||
bool isNull() const { return data == nullptr; }
|
||||
|
||||
PoolBase * getPool() const
|
||||
{
|
||||
if (!data)
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Attempt to get pool from uninitialized entry");
|
||||
return &data->data.pool;
|
||||
}
|
||||
|
||||
private:
|
||||
/**
|
||||
* Plain object will be stored instead of PoolEntryHelper if fallback was made in get() (see BehaviourOnLimit::AllocateNewBypassingPool).
|
||||
*/
|
||||
std::variant<ObjectPtr, std::shared_ptr<PoolEntryHelper>> data;
|
||||
std::shared_ptr<PoolEntryHelper> data;
|
||||
|
||||
explicit Entry(ObjectPtr && object) : data(std::move(object)) { }
|
||||
|
||||
explicit Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) { }
|
||||
|
||||
auto castToObjectPtr() const
|
||||
{
|
||||
return std::visit(
|
||||
[](const auto & ptr)
|
||||
{
|
||||
using T = std::decay_t<decltype(ptr)>;
|
||||
if constexpr (std::is_same_v<ObjectPtr, T>)
|
||||
return ptr.get();
|
||||
else
|
||||
return ptr->data.object.get();
|
||||
},
|
||||
data);
|
||||
}
|
||||
explicit Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) {}
|
||||
};
|
||||
|
||||
virtual ~PoolBase() = default;
|
||||
|
||||
/** Allocates the object.
|
||||
* If 'behaviour_on_limit' is Wait - wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite.
|
||||
* If 'behaviour_on_limit' is AllocateNewBypassingPool and there is no free object - a new object will be created but not stored in the pool.
|
||||
*/
|
||||
/** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */
|
||||
Entry get(Poco::Timespan::TimeDiff timeout)
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
@ -176,9 +150,6 @@ public:
|
||||
return Entry(*items.back());
|
||||
}
|
||||
|
||||
if (behaviour_on_limit == BehaviourOnLimit::AllocateNewBypassingPool)
|
||||
return Entry(allocObject());
|
||||
|
||||
Stopwatch blocked;
|
||||
if (timeout < 0)
|
||||
{
|
||||
@ -213,8 +184,6 @@ private:
|
||||
/** The maximum size of the pool. */
|
||||
unsigned max_items;
|
||||
|
||||
BehaviourOnLimit behaviour_on_limit;
|
||||
|
||||
/** Pool. */
|
||||
Objects items;
|
||||
|
||||
@ -225,8 +194,8 @@ private:
|
||||
protected:
|
||||
LoggerPtr log;
|
||||
|
||||
PoolBase(unsigned max_items_, LoggerPtr log_, BehaviourOnLimit behaviour_on_limit_ = BehaviourOnLimit::Wait)
|
||||
: max_items(max_items_), behaviour_on_limit(behaviour_on_limit_), log(log_)
|
||||
PoolBase(unsigned max_items_, LoggerPtr log_)
|
||||
: max_items(max_items_), log(log_)
|
||||
{
|
||||
items.reserve(max_items);
|
||||
}
|
||||
|
@ -868,6 +868,8 @@ class IColumn;
|
||||
M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \
|
||||
M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0) \
|
||||
M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \
|
||||
M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \
|
||||
M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \
|
||||
\
|
||||
/** Experimental functions */ \
|
||||
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
|
||||
@ -902,7 +904,6 @@ class IColumn;
|
||||
M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.", 0) \
|
||||
M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \
|
||||
M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \
|
||||
M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \
|
||||
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS.
|
||||
|
@ -115,6 +115,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."},
|
||||
{"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."},
|
||||
{"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."},
|
||||
{"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."},
|
||||
{"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."},
|
||||
{"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."},
|
||||
{"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."},
|
||||
|
@ -71,6 +71,17 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo
|
||||
query->replace(ast_create_query.refresh_strategy, metadata.refresh);
|
||||
}
|
||||
|
||||
if (metadata.sql_security_type)
|
||||
{
|
||||
auto new_sql_security = std::make_shared<ASTSQLSecurity>();
|
||||
new_sql_security->type = metadata.sql_security_type;
|
||||
|
||||
if (metadata.definer)
|
||||
new_sql_security->definer = std::make_shared<ASTUserNameWithHost>(*metadata.definer);
|
||||
|
||||
ast_create_query.sql_security = std::move(new_sql_security);
|
||||
}
|
||||
|
||||
/// MaterializedView, Dictionary are types of CREATE query without storage.
|
||||
if (ast_create_query.storage)
|
||||
{
|
||||
|
@ -348,6 +348,7 @@ public:
|
||||
String getName() const override { return Name::name; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
@ -469,9 +470,6 @@ public:
|
||||
else
|
||||
return_type = json_return_type;
|
||||
|
||||
/// Top-level LowCardinality columns are processed outside JSON parser.
|
||||
json_return_type = removeLowCardinality(json_return_type);
|
||||
|
||||
DataTypes argument_types;
|
||||
argument_types.reserve(arguments.size());
|
||||
for (const auto & argument : arguments)
|
||||
@ -867,11 +865,9 @@ struct JSONExtractTree
|
||||
explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { }
|
||||
bool insertResultToColumn(IColumn & dest, const Element & element) override
|
||||
{
|
||||
// If element is an object we delegate the insertion to JSONExtractRawImpl
|
||||
if (element.isObject())
|
||||
// For types other than string, delegate the insertion to JSONExtractRawImpl.
|
||||
if (!element.isString())
|
||||
return JSONExtractRawImpl<JSONParser>::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length);
|
||||
else if (!element.isString())
|
||||
return false;
|
||||
|
||||
auto str = element.getString();
|
||||
if (str.size() > fixed_length)
|
||||
@ -1486,9 +1482,6 @@ public:
|
||||
// We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column
|
||||
static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length)
|
||||
{
|
||||
if (element.getObject().size() > fixed_length)
|
||||
return false;
|
||||
|
||||
ColumnFixedString::Chars chars;
|
||||
WriteBufferFromVector<ColumnFixedString::Chars> buf(chars, AppendModeTag());
|
||||
traverse(element, buf);
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <base/range.h>
|
||||
#include <cmath>
|
||||
#include <numbers>
|
||||
|
||||
@ -42,121 +41,6 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
constexpr double PI = std::numbers::pi_v<double>;
|
||||
constexpr float PI_F = std::numbers::pi_v<float>;
|
||||
|
||||
constexpr float RAD_IN_DEG = static_cast<float>(PI / 180.0);
|
||||
constexpr float RAD_IN_DEG_HALF = static_cast<float>(PI / 360.0);
|
||||
|
||||
constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063%
|
||||
constexpr float COS_LUT_SIZE_F = 1024.0f; // maxerr 0.00063%
|
||||
constexpr size_t ASIN_SQRT_LUT_SIZE = 512;
|
||||
constexpr size_t METRIC_LUT_SIZE = 1024;
|
||||
|
||||
/** Earth radius in meters using WGS84 authalic radius.
|
||||
* We use this value to be consistent with H3 library.
|
||||
*/
|
||||
constexpr float EARTH_RADIUS = 6371007.180918475f;
|
||||
constexpr float EARTH_DIAMETER = 2 * EARTH_RADIUS;
|
||||
|
||||
|
||||
float cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table
|
||||
float asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table
|
||||
|
||||
float sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude
|
||||
float sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude
|
||||
float wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude
|
||||
|
||||
|
||||
inline double sqr(double v)
|
||||
{
|
||||
return v * v;
|
||||
}
|
||||
|
||||
inline float sqrf(float v)
|
||||
{
|
||||
return v * v;
|
||||
}
|
||||
|
||||
void geodistInit()
|
||||
{
|
||||
for (size_t i = 0; i <= COS_LUT_SIZE; ++i)
|
||||
cos_lut[i] = static_cast<float>(cos(2 * PI * i / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE]
|
||||
|
||||
for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i)
|
||||
asin_sqrt_lut[i] = static_cast<float>(asin(
|
||||
sqrt(static_cast<double>(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE]
|
||||
|
||||
for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i)
|
||||
{
|
||||
double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE]
|
||||
|
||||
/// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees),
|
||||
/// depending on the latitude (in radians).
|
||||
|
||||
/// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67
|
||||
wgs84_metric_meters_lut[i * 2] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * latitude) + 1.20 * cos(4 * latitude)));
|
||||
wgs84_metric_meters_lut[i * 2 + 1] = static_cast<float>(sqr(111415.13 * cos(latitude) - 94.55 * cos(3 * latitude) + 0.12 * cos(5 * latitude)));
|
||||
|
||||
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
|
||||
|
||||
sphere_metric_lut[i] = static_cast<float>(sqr(cos(latitude)));
|
||||
}
|
||||
}
|
||||
|
||||
inline NO_SANITIZE_UNDEFINED size_t floatToIndex(float x)
|
||||
{
|
||||
/// Implementation specific behaviour on overflow or infinite value.
|
||||
return static_cast<size_t>(x);
|
||||
}
|
||||
|
||||
inline float geodistDegDiff(float f)
|
||||
{
|
||||
f = fabsf(f);
|
||||
if (f > 180)
|
||||
f = 360 - f;
|
||||
return f;
|
||||
}
|
||||
|
||||
inline float geodistFastCos(float x)
|
||||
{
|
||||
float y = fabsf(x) * (COS_LUT_SIZE_F / PI_F / 2.0f);
|
||||
size_t i = floatToIndex(y);
|
||||
y -= i;
|
||||
i &= (COS_LUT_SIZE - 1);
|
||||
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
|
||||
}
|
||||
|
||||
inline float geodistFastSin(float x)
|
||||
{
|
||||
float y = fabsf(x) * (COS_LUT_SIZE_F / PI_F / 2.0f);
|
||||
size_t i = floatToIndex(y);
|
||||
y -= i;
|
||||
i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
|
||||
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
|
||||
}
|
||||
|
||||
/// fast implementation of asin(sqrt(x))
|
||||
/// max error in floats 0.00369%, in doubles 0.00072%
|
||||
inline float geodistFastAsinSqrt(float x)
|
||||
{
|
||||
if (x < 0.122f)
|
||||
{
|
||||
// distance under 4546 km, Taylor error under 0.00072%
|
||||
float y = sqrtf(x);
|
||||
return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f;
|
||||
}
|
||||
if (x < 0.948f)
|
||||
{
|
||||
// distance under 17083 km, 512-entry LUT error under 0.00072%
|
||||
x *= ASIN_SQRT_LUT_SIZE;
|
||||
size_t i = floatToIndex(x);
|
||||
return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i);
|
||||
}
|
||||
return asinf(sqrtf(x)); // distance over 17083 km, just compute exact
|
||||
}
|
||||
|
||||
|
||||
enum class Method
|
||||
{
|
||||
SPHERE_DEGREES,
|
||||
@ -164,18 +48,117 @@ enum class Method
|
||||
WGS84_METERS,
|
||||
};
|
||||
|
||||
}
|
||||
constexpr size_t ASIN_SQRT_LUT_SIZE = 512;
|
||||
constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063%
|
||||
constexpr size_t METRIC_LUT_SIZE = 1024;
|
||||
|
||||
/// Earth radius in meters using WGS84 authalic radius.
|
||||
/// We use this value to be consistent with H3 library.
|
||||
constexpr double EARTH_RADIUS = 6371007.180918475;
|
||||
constexpr double EARTH_DIAMETER = 2.0 * EARTH_RADIUS;
|
||||
constexpr double PI = std::numbers::pi_v<double>;
|
||||
|
||||
template <typename T>
|
||||
T sqr(T v) { return v * v; }
|
||||
|
||||
template <typename T>
|
||||
struct Impl
|
||||
{
|
||||
T cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table
|
||||
T asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table
|
||||
T sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude
|
||||
T sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude
|
||||
T wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude
|
||||
|
||||
Impl()
|
||||
{
|
||||
for (size_t i = 0; i <= COS_LUT_SIZE; ++i)
|
||||
cos_lut[i] = T(std::cos(2 * PI * static_cast<double>(i) / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE]
|
||||
|
||||
for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i)
|
||||
asin_sqrt_lut[i] = T(std::asin(std::sqrt(static_cast<double>(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE]
|
||||
|
||||
for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i)
|
||||
{
|
||||
double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE]
|
||||
|
||||
/// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees),
|
||||
/// depending on the latitude (in radians).
|
||||
|
||||
/// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67
|
||||
wgs84_metric_meters_lut[i * 2] = T(sqr(111132.09 - 566.05 * std::cos(2.0 * latitude) + 1.20 * std::cos(4.0 * latitude)));
|
||||
wgs84_metric_meters_lut[i * 2 + 1] = T(sqr(111415.13 * std::cos(latitude) - 94.55 * std::cos(3.0 * latitude) + 0.12 * std::cos(5.0 * latitude)));
|
||||
sphere_metric_meters_lut[i] = T(sqr((EARTH_DIAMETER * PI / 360) * std::cos(latitude)));
|
||||
|
||||
sphere_metric_lut[i] = T(sqr(std::cos(latitude)));
|
||||
}
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED size_t toIndex(T x)
|
||||
{
|
||||
/// Implementation specific behaviour on overflow or infinite value.
|
||||
return static_cast<size_t>(x);
|
||||
}
|
||||
|
||||
static inline T degDiff(T f)
|
||||
{
|
||||
f = std::abs(f);
|
||||
if (f > 180)
|
||||
f = 360 - f;
|
||||
return f;
|
||||
}
|
||||
|
||||
inline T fastCos(T x)
|
||||
{
|
||||
T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0));
|
||||
size_t i = toIndex(y);
|
||||
y -= i;
|
||||
i &= (COS_LUT_SIZE - 1);
|
||||
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
|
||||
}
|
||||
|
||||
inline T fastSin(T x)
|
||||
{
|
||||
T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0));
|
||||
size_t i = toIndex(y);
|
||||
y -= i;
|
||||
i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
|
||||
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
|
||||
}
|
||||
|
||||
/// fast implementation of asin(sqrt(x))
|
||||
/// max error in floats 0.00369%, in doubles 0.00072%
|
||||
inline T fastAsinSqrt(T x)
|
||||
{
|
||||
if (x < T(0.122))
|
||||
{
|
||||
// distance under 4546 km, Taylor error under 0.00072%
|
||||
T y = std::sqrt(x);
|
||||
return y + x * y * T(0.166666666666666) + x * x * y * T(0.075) + x * x * x * y * T(0.044642857142857);
|
||||
}
|
||||
if (x < T(0.948))
|
||||
{
|
||||
// distance under 17083 km, 512-entry LUT error under 0.00072%
|
||||
x *= ASIN_SQRT_LUT_SIZE;
|
||||
size_t i = toIndex(x);
|
||||
return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i);
|
||||
}
|
||||
return std::asin(std::sqrt(x)); /// distance is over 17083 km, just compute exact
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> Impl<T> impl;
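
Impl<T>::fastCos above trades accuracy for speed by sampling cos() into a table once and linearly interpolating between neighbouring entries. A minimal standalone sketch of the same trick (not part of this commit):

```cpp
#include <cmath>
#include <cstdio>

constexpr double PI = 3.14159265358979323846;
constexpr size_t LUT_SIZE = 1024; // power of two, so wrapping can use a bit mask

static double cos_lut[LUT_SIZE + 1];

void initCosLut()
{
    /// One full period [0, 2*pi] sampled into LUT_SIZE intervals;
    /// the extra entry makes interpolation of the last interval safe.
    for (size_t i = 0; i <= LUT_SIZE; ++i)
        cos_lut[i] = std::cos(2.0 * PI * i / LUT_SIZE);
}

double fastCos(double x)
{
    double y = std::fabs(x) * (LUT_SIZE / (2.0 * PI)); // map |x| to table coordinates
    size_t i = static_cast<size_t>(y);
    y -= i;               // fractional part used for interpolation
    i &= (LUT_SIZE - 1);  // wrap around the period
    return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
}

int main()
{
    initCosLut();
    std::printf("%f %f\n", fastCos(1.0), std::cos(1.0)); // the two values agree closely
}
```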
|
||||
|
||||
DECLARE_MULTITARGET_CODE(
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <Method method>
|
||||
float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
|
||||
template <Method method, typename T>
|
||||
T distance(T lon1deg, T lat1deg, T lon2deg, T lat2deg)
|
||||
{
|
||||
float lat_diff = geodistDegDiff(lat1deg - lat2deg);
|
||||
float lon_diff = geodistDegDiff(lon1deg - lon2deg);
|
||||
T lat_diff = impl<T>.degDiff(lat1deg - lat2deg);
|
||||
T lon_diff = impl<T>.degDiff(lon1deg - lon2deg);
|
||||
|
||||
if (lon_diff < 13)
|
||||
{
|
||||
@ -187,51 +170,54 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
|
||||
/// (Remember how a plane flies from Amsterdam to New York)
|
||||
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
|
||||
|
||||
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
|
||||
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
|
||||
T latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
|
||||
size_t latitude_midpoint_index = impl<T>.toIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
|
||||
|
||||
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
|
||||
|
||||
float k_lat{};
|
||||
float k_lon{};
|
||||
T k_lat{};
|
||||
T k_lon{};
|
||||
|
||||
if constexpr (method == Method::SPHERE_DEGREES)
|
||||
{
|
||||
k_lat = 1;
|
||||
|
||||
k_lon = sphere_metric_lut[latitude_midpoint_index]
|
||||
+ (sphere_metric_lut[latitude_midpoint_index + 1] - sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
|
||||
k_lon = impl<T>.sphere_metric_lut[latitude_midpoint_index]
|
||||
+ (impl<T>.sphere_metric_lut[latitude_midpoint_index + 1] - impl<T>.sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
|
||||
}
|
||||
else if constexpr (method == Method::SPHERE_METERS)
|
||||
{
|
||||
k_lat = sqrf(EARTH_DIAMETER * PI_F / 360.0f);
|
||||
k_lat = sqr(T(EARTH_DIAMETER) * T(PI) / T(360.0));
|
||||
|
||||
k_lon = sphere_metric_meters_lut[latitude_midpoint_index]
|
||||
+ (sphere_metric_meters_lut[latitude_midpoint_index + 1] - sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
|
||||
k_lon = impl<T>.sphere_metric_meters_lut[latitude_midpoint_index]
|
||||
+ (impl<T>.sphere_metric_meters_lut[latitude_midpoint_index + 1] - impl<T>.sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
|
||||
}
|
||||
else if constexpr (method == Method::WGS84_METERS)
|
||||
{
|
||||
k_lat = wgs84_metric_meters_lut[latitude_midpoint_index * 2]
|
||||
+ (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index);
|
||||
k_lat = impl<T>.wgs84_metric_meters_lut[latitude_midpoint_index * 2]
|
||||
+ (impl<T>.wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - impl<T>.wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index);
|
||||
|
||||
k_lon = wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]
|
||||
+ (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index);
|
||||
k_lon = impl<T>.wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]
|
||||
+ (impl<T>.wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - impl<T>.wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index);
|
||||
}
|
||||
|
||||
/// Metric on a tangent plane: it differs from Euclidean metric only by scale of coordinates.
|
||||
return sqrtf(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff);
|
||||
return std::sqrt(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff);
|
||||
}
|
||||
else
|
||||
{
|
||||
// points too far away; use haversine
|
||||
/// Points are too far away: use Haversine.
|
||||
|
||||
float a = sqrf(geodistFastSin(lat_diff * RAD_IN_DEG_HALF))
|
||||
+ geodistFastCos(lat1deg * RAD_IN_DEG) * geodistFastCos(lat2deg * RAD_IN_DEG) * sqrf(geodistFastSin(lon_diff * RAD_IN_DEG_HALF));
|
||||
static constexpr T RAD_IN_DEG = T(PI / 180.0);
|
||||
static constexpr T RAD_IN_DEG_HALF = T(PI / 360.0);
|
||||
|
||||
T a = sqr(impl<T>.fastSin(lat_diff * RAD_IN_DEG_HALF))
|
||||
+ impl<T>.fastCos(lat1deg * RAD_IN_DEG) * impl<T>.fastCos(lat2deg * RAD_IN_DEG) * sqr(impl<T>.fastSin(lon_diff * RAD_IN_DEG_HALF));
|
||||
|
||||
if constexpr (method == Method::SPHERE_DEGREES)
|
||||
return (360.0f / PI_F) * geodistFastAsinSqrt(a);
|
||||
return (T(360.0) / T(PI)) * impl<T>.fastAsinSqrt(a);
|
||||
else
|
||||
return EARTH_DIAMETER * geodistFastAsinSqrt(a);
|
||||
return T(EARTH_DIAMETER) * impl<T>.fastAsinSqrt(a);
|
||||
}
|
||||
}
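
For reference, the far-away branch above approximates the classic haversine formula d = 2R * asin(sqrt(sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2))) using the fast sin/cos/asin-sqrt helpers. A plain double-precision version of the formula (not part of this commit, using the same authalic Earth radius defined earlier in this file) looks like this:

```cpp
#include <cmath>
#include <cstdio>

constexpr double PI = 3.14159265358979323846;
constexpr double EARTH_RADIUS_M = 6371007.180918475; // WGS84 authalic radius, as above
constexpr double DEG_TO_RAD = PI / 180.0;

double haversineMeters(double lon1_deg, double lat1_deg, double lon2_deg, double lat2_deg)
{
    double lat1 = lat1_deg * DEG_TO_RAD;
    double lat2 = lat2_deg * DEG_TO_RAD;
    double dlat = (lat2_deg - lat1_deg) * DEG_TO_RAD;
    double dlon = (lon2_deg - lon1_deg) * DEG_TO_RAD;

    double a = std::sin(dlat / 2) * std::sin(dlat / 2)
        + std::cos(lat1) * std::cos(lat2) * std::sin(dlon / 2) * std::sin(dlon / 2);

    return 2 * EARTH_RADIUS_M * std::asin(std::sqrt(a));
}

int main()
{
    /// Roughly Amsterdam -> New York; the result is on the order of 5.9e6 meters.
    std::printf("%.0f\n", haversineMeters(4.9, 52.37, -74.0, 40.71));
}
```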
|
||||
|
||||
@ -241,13 +227,24 @@ template <Method method>
|
||||
class FunctionGeoDistance : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name =
|
||||
(method == Method::SPHERE_DEGREES) ? "greatCircleAngle"
|
||||
: ((method == Method::SPHERE_METERS) ? "greatCircleDistance"
|
||||
: "geoDistance");
|
||||
explicit FunctionGeoDistance(ContextPtr context)
|
||||
{
|
||||
always_float32 = !context->getSettingsRef().geo_distance_returns_float64_on_float64_arguments;
|
||||
}
|
||||
|
||||
private:
|
||||
String getName() const override { return name; }
|
||||
bool always_float32;
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
if constexpr (method == Method::SPHERE_DEGREES)
|
||||
return "greatCircleAngle";
|
||||
if constexpr (method == Method::SPHERE_METERS)
|
||||
return "greatCircleDistance";
|
||||
else
|
||||
return "geoDistance";
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 4; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
@ -255,22 +252,31 @@ private:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
for (const auto arg_idx : collections::range(0, arguments.size()))
|
||||
bool has_float64 = false;
|
||||
|
||||
for (size_t arg_idx = 0; arg_idx < 4; ++arg_idx)
|
||||
{
|
||||
const auto * arg = arguments[arg_idx].get();
|
||||
if (!isNumber(WhichDataType(arg)))
|
||||
WhichDataType which(arguments[arg_idx]);
|
||||
|
||||
if (!isNumber(which))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. "
|
||||
"Must be numeric", arg->getName(), std::to_string(arg_idx + 1), getName());
|
||||
"Must be numeric", arguments[arg_idx]->getName(), std::to_string(arg_idx + 1), getName());
|
||||
|
||||
if (which.isFloat64())
|
||||
has_float64 = true;
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeFloat32>();
|
||||
if (has_float64 && !always_float32)
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
else
|
||||
return std::make_shared<DataTypeFloat32>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
auto dst = ColumnVector<Float32>::create();
|
||||
auto & dst_data = dst->getData();
|
||||
dst_data.resize(input_rows_count);
|
||||
bool returns_float64 = WhichDataType(result_type).isFloat64();
|
||||
|
||||
auto dst = result_type->createColumn();
|
||||
|
||||
auto arguments_copy = arguments;
|
||||
for (auto & argument : arguments_copy)
|
||||
@ -280,10 +286,24 @@ private:
|
||||
argument.type = result_type;
|
||||
}
|
||||
|
||||
const auto * col_lon1 = convertArgumentColumnToFloat32(arguments_copy, 0);
|
||||
const auto * col_lat1 = convertArgumentColumnToFloat32(arguments_copy, 1);
|
||||
const auto * col_lon2 = convertArgumentColumnToFloat32(arguments_copy, 2);
|
||||
const auto * col_lat2 = convertArgumentColumnToFloat32(arguments_copy, 3);
|
||||
if (returns_float64)
|
||||
run<Float64>(arguments_copy, dst, input_rows_count);
|
||||
else
|
||||
run<Float32>(arguments_copy, dst, input_rows_count);
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void run(const ColumnsWithTypeAndName & arguments, MutableColumnPtr & dst, size_t input_rows_count) const
|
||||
{
|
||||
const auto * col_lon1 = convertArgumentColumn<T>(arguments, 0);
|
||||
const auto * col_lat1 = convertArgumentColumn<T>(arguments, 1);
|
||||
const auto * col_lon2 = convertArgumentColumn<T>(arguments, 2);
|
||||
const auto * col_lat2 = convertArgumentColumn<T>(arguments, 3);
|
||||
|
||||
auto & dst_data = assert_cast<ColumnVector<T> &>(*dst).getData();
|
||||
dst_data.resize(input_rows_count);
|
||||
|
||||
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
|
||||
{
|
||||
@ -291,20 +311,20 @@ private:
|
||||
col_lon1->getData()[row_num], col_lat1->getData()[row_num],
|
||||
col_lon2->getData()[row_num], col_lat2->getData()[row_num]);
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
const ColumnFloat32 * convertArgumentColumnToFloat32(const ColumnsWithTypeAndName & arguments, size_t argument_index) const
|
||||
template <typename T>
|
||||
const ColumnVector<T> * convertArgumentColumn(const ColumnsWithTypeAndName & arguments, size_t argument_index) const
|
||||
{
|
||||
const auto * column_typed = checkAndGetColumn<ColumnFloat32>(arguments[argument_index].column.get());
|
||||
const auto * column_typed = checkAndGetColumn<ColumnVector<T>>(arguments[argument_index].column.get());
|
||||
if (!column_typed)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal type {} of argument {} of function {}. Must be Float32.",
|
||||
"Illegal type {} of argument {} of function {}. Must be {}.",
|
||||
arguments[argument_index].type->getName(),
|
||||
argument_index + 1,
|
||||
getName());
|
||||
getName(),
|
||||
TypeName<T>);
|
||||
|
||||
return column_typed;
|
||||
}
|
||||
@ -316,18 +336,19 @@ template <Method method>
|
||||
class FunctionGeoDistance : public TargetSpecific::Default::FunctionGeoDistance<method>
|
||||
{
|
||||
public:
|
||||
explicit FunctionGeoDistance(ContextPtr context) : selector(context)
|
||||
explicit FunctionGeoDistance(ContextPtr context)
|
||||
: TargetSpecific::Default::FunctionGeoDistance<method>(context), selector(context)
|
||||
{
|
||||
selector.registerImplementation<TargetArch::Default,
|
||||
TargetSpecific::Default::FunctionGeoDistance<method>>();
|
||||
TargetSpecific::Default::FunctionGeoDistance<method>>(context);
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX,
|
||||
TargetSpecific::AVX::FunctionGeoDistance<method>>();
|
||||
TargetSpecific::AVX::FunctionGeoDistance<method>>(context);
|
||||
selector.registerImplementation<TargetArch::AVX2,
|
||||
TargetSpecific::AVX2::FunctionGeoDistance<method>>();
|
||||
TargetSpecific::AVX2::FunctionGeoDistance<method>>(context);
|
||||
selector.registerImplementation<TargetArch::AVX512F,
|
||||
TargetSpecific::AVX512F::FunctionGeoDistance<method>>();
|
||||
TargetSpecific::AVX512F::FunctionGeoDistance<method>>(context);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -345,12 +366,13 @@ private:
|
||||
ImplementationSelector<IFunction> selector;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(GeoDistance)
|
||||
{
|
||||
geodistInit();
|
||||
factory.registerFunction<FunctionGeoDistance<Method::SPHERE_DEGREES>>();
|
||||
factory.registerFunction<FunctionGeoDistance<Method::SPHERE_METERS>>();
|
||||
factory.registerFunction<FunctionGeoDistance<Method::WGS84_METERS>>();
|
||||
factory.registerFunction("greatCircleAngle", [](ContextPtr context) { return std::make_shared<FunctionGeoDistance<Method::SPHERE_DEGREES>>(std::move(context)); });
|
||||
factory.registerFunction("greatCircleDistance", [](ContextPtr context) { return std::make_shared<FunctionGeoDistance<Method::SPHERE_METERS>>(std::move(context)); });
|
||||
factory.registerFunction("geoDistance", [](ContextPtr context) { return std::make_shared<FunctionGeoDistance<Method::WGS84_METERS>>(std::move(context)); });
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -14,4 +14,9 @@ REGISTER_FUNCTION(ScalarSubqueryResult)
|
||||
factory.registerFunction<FunctionScalarSubqueryResult>();
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(ActionName)
|
||||
{
|
||||
factory.registerFunction<FunctionActionName>();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -42,4 +42,18 @@ struct ScalarSubqueryResultName
|
||||
using FunctionIdentity = FunctionIdentityBase<IdentityName>;
|
||||
using FunctionScalarSubqueryResult = FunctionIdentityBase<ScalarSubqueryResultName>;
|
||||
|
||||
struct ActionNameName
|
||||
{
|
||||
static constexpr auto name = "__actionName";
|
||||
};
|
||||
|
||||
class FunctionActionName : public FunctionIdentityBase<ActionNameName>
|
||||
{
|
||||
public:
|
||||
using FunctionIdentityBase::FunctionIdentityBase;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionActionName>(); }
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -79,7 +79,7 @@ inline char * writeVarInt(Int64 x, char * ostr)
return writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
}
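
writeVarInt above folds a signed Int64 into an unsigned value with the ZigZag mapping (x << 1) ^ (x >> 63) before variable-length encoding it, so that small negative numbers also get short encodings. A minimal standalone sketch of the mapping and its inverse (not part of this commit; the byte-level varint encoding itself is omitted):

```cpp
#include <cstdint>
#include <iostream>

uint64_t zigZagEncode(int64_t x)
{
    /// 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
    return (static_cast<uint64_t>(x) << 1) ^ static_cast<uint64_t>(x >> 63);
}

int64_t zigZagDecode(uint64_t u)
{
    return static_cast<int64_t>(u >> 1) ^ -static_cast<int64_t>(u & 1);
}

int main()
{
    for (int64_t v : {0, -1, 1, -2, 63, -64})
        std::cout << v << " -> " << zigZagEncode(v)
                  << " -> " << zigZagDecode(zigZagEncode(v)) << '\n';
}
```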
|
||||
|
||||
namespace impl
|
||||
namespace varint_impl
|
||||
{
|
||||
|
||||
template <bool check_eof>
|
||||
@ -106,8 +106,8 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
|
||||
inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
|
||||
{
|
||||
if (istr.buffer().end() - istr.position() >= 10)
|
||||
return impl::readVarUInt<false>(x, istr);
|
||||
return impl::readVarUInt<true>(x, istr);
|
||||
return varint_impl::readVarUInt<false>(x, istr);
|
||||
return varint_impl::readVarUInt<true>(x, istr);
|
||||
}
|
||||
|
||||
inline void readVarUInt(UInt64 & x, std::istream & istr)
|
||||
|
@ -741,7 +741,7 @@ Block ActionsDAG::updateHeader(Block header) const
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK)
|
||||
e.addMessage(" in block {}", header.dumpStructure());
|
||||
e.addMessage("in block {}", header.dumpStructure());
|
||||
|
||||
throw;
|
||||
}
|
||||
|
@ -1219,7 +1219,7 @@ void Context::addWarningMessageAboutDatabaseOrdinary(const String & database_nam
|
||||
/// We don't use getFlagsPath method, because it takes a shared lock.
|
||||
auto convert_databases_flag = fs::path(shared->flags_path) / "convert_ordinary_to_atomic";
|
||||
auto message = fmt::format("Server has databases (for example `{}`) with Ordinary engine, which was deprecated. "
|
||||
"To convert this database to a new Atomic engine, please create a forcing flag {} and make sure that ClickHouse has write permission for it. "
|
||||
"To convert this database to a new Atomic engine, create a flag {} and make sure that ClickHouse has write permission for it. "
|
||||
"Example: sudo touch '{}' && sudo chmod 666 '{}'",
|
||||
database_name,
|
||||
convert_databases_flag.string(), convert_databases_flag.string(), convert_databases_flag.string());
|
||||
|
@ -1881,7 +1881,7 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr
|
||||
void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_attach, bool is_materialized_view)
|
||||
{
|
||||
/// If no SQL security is specified, apply default from default_*_view_sql_security setting.
|
||||
if (!sql_security.type.has_value())
|
||||
if (!sql_security.type)
|
||||
{
|
||||
SQLSecurityType default_security;
|
||||
|
||||
|
@@ -341,6 +341,11 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const
return part && part->hasProjection(name);
}

bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const
{
return part && part->hasBrokenProjection(name);
}

bool MutationsInterpreter::Source::isCompactPart() const
{
return part && part->getType() == MergeTreeDataPartType::Compact;

@@ -802,7 +807,7 @@ void MutationsInterpreter::prepare(bool dry_run)
{
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
const auto & projection = projections_desc.get(command.projection_name);
if (!source.hasProjection(projection.name))
if (!source.hasProjection(projection.name) || source.hasBrokenProjection(projection.name))
{
for (const auto & column : projection.required_columns)
dependencies.emplace(column, ColumnDependency::PROJECTION);

@@ -989,6 +994,13 @@ void MutationsInterpreter::prepare(bool dry_run)
if (!source.hasProjection(projection.name))
continue;

/// Always rebuild broken projections.
if (source.hasBrokenProjection(projection.name))
{
materialized_projections.insert(projection.name);
continue;
}

if (need_rebuild_projections)
{
materialized_projections.insert(projection.name);

@@ -126,6 +126,7 @@ public:
bool materializeTTLRecalculateOnly() const;
bool hasSecondaryIndex(const String & name) const;
bool hasProjection(const String & name) const;
bool hasBrokenProjection(const String & name) const;
bool isCompactPart() const;

void read(
@ -14,7 +14,7 @@ namespace DB
|
||||
|
||||
void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
if (!type.has_value())
|
||||
if (!type)
|
||||
return;
|
||||
|
||||
if (definer || is_definer_current_user)
|
||||
|
@ -299,6 +299,7 @@ namespace DB
|
||||
MR_MACROS(MOD, "MOD") \
|
||||
MR_MACROS(MODIFY_COLUMN, "MODIFY COLUMN") \
|
||||
MR_MACROS(MODIFY_COMMENT, "MODIFY COMMENT") \
|
||||
MR_MACROS(MODIFY_DEFINER, "MODIFY DEFINER") \
|
||||
MR_MACROS(MODIFY_ORDER_BY, "MODIFY ORDER BY") \
|
||||
MR_MACROS(MODIFY_QUERY, "MODIFY QUERY") \
|
||||
MR_MACROS(MODIFY_REFRESH, "MODIFY REFRESH") \
|
||||
@ -445,6 +446,7 @@ namespace DB
|
||||
MR_MACROS(SPATIAL, "SPATIAL") \
|
||||
MR_MACROS(SQL_SECURITY, "SQL SECURITY") \
|
||||
MR_MACROS(SS, "SS") \
|
||||
MR_MACROS(START_TRANSACTION, "START TRANSACTION") \
|
||||
MR_MACROS(STATISTIC, "STATISTIC") \
|
||||
MR_MACROS(STEP, "STEP") \
|
||||
MR_MACROS(STORAGE, "STORAGE") \
|
||||
@ -554,7 +556,7 @@ namespace DB
|
||||
MR_MACROS(SSH_KEY, "SSH_KEY") \
|
||||
MR_MACROS(SSL_CERTIFICATE, "SSL_CERTIFICATE") \
|
||||
MR_MACROS(STRICTLY_ASCENDING, "STRICTLY_ASCENDING") \
|
||||
MR_MACROS(WITH_ITEMINDEX, "with_itemindex") \
|
||||
MR_MACROS(WITH_ITEMINDEX, "WITH_ITEMINDEX") \
|
||||
|
||||
enum class Keyword : size_t
|
||||
{
|
||||
|
@ -41,6 +41,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
ParserKeyword s_reset_setting(Keyword::RESET_SETTING);
|
||||
ParserKeyword s_modify_query(Keyword::MODIFY_QUERY);
|
||||
ParserKeyword s_modify_sql_security(Keyword::MODIFY_SQL_SECURITY);
|
||||
ParserKeyword s_modify_definer(Keyword::MODIFY_DEFINER);
|
||||
ParserKeyword s_modify_refresh(Keyword::MODIFY_REFRESH);
|
||||
|
||||
ParserKeyword s_add_index(Keyword::ADD_INDEX);
|
||||
@ -862,11 +863,16 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
return false;
|
||||
command->type = ASTAlterCommand::MODIFY_QUERY;
|
||||
}
|
||||
else if (s_modify_sql_security.ignore(pos, expected))
|
||||
else if (s_modify_sql_security.checkWithoutMoving(pos, expected))
|
||||
{
|
||||
/// This is a hack so we can reuse parser from create and don't have to write `MODIFY SQL SECURITY SQL SECURITY INVOKER`
|
||||
--pos;
|
||||
--pos;
|
||||
s_modify.ignore(pos, expected);
|
||||
if (!sql_security_p.parse(pos, command_sql_security, expected))
|
||||
return false;
|
||||
command->type = ASTAlterCommand::MODIFY_SQL_SECURITY;
|
||||
}
|
||||
else if (s_modify_definer.checkWithoutMoving(pos, expected))
|
||||
{
|
||||
s_modify.ignore(pos, expected);
|
||||
if (!sql_security_p.parse(pos, command_sql_security, expected))
|
||||
return false;
|
||||
command->type = ASTAlterCommand::MODIFY_SQL_SECURITY;
|
||||
|
@ -14,6 +14,8 @@ bool ParserTransactionControl::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
|
||||
|
||||
if (ParserKeyword(Keyword::BEGIN_TRANSACTION).ignore(pos, expected))
|
||||
action = ASTTransactionControl::BEGIN;
|
||||
else if (ParserKeyword(Keyword::START_TRANSACTION).ignore(pos, expected))
|
||||
action = ASTTransactionControl::BEGIN;
|
||||
else if (ParserKeyword(Keyword::COMMIT).ignore(pos, expected))
|
||||
action = ASTTransactionControl::COMMIT;
|
||||
else if (ParserKeyword(Keyword::ROLLBACK).ignore(pos, expected))
|
||||
|
@ -157,6 +157,12 @@ public:
|
||||
case QueryTreeNodeType::FUNCTION:
|
||||
{
|
||||
const auto & function_node = node->as<FunctionNode &>();
|
||||
if (function_node.getFunctionName() == "__actionName")
|
||||
{
|
||||
result = toString(function_node.getArguments().getNodes().at(1)->as<ConstantNode>()->getValue());
|
||||
break;
|
||||
}
|
||||
|
||||
String in_function_second_argument_node_name;
|
||||
|
||||
if (isNameOfInFunction(function_node.getFunctionName()))
|
||||
|
@ -223,7 +223,7 @@ bool analyzeProjectionCandidate(
|
||||
{
|
||||
const auto & created_projections = part_with_ranges.data_part->getProjectionParts();
|
||||
auto it = created_projections.find(candidate.projection->name);
|
||||
if (it != created_projections.end())
|
||||
if (it != created_projections.end() && !it->second->is_broken)
|
||||
{
|
||||
projection_parts.push_back(it->second);
|
||||
}
|
||||
|
@ -335,7 +335,9 @@ void DataPartStorageOnDiskBase::backup(
|
||||
const ReadSettings & read_settings,
|
||||
bool make_temporary_hard_links,
|
||||
BackupEntries & backup_entries,
|
||||
TemporaryFilesOnDisks * temp_dirs) const
|
||||
TemporaryFilesOnDisks * temp_dirs,
|
||||
bool is_projection_part,
|
||||
bool allow_backup_broken_projection) const
|
||||
{
|
||||
fs::path part_path_on_disk = fs::path{root_path} / part_dir;
|
||||
fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir;
|
||||
@ -377,7 +379,7 @@ void DataPartStorageOnDiskBase::backup(
|
||||
|
||||
bool copy_encrypted = !backup_settings.decrypt_files_from_encrypted_disks;
|
||||
|
||||
for (const auto & filepath : files_to_backup)
|
||||
auto backup_file = [&](const String & filepath)
|
||||
{
|
||||
auto filepath_on_disk = part_path_on_disk / filepath;
|
||||
auto filepath_in_backup = part_path_in_backup / filepath;
|
||||
@ -385,8 +387,10 @@ void DataPartStorageOnDiskBase::backup(
|
||||
if (files_without_checksums.contains(filepath))
|
||||
{
|
||||
backup_entries.emplace_back(filepath_in_backup, std::make_unique<BackupEntryFromSmallFile>(disk, filepath_on_disk, read_settings, copy_encrypted));
|
||||
continue;
|
||||
return;
|
||||
}
|
||||
else if (is_projection_part && allow_backup_broken_projection && !disk->exists(filepath_on_disk))
|
||||
return;
|
||||
|
||||
if (make_temporary_hard_links)
|
||||
{
|
||||
@ -411,6 +415,31 @@ void DataPartStorageOnDiskBase::backup(
|
||||
backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner);
|
||||
|
||||
backup_entries.emplace_back(filepath_in_backup, std::move(backup_entry));
|
||||
};
|
||||
|
||||
auto * log = &Poco::Logger::get("DataPartStorageOnDiskBase::backup");
|
||||
|
||||
for (const auto & filepath : files_to_backup)
|
||||
{
|
||||
if (is_projection_part && allow_backup_broken_projection)
|
||||
{
|
||||
try
|
||||
{
|
||||
backup_file(filepath);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (e.code() != ErrorCodes::FILE_DOESNT_EXIST)
|
||||
throw;
|
||||
|
||||
LOG_ERROR(log, "Cannot backup file {} of projection part {}. Will try to ignore it", filepath, part_dir);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
backup_file(filepath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -58,7 +58,9 @@ public:
|
||||
const ReadSettings & read_settings,
|
||||
bool make_temporary_hard_links,
|
||||
BackupEntries & backup_entries,
|
||||
TemporaryFilesOnDisks * temp_dirs) const override;
|
||||
TemporaryFilesOnDisks * temp_dirs,
|
||||
bool is_projection_part,
|
||||
bool allow_backup_broken_projection) const override;
|
||||
|
||||
MutableDataPartStoragePtr freeze(
|
||||
const std::string & to,
|
||||
|
@ -223,7 +223,9 @@ public:
|
||||
const ReadSettings & read_settings,
|
||||
bool make_temporary_hard_links,
|
||||
BackupEntries & backup_entries,
|
||||
TemporaryFilesOnDisks * temp_dirs) const = 0;
|
||||
TemporaryFilesOnDisks * temp_dirs,
|
||||
bool is_projection_part,
|
||||
bool allow_backup_broken_projection) const = 0;
|
||||
|
||||
/// Creates hardlinks into 'to/dir_path' for every file in data part.
|
||||
/// Callback is called after hardlinks are created, but before 'delete-on-destroy.txt' marker is removed.
|
||||
|
@@ -705,13 +705,14 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`.
loadExistingRowsCount(); /// Must be called after loadRowsCount() as it uses the value of `rows_count`.
loadPartitionAndMinMaxIndex();
bool has_broken_projections = false;
if (!parent_part)
{
loadTTLInfos();
loadProjections(require_columns_checksums, check_consistency, false /* if_not_loaded */);
loadProjections(require_columns_checksums, check_consistency, has_broken_projections, false /* if_not_loaded */);
}

if (check_consistency)
if (check_consistency && !has_broken_projections)
checkConsistency(require_columns_checksums);

loadDefaultCompressionCodec();

@@ -776,7 +777,7 @@ void IMergeTreeDataPart::addProjectionPart(
projection_parts[projection_name] = std::move(projection_part);
}

void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded)
void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded)
{
auto metadata_snapshot = storage.getInMemoryMetadataPtr();
for (const auto & projection : metadata_snapshot->projections)

@@ -793,10 +794,36 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch
else
{
auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build();
part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency);

try
{
part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency);
}
catch (...)
{
if (isRetryableException(std::current_exception()))
throw;

auto message = getCurrentExceptionMessage(true);
LOG_ERROR(&Poco::Logger::get("IMergeTreeDataPart"),
"Cannot load projection {}, will consider it broken. Reason: {}", projection.name, message);

has_broken_projection = true;
part->setBrokenReason(message, getCurrentExceptionCode());
}

addProjectionPart(projection.name, std::move(part));
}
}
else if (check_consistency && checksums.has(path))
{
auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build();
part->setBrokenReason(
"Projection directory " + path + " does not exist while loading projections. Stacktrace: " + StackTrace().toString(),
ErrorCodes::NO_FILE_IN_DATA_PART);
addProjectionPart(projection.name, std::move(part));
has_broken_projection = true;
}
}
}
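The hunk above is the core of the change: a projection that fails to load with a non-retryable error no longer fails the parent part; it is kept, marked broken via setBrokenReason(), and the caller's has_broken_projection out-flag is set (loadColumnsChecksumsIndexes uses that flag to skip checkConsistency). A simplified, self-contained sketch of the same load-and-mark pattern, with illustrative types rather than the actual ClickHouse classes:

    #include <exception>
    #include <string>
    #include <vector>

    struct ProjectionPartSketch
    {
        std::string name;
        bool is_broken = false;
        std::string broken_reason;

        void load() { /* read columns, checksums and indexes; may throw */ }
    };

    /// Load every projection of a part; a projection that fails with a non-retryable
    /// error is kept but flagged as broken instead of failing the whole parent part.
    void loadProjectionsSketch(std::vector<ProjectionPartSketch> & projections, bool & has_broken_projection)
    {
        for (auto & projection : projections)
        {
            try
            {
                projection.load();
            }
            catch (const std::exception & e)
            {
                /// The real code first rethrows retryable errors (isRetryableException).
                projection.is_broken = true;
                projection.broken_reason = e.what();
                has_broken_projection = true;
            }
        }
    }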
||||
@ -1216,7 +1243,8 @@ void IMergeTreeDataPart::loadChecksums(bool require)
|
||||
/// Check the data while we are at it.
|
||||
LOG_WARNING(storage.log, "Checksums for part {} not found. Will calculate them from data on disk.", name);
|
||||
|
||||
checksums = checkDataPart(shared_from_this(), false);
|
||||
bool noop;
|
||||
checksums = checkDataPart(shared_from_this(), false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */false);
|
||||
writeChecksums(checksums, {});
|
||||
|
||||
bytes_on_disk = checksums.getTotalSizeOnDisk();
|
||||
@ -1352,8 +1380,9 @@ void IMergeTreeDataPart::loadExistingRowsCount()
|
||||
if (existing_rows_count.has_value())
|
||||
return;
|
||||
|
||||
if (!rows_count || !storage.getSettings()->load_existing_rows_count_for_old_parts || !supportLightweightDeleteMutate()
|
||||
|| !hasLightweightDelete())
|
||||
if (!rows_count || !supportLightweightDeleteMutate() || !hasLightweightDelete()
|
||||
|| !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge
|
||||
|| !storage.getSettings()->load_existing_rows_count_for_old_parts)
|
||||
existing_rows_count = rows_count;
|
||||
else
|
||||
existing_rows_count = readExistingRowsCount();
|
||||
@ -2337,6 +2366,32 @@ std::optional<String> IMergeTreeDataPart::getStreamNameForColumn(
|
||||
return getStreamNameOrHash(stream_name, extension, storage_);
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const
|
||||
{
|
||||
auto it = projection_parts.find(projection_name);
|
||||
if (it == projection_parts.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no projection part '{}'", projection_name);
|
||||
it->second->setBrokenReason(message, code);
|
||||
}
|
||||
|
||||
bool IMergeTreeDataPart::hasBrokenProjection(const String & projection_name) const
|
||||
{
|
||||
auto it = projection_parts.find(projection_name);
|
||||
if (it == projection_parts.end())
|
||||
return false;
|
||||
return it->second->is_broken;
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const
|
||||
{
|
||||
std::lock_guard lock(broken_reason_mutex);
|
||||
if (is_broken)
|
||||
return;
|
||||
is_broken = true;
|
||||
exception = message;
|
||||
exception_code = code;
|
||||
}
|
||||
|
||||
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
|
||||
{
|
||||
return (data_part && data_part->getType() == MergeTreeDataPartType::Compact);
|
||||
|
@ -265,6 +265,12 @@ public:
|
||||
/// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table.
|
||||
mutable std::atomic<bool> is_frozen {false};
|
||||
|
||||
/// If it is a projection part, it can be broken sometimes.
|
||||
mutable std::atomic<bool> is_broken {false};
|
||||
mutable std::string exception;
|
||||
mutable int exception_code = 0;
|
||||
mutable std::mutex broken_reason_mutex;
|
||||
|
||||
/// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper
|
||||
mutable bool is_unexpected_local_part = false;
|
||||
|
||||
@ -428,9 +434,16 @@ public:
|
||||
|
||||
void addProjectionPart(const String & projection_name, std::shared_ptr<IMergeTreeDataPart> && projection_part);
|
||||
|
||||
void markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const;
|
||||
|
||||
bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); }
|
||||
|
||||
void loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded = false);
|
||||
bool hasBrokenProjection(const String & projection_name) const;
|
||||
|
||||
/// Return true, if all projections were loaded successfully and none was marked as broken.
|
||||
void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false);
|
||||
|
||||
void setBrokenReason(const String & message, int code) const;
|
||||
|
||||
/// Return set of metadata file names without checksums. For example,
|
||||
/// columns.txt or checksums.txt itself.
|
||||
@ -593,7 +606,7 @@ protected:
|
||||
const IMergeTreeDataPart * parent_part;
|
||||
String parent_part_name;
|
||||
|
||||
std::map<String, std::shared_ptr<IMergeTreeDataPart>> projection_parts;
|
||||
mutable std::map<String, std::shared_ptr<IMergeTreeDataPart>> projection_parts;
|
||||
|
||||
mutable PartMetadataManagerPtr metadata_manager;
|
||||
|
||||
@ -673,7 +686,8 @@ private:
|
||||
/// For the older format version calculates rows count from the size of a column with a fixed size.
|
||||
void loadRowsCount();
|
||||
|
||||
/// Load existing rows count from _row_exists column if load_existing_rows_count_for_old_parts is true.
|
||||
/// Load existing rows count from _row_exists column
|
||||
/// if load_existing_rows_count_for_old_parts and exclude_deleted_rows_for_part_size_in_merge are both enabled.
|
||||
void loadExistingRowsCount();
|
||||
|
||||
static void appendFilesOfRowsCount(Strings & files);
|
||||
|
@ -730,8 +730,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
|
||||
MergeTreeData::DataPartsVector projection_parts;
|
||||
for (const auto & part : global_ctx->future_part->parts)
|
||||
{
|
||||
auto it = part->getProjectionParts().find(projection.name);
|
||||
if (it != part->getProjectionParts().end())
|
||||
auto actual_projection_parts = part->getProjectionParts();
|
||||
auto it = actual_projection_parts.find(projection.name);
|
||||
if (it != actual_projection_parts.end() && !it->second->is_broken)
|
||||
projection_parts.push_back(it->second);
|
||||
}
|
||||
if (projection_parts.size() < global_ctx->future_part->parts.size())
|
||||
|
@ -5302,7 +5302,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
|
||||
if (hold_table_lock && !table_lock)
|
||||
table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);
|
||||
|
||||
if (backup_settings.check_parts)
|
||||
if (backup_settings.check_projection_parts)
|
||||
part->checkConsistencyWithProjections(/* require_part_metadata= */ true);
|
||||
|
||||
BackupEntries backup_entries_from_part;
|
||||
@ -5314,7 +5314,8 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
|
||||
read_settings,
|
||||
make_temporary_hard_links,
|
||||
backup_entries_from_part,
|
||||
&temp_dirs);
|
||||
&temp_dirs,
|
||||
false, false);
|
||||
|
||||
auto projection_parts = part->getProjectionParts();
|
||||
for (const auto & [projection_name, projection_part] : projection_parts)
|
||||
@ -5327,7 +5328,9 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
|
||||
read_settings,
|
||||
make_temporary_hard_links,
|
||||
backup_entries_from_part,
|
||||
&temp_dirs);
|
||||
&temp_dirs,
|
||||
projection_part->is_broken,
|
||||
backup_settings.allow_backup_broken_projections);
|
||||
}
|
||||
|
||||
if (hold_storage_and_part_ptrs)
|
||||
@ -7786,21 +7789,39 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr &
|
||||
|
||||
bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason)
|
||||
{
|
||||
if (left->getProjectionParts().size() != right->getProjectionParts().size())
|
||||
auto remove_broken_parts_from_consideration = [](auto & parts)
|
||||
{
|
||||
std::set<String> broken_projection_parts;
|
||||
for (const auto & [name, part] : parts)
|
||||
{
|
||||
if (part->is_broken)
|
||||
broken_projection_parts.emplace(name);
|
||||
}
|
||||
for (const auto & name : broken_projection_parts)
|
||||
parts.erase(name);
|
||||
};
|
||||
|
||||
auto left_projection_parts = left->getProjectionParts();
|
||||
auto right_projection_parts = right->getProjectionParts();
|
||||
|
||||
remove_broken_parts_from_consideration(left_projection_parts);
|
||||
remove_broken_parts_from_consideration(right_projection_parts);
|
||||
|
||||
if (left_projection_parts.size() != right_projection_parts.size())
|
||||
{
|
||||
out_reason = fmt::format(
|
||||
"Parts have different number of projections: {} in part '{}' and {} in part '{}'",
|
||||
left->getProjectionParts().size(),
|
||||
left_projection_parts.size(),
|
||||
left->name,
|
||||
right->getProjectionParts().size(),
|
||||
right_projection_parts.size(),
|
||||
right->name
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto & [name, _] : left->getProjectionParts())
|
||||
for (const auto & [name, _] : left_projection_parts)
|
||||
{
|
||||
if (!right->hasProjection(name))
|
||||
if (!right_projection_parts.contains(name))
|
||||
{
|
||||
out_reason = fmt::format(
|
||||
"The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name
|
||||
|
@ -464,8 +464,13 @@ public:
|
||||
|
||||
struct ProjectionPartsVector
|
||||
{
|
||||
DataPartsVector projection_parts;
|
||||
DataPartsVector data_parts;
|
||||
|
||||
DataPartsVector projection_parts;
|
||||
DataPartStateVector projection_parts_states;
|
||||
|
||||
DataPartsVector broken_projection_parts;
|
||||
DataPartStateVector broken_projection_parts_states;
|
||||
};
|
||||
|
||||
/// Returns a copy of the list so that the caller shouldn't worry about locks.
|
||||
@ -480,7 +485,7 @@ public:
|
||||
const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const;
|
||||
/// Same as above but only returns projection parts
|
||||
ProjectionPartsVector getProjectionPartsVectorForInternalUsage(
|
||||
const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const;
|
||||
const DataPartStates & affordable_states, MergeTreeData::DataPartStateVector * out_states) const;
|
||||
|
||||
|
||||
/// Returns absolutely all parts (and snapshot of their states)
|
||||
|
@ -54,6 +54,8 @@ struct MergeTreeDataPartChecksums
|
||||
|
||||
bool has(const String & file_name) const { return files.find(file_name) != files.end(); }
|
||||
|
||||
bool remove(const String & file_name) { return files.erase(file_name); }
|
||||
|
||||
bool empty() const { return files.empty(); }
|
||||
|
||||
/// Checks that the set of columns and their checksums are the same. If not, throws an exception.
|
||||
|
@ -592,7 +592,9 @@ static std::set<ProjectionDescriptionRawPtr> getProjectionsToRecalculate(
|
||||
{
|
||||
bool need_recalculate =
|
||||
materialized_projections.contains(projection.name)
|
||||
|| (!is_full_part_storage && source_part->hasProjection(projection.name));
|
||||
|| (!is_full_part_storage
|
||||
&& source_part->hasProjection(projection.name)
|
||||
&& !source_part->hasBrokenProjection(projection.name));
|
||||
|
||||
if (need_recalculate)
|
||||
projections_to_recalc.insert(&projection);
|
||||
@ -936,7 +938,8 @@ void finalizeMutatedPart(
|
||||
new_data_part->modification_time = time(nullptr);
|
||||
|
||||
/// Load rest projections which are hardlinked
|
||||
new_data_part->loadProjections(false, false, true /* if_not_loaded */);
|
||||
bool noop;
|
||||
new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
|
||||
|
||||
/// All information about sizes is stored in checksums.
|
||||
/// It doesn't make sense to touch filesystem for sizes.
|
||||
@ -1534,7 +1537,9 @@ private:
|
||||
|
||||
bool need_recalculate =
|
||||
ctx->materialized_projections.contains(projection.name)
|
||||
|| (!is_full_part_storage && ctx->source_part->hasProjection(projection.name));
|
||||
|| (!is_full_part_storage
|
||||
&& ctx->source_part->hasProjection(projection.name)
|
||||
&& !ctx->source_part->hasBrokenProjection(projection.name));
|
||||
|
||||
if (need_recalculate)
|
||||
{
|
||||
@ -1671,8 +1676,9 @@ private:
|
||||
|
||||
void finalize()
|
||||
{
|
||||
bool noop;
|
||||
ctx->new_data_part->minmax_idx = std::move(ctx->minmax_idx);
|
||||
ctx->new_data_part->loadProjections(false, false, true /* if_not_loaded */);
|
||||
ctx->new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
|
||||
ctx->mutating_executor.reset();
|
||||
ctx->mutating_pipeline.reset();
|
||||
|
||||
|
@ -93,6 +93,7 @@ struct ReplicatedMergeTreeLogEntryData
|
||||
MergeTreeDataPartFormat new_part_format;
|
||||
String block_id; /// For parts of level zero, the block identifier for deduplication (node name in /blocks/).
|
||||
mutable String actual_new_part_name; /// GET_PART could actually fetch a part covering 'new_part_name'.
|
||||
mutable std::unordered_set<String> replace_range_actual_new_part_names; /// Same as above, but for REPLACE_RANGE
|
||||
UUID new_part_uuid = UUIDHelpers::Nil;
|
||||
|
||||
Strings source_parts;
|
||||
|
@ -63,7 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t
|
||||
if (parts_set.contains(name))
|
||||
return;
|
||||
|
||||
LOG_TRACE(log, "Enqueueing {} for check after after {}s", name, delay_to_check_seconds);
|
||||
LOG_TRACE(log, "Enqueueing {} for check after {}s", name, delay_to_check_seconds);
|
||||
parts_queue.emplace_back(name, std::chrono::steady_clock::now() + std::chrono::seconds(delay_to_check_seconds));
|
||||
parts_set.insert(name);
|
||||
task->schedule();
|
||||
@ -274,7 +274,7 @@ std::pair<bool, MergeTreeDataPartPtr> ReplicatedMergeTreePartCheckThread::findLo
|
||||
return std::make_pair(exists_in_zookeeper, part);
|
||||
}
|
||||
|
||||
ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name)
|
||||
ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name, bool throw_on_broken_projection)
|
||||
{
|
||||
ReplicatedCheckResult result;
|
||||
auto [exists_in_zookeeper, part] = findLocalPart(part_name);
|
||||
@ -341,6 +341,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
|
||||
/// before the ReplicatedMergeTreePartHeader was introduced.
|
||||
String part_path = storage.replica_path + "/parts/" + part_name;
|
||||
String part_znode = zookeeper->get(part_path);
|
||||
bool is_broken_projection = false;
|
||||
|
||||
try
|
||||
{
|
||||
@ -362,8 +363,10 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
|
||||
|
||||
checkDataPart(
|
||||
part,
|
||||
true,
|
||||
[this] { return need_stop.load(); });
|
||||
/* require_checksums */true,
|
||||
is_broken_projection,
|
||||
[this] { return need_stop.load(); },
|
||||
throw_on_broken_projection);
|
||||
|
||||
if (need_stop)
|
||||
{
|
||||
@ -382,14 +385,27 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
|
||||
if (isRetryableException(std::current_exception()))
|
||||
throw;
|
||||
|
||||
tryLogCurrentException(log, __PRETTY_FUNCTION__);
|
||||
PreformattedMessage message;
|
||||
if (is_broken_projection)
|
||||
{
|
||||
WriteBufferFromOwnString wb;
|
||||
message = PreformattedMessage::create(
|
||||
"Part {} has a broken projections. It will be ignored. Broken projections info: {}",
|
||||
part_name, getCurrentExceptionMessage(false));
|
||||
LOG_DEBUG(log, message);
|
||||
result.action = ReplicatedCheckResult::DoNothing;
|
||||
}
|
||||
else
|
||||
{
|
||||
tryLogCurrentException(log, __PRETTY_FUNCTION__);
|
||||
|
||||
auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name);
|
||||
LOG_ERROR(log, message);
|
||||
message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name);
|
||||
LOG_ERROR(log, message);
|
||||
result.action = ReplicatedCheckResult::TryFetchMissing;
|
||||
}
|
||||
|
||||
/// Part is broken, let's try to find it and fetch.
|
||||
result.status = {part_name, false, message};
|
||||
result.action = ReplicatedCheckResult::TryFetchMissing;
|
||||
return result;
|
||||
|
||||
}
|
||||
@ -419,12 +435,12 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
|
||||
}
|
||||
|
||||
|
||||
CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after)
|
||||
CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after, bool throw_on_broken_projection)
|
||||
{
|
||||
LOG_INFO(log, "Checking part {}", part_name);
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
|
||||
|
||||
ReplicatedCheckResult result = checkPartImpl(part_name);
|
||||
ReplicatedCheckResult result = checkPartImpl(part_name, throw_on_broken_projection);
|
||||
switch (result.action)
|
||||
{
|
||||
case ReplicatedCheckResult::None: UNREACHABLE();
|
||||
@ -577,7 +593,7 @@ void ReplicatedMergeTreePartCheckThread::run()
|
||||
}
|
||||
|
||||
std::optional<time_t> recheck_after;
|
||||
checkPartAndFix(selected->name, &recheck_after);
|
||||
checkPartAndFix(selected->name, &recheck_after, /* throw_on_broken_projection */false);
|
||||
|
||||
if (need_stop)
|
||||
return;
|
||||
|
@ -65,9 +65,9 @@ public:
|
||||
size_t size() const;
|
||||
|
||||
/// Check part by name
|
||||
CheckResult checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after = nullptr);
|
||||
CheckResult checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after = nullptr, bool throw_on_broken_projection = true);
|
||||
|
||||
ReplicatedCheckResult checkPartImpl(const String & part_name);
|
||||
ReplicatedCheckResult checkPartImpl(const String & part_name, bool throw_on_broken_projection);
|
||||
|
||||
std::unique_lock<std::mutex> pausePartsCheck();
|
||||
|
||||
|
@ -342,6 +342,11 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval(
|
||||
/// NOTE actual_new_part_name is very confusing and error-prone. This approach must be fixed.
|
||||
removeCoveredPartsFromMutations(entry->actual_new_part_name, /*remove_part = */ false, /*remove_covered_parts = */ true);
|
||||
}
|
||||
for (const auto & actual_part : entry->replace_range_actual_new_part_names)
|
||||
{
|
||||
LOG_TEST(log, "Entry {} has actual new part name {}, removing it from mutations", entry->znode_name, actual_part);
|
||||
removeCoveredPartsFromMutations(actual_part, /*remove_part = */ false, /*remove_covered_parts = */ true);
|
||||
}
|
||||
|
||||
LOG_TEST(log, "Adding parts [{}] to current parts", fmt::join(entry_virtual_parts, ", "));
|
||||
|
||||
@ -1180,9 +1185,9 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry
|
||||
if (entry_for_same_part_it != future_parts.end())
|
||||
{
|
||||
const LogEntry & another_entry = *entry_for_same_part_it->second;
|
||||
constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} "
|
||||
constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} (actual part {})"
|
||||
"because another log entry {} of type {} for the same part ({}) is being processed.";
|
||||
LOG_INFO(LogToStr(out_reason, log), fmt_string, entry.znode_name, entry.type, entry.new_part_name,
|
||||
LOG_INFO(LogToStr(out_reason, log), fmt_string, entry.znode_name, entry.type, entry.new_part_name, new_part_name,
|
||||
another_entry.znode_name, another_entry.type, another_entry.new_part_name);
|
||||
return true;
|
||||
|
||||
@ -1198,6 +1203,7 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry
|
||||
auto result_part = MergeTreePartInfo::fromPartName(new_part_name, format_version);
|
||||
|
||||
/// It can slow down when the size of `future_parts` is large. But it can not be large, since background pool is limited.
|
||||
/// (well, it can actually, thanks to REPLACE_RANGE, but it's a rare case)
|
||||
for (const auto & future_part_elem : future_parts)
|
||||
{
|
||||
auto future_part = MergeTreePartInfo::fromPartName(future_part_elem.first, format_version);
|
||||
@ -1608,26 +1614,39 @@ void ReplicatedMergeTreeQueue::CurrentlyExecuting::setActualPartName(
|
||||
std::unique_lock<std::mutex> & state_lock,
|
||||
std::vector<LogEntryPtr> & covered_entries_to_wait)
|
||||
{
|
||||
if (!entry.actual_new_part_name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry actual part isn't empty yet. This is a bug.");
|
||||
if (actual_part_name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Actual part name is empty");
|
||||
|
||||
entry.actual_new_part_name = actual_part_name;
|
||||
if (!entry.actual_new_part_name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} actual part isn't empty yet: '{}'. This is a bug.",
|
||||
entry.znode_name, entry.actual_new_part_name);
|
||||
|
||||
auto actual_part_info = MergeTreePartInfo::fromPartName(actual_part_name, queue.format_version);
|
||||
for (const auto & other_part_name : entry.replace_range_actual_new_part_names)
|
||||
if (!MergeTreePartInfo::fromPartName(other_part_name, queue.format_version).isDisjoint(actual_part_info))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} already has actual part {} non-disjoint with {}. This is a bug.",
|
||||
entry.actual_new_part_name, other_part_name, actual_part_name);
|
||||
|
||||
/// Check if it is the same (and already added) part.
|
||||
if (entry.actual_new_part_name == entry.new_part_name)
|
||||
if (actual_part_name == entry.new_part_name)
|
||||
return;
|
||||
|
||||
if (!queue.future_parts.emplace(entry.actual_new_part_name, entry.shared_from_this()).second)
|
||||
if (!queue.future_parts.emplace(actual_part_name, entry.shared_from_this()).second)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attaching already existing future part {}. This is a bug. "
|
||||
"It happened on attempt to execute {}: {}",
|
||||
entry.actual_new_part_name, entry.znode_name, entry.toString());
|
||||
actual_part_name, entry.znode_name, entry.toString());
|
||||
|
||||
if (entry.type == LogEntry::REPLACE_RANGE)
|
||||
entry.replace_range_actual_new_part_names.insert(actual_part_name);
|
||||
else
|
||||
entry.actual_new_part_name = actual_part_name;
|
||||
|
||||
for (LogEntryPtr & covered_entry : covered_entries_to_wait)
|
||||
{
|
||||
if (&entry == covered_entry.get())
|
||||
continue;
|
||||
LOG_TRACE(queue.log, "Waiting for {} producing {} to finish before executing {} producing not disjoint part {}",
|
||||
covered_entry->znode_name, covered_entry->new_part_name, entry.znode_name, entry.new_part_name);
|
||||
LOG_TRACE(queue.log, "Waiting for {} producing {} to finish before executing {} producing not disjoint part {} (actual part {})",
|
||||
covered_entry->znode_name, covered_entry->new_part_name, entry.znode_name, entry.new_part_name, actual_part_name);
|
||||
covered_entry->execution_complete.wait(state_lock, [&covered_entry] { return !covered_entry->currently_executing; });
|
||||
}
|
||||
}
|
||||
@ -1646,25 +1665,27 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::~CurrentlyExecuting()
|
||||
entry->currently_executing = false;
|
||||
entry->execution_complete.notify_all();
|
||||
|
||||
for (const String & new_part_name : entry->getVirtualPartNames(queue.format_version))
|
||||
auto erase_and_check = [this](const String & part_name)
|
||||
{
|
||||
if (!queue.future_parts.erase(new_part_name))
|
||||
if (!queue.future_parts.erase(part_name))
|
||||
{
|
||||
LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", new_part_name);
|
||||
LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", part_name);
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for (const String & new_part_name : entry->getVirtualPartNames(queue.format_version))
|
||||
erase_and_check(new_part_name);
|
||||
|
||||
if (!entry->actual_new_part_name.empty())
|
||||
{
|
||||
if (entry->actual_new_part_name != entry->new_part_name && !queue.future_parts.erase(entry->actual_new_part_name))
|
||||
{
|
||||
LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", entry->actual_new_part_name);
|
||||
assert(false);
|
||||
}
|
||||
erase_and_check(entry->actual_new_part_name);
|
||||
|
||||
entry->actual_new_part_name.clear();
|
||||
}
|
||||
entry->actual_new_part_name.clear();
|
||||
|
||||
for (const auto & actual_part : entry->replace_range_actual_new_part_names)
|
||||
erase_and_check(actual_part);
|
||||
|
||||
entry->replace_range_actual_new_part_names.clear();
|
||||
}
|
||||
|
||||
|
||||
|
@ -42,6 +42,7 @@ namespace ErrorCodes
|
||||
extern const int NO_FILE_IN_DATA_PART;
|
||||
extern const int NETWORK_ERROR;
|
||||
extern const int SOCKET_TIMEOUT;
|
||||
extern const int BROKEN_PROJECTION;
|
||||
}
|
||||
|
||||
|
||||
@ -116,7 +117,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
|
||||
const NameSet & files_without_checksums,
|
||||
const ReadSettings & read_settings,
|
||||
bool require_checksums,
|
||||
std::function<bool()> is_cancelled)
|
||||
std::function<bool()> is_cancelled,
|
||||
bool & is_broken_projection,
|
||||
bool throw_on_broken_projection)
|
||||
{
|
||||
/** Responsibility:
|
||||
* - read list of columns from columns.txt;
|
||||
@ -125,6 +128,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
|
||||
*/
|
||||
|
||||
CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedChecks};
|
||||
Poco::Logger * log = &Poco::Logger::get("checkDataPart");
|
||||
|
||||
NamesAndTypesList columns_txt;
|
||||
|
||||
@ -274,17 +278,55 @@ static IMergeTreeDataPart::Checksums checkDataPart(
|
||||
}
|
||||
}
|
||||
|
||||
std::string broken_projections_message;
|
||||
for (const auto & [name, projection] : data_part->getProjectionParts())
|
||||
{
|
||||
if (is_cancelled())
|
||||
return {};
|
||||
|
||||
auto projection_file = name + ".proj";
|
||||
auto projection_checksums = checkDataPart(
|
||||
projection, *data_part_storage.getProjection(projection_file),
|
||||
projection->getColumns(), projection->getType(),
|
||||
projection->getFileNamesWithoutChecksums(),
|
||||
read_settings, require_checksums, is_cancelled);
|
||||
if (!throw_on_broken_projection && projection->is_broken)
|
||||
{
|
||||
projections_on_disk.erase(projection_file);
|
||||
checksums_txt.remove(projection_file);
|
||||
}
|
||||
|
||||
IMergeTreeDataPart::Checksums projection_checksums;
|
||||
try
|
||||
{
|
||||
bool noop;
|
||||
projection_checksums = checkDataPart(
|
||||
projection, *data_part_storage.getProjection(projection_file),
|
||||
projection->getColumns(), projection->getType(),
|
||||
projection->getFileNamesWithoutChecksums(),
|
||||
read_settings, require_checksums, is_cancelled, noop, /* throw_on_broken_projection */false);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (isRetryableException(std::current_exception()))
|
||||
throw;
|
||||
|
||||
if (!projection->is_broken)
|
||||
{
|
||||
LOG_TEST(log, "Marking projection {} as broken ({})", name, projection_file);
|
||||
projection->setBrokenReason(getCurrentExceptionMessage(false), getCurrentExceptionCode());
|
||||
}
|
||||
|
||||
is_broken_projection = true;
|
||||
if (throw_on_broken_projection)
|
||||
{
|
||||
if (!broken_projections_message.empty())
|
||||
broken_projections_message += "\n";
|
||||
|
||||
broken_projections_message += fmt::format(
|
||||
"Part {} has a broken projection {} (error: {})",
|
||||
data_part->name, name, getCurrentExceptionMessage(false));
|
||||
continue;
|
||||
}
|
||||
|
||||
projections_on_disk.erase(projection_file);
|
||||
checksums_txt.remove(projection_file);
|
||||
}
|
||||
|
||||
checksums_data.files[projection_file] = IMergeTreeDataPart::Checksums::Checksum(
|
||||
projection_checksums.getTotalSizeOnDisk(),
|
||||
@ -293,6 +335,11 @@ static IMergeTreeDataPart::Checksums checkDataPart(
|
||||
projections_on_disk.erase(projection_file);
|
||||
}
|
||||
|
||||
if (throw_on_broken_projection && !broken_projections_message.empty())
|
||||
{
|
||||
throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
|
||||
}
|
||||
|
||||
if (require_checksums && !projections_on_disk.empty())
|
||||
{
|
||||
throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
|
||||
@ -312,7 +359,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
|
||||
IMergeTreeDataPart::Checksums checkDataPart(
|
||||
MergeTreeData::DataPartPtr data_part,
|
||||
bool require_checksums,
|
||||
std::function<bool()> is_cancelled)
|
||||
bool & is_broken_projection,
|
||||
std::function<bool()> is_cancelled,
|
||||
bool throw_on_broken_projection)
|
||||
{
|
||||
/// If check of part has failed and it is stored on disk with cache
|
||||
/// try to drop cache and check it once again because maybe the cache
|
||||
@ -351,7 +400,9 @@ IMergeTreeDataPart::Checksums checkDataPart(
|
||||
data_part->getFileNamesWithoutChecksums(),
|
||||
read_settings,
|
||||
require_checksums,
|
||||
is_cancelled);
|
||||
is_cancelled,
|
||||
is_broken_projection,
|
||||
throw_on_broken_projection);
|
||||
};
|
||||
|
||||
try
|
||||
@ -365,7 +416,9 @@ IMergeTreeDataPart::Checksums checkDataPart(
|
||||
data_part->getFileNamesWithoutChecksums(),
|
||||
read_settings,
|
||||
require_checksums,
|
||||
is_cancelled);
|
||||
is_cancelled,
|
||||
is_broken_projection,
|
||||
throw_on_broken_projection);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@@ -10,7 +10,9 @@
IMergeTreeDataPart::Checksums checkDataPart(
MergeTreeData::DataPartPtr data_part,
bool require_checksums,
std::function<bool()> is_cancelled = []{ return false; });
bool & is_broken_projection,
std::function<bool()> is_cancelled = []{ return false; },
bool throw_on_broken_projection = false);

bool isNotEnoughMemoryErrorCode(int code);
bool isRetryableException(std::exception_ptr exception_ptr);
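With the declaration above, callers provide an out-flag and choose whether broken projections are fatal. A usage fragment in the spirit of the call sites in this commit (not a standalone program; `part` stands for a MergeTreeData::DataPartPtr available in the surrounding code):

    bool is_broken_projection = false;

    /// Strict mode, as in the CHECK TABLE / part-check paths of this commit:
    /// a broken projection makes the whole check throw.
    auto checksums = checkDataPart(
        part,
        /* require_checksums */ true,
        is_broken_projection,
        /* is_cancelled */ [] { return false; },
        /* throw_on_broken_projection */ true);

    /// Tolerant mode: the check completes and only the out-flag records that some
    /// projection was broken (the defaults keep it non-throwing).
    bool noop = false;
    auto recalculated_checksums = checkDataPart(part, /* require_checksums */ false, noop);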
@ -94,6 +94,8 @@ void StorageInMemoryMetadata::setSQLSecurity(const ASTSQLSecurity & sql_security
|
||||
{
|
||||
if (sql_security.definer)
|
||||
definer = sql_security.definer->toString();
|
||||
else
|
||||
definer = std::nullopt;
|
||||
|
||||
sql_security_type = sql_security.type;
|
||||
}
|
||||
|
@ -902,15 +902,27 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo
|
||||
if (!storage_snapshot_->tryGetColumn(get_column_options, "_table"))
|
||||
{
|
||||
auto table_name_node = std::make_shared<ConstantNode>(current_storage_id.table_name);
|
||||
table_name_node->setAlias("_table");
|
||||
column_name_to_node.emplace("_table", table_name_node);
|
||||
auto table_name_alias = std::make_shared<ConstantNode>("__table1._table");
|
||||
|
||||
auto function_node = std::make_shared<FunctionNode>("__actionName");
|
||||
function_node->getArguments().getNodes().push_back(std::move(table_name_node));
|
||||
function_node->getArguments().getNodes().push_back(std::move(table_name_alias));
|
||||
function_node->resolveAsFunction(FunctionFactory::instance().get("__actionName", context));
|
||||
|
||||
column_name_to_node.emplace("_table", function_node);
|
||||
}
|
||||
|
||||
if (!storage_snapshot_->tryGetColumn(get_column_options, "_database"))
|
||||
{
|
||||
auto database_name_node = std::make_shared<ConstantNode>(current_storage_id.database_name);
|
||||
database_name_node->setAlias("_database");
|
||||
column_name_to_node.emplace("_database", database_name_node);
|
||||
auto database_name_alias = std::make_shared<ConstantNode>("__table1._database");
|
||||
|
||||
auto function_node = std::make_shared<FunctionNode>("__actionName");
|
||||
function_node->getArguments().getNodes().push_back(std::move(database_name_node));
|
||||
function_node->getArguments().getNodes().push_back(std::move(database_name_alias));
|
||||
function_node->resolveAsFunction(FunctionFactory::instance().get("__actionName", context));
|
||||
|
||||
column_name_to_node.emplace("_database", function_node);
|
||||
}
|
||||
|
||||
auto storage_columns = storage_snapshot_->metadata->getColumns();
|
||||
@ -1061,7 +1073,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
|
||||
String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_table" : table_alias + "._table";
|
||||
|
||||
if (has_database_virtual_column && common_header.has(database_column)
|
||||
&& (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + database_name + "'_String")))
|
||||
&& storage_stage == QueryProcessingStage::FetchColumns && !pipe_header.has(database_column))
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.name = database_column;
|
||||
@ -1077,7 +1089,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
|
||||
}
|
||||
|
||||
if (has_table_virtual_column && common_header.has(table_column)
|
||||
&& (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + table_name + "'_String")))
|
||||
&& storage_stage == QueryProcessingStage::FetchColumns && !pipe_header.has(table_column))
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.name = table_column;
|
||||
|
@ -2042,7 +2042,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition(
|
||||
MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get());
|
||||
{
|
||||
auto lock = lockParts();
|
||||
fillNewPartName(loaded_parts[i], lock);
|
||||
fillNewPartNameAndResetLevel(loaded_parts[i], lock);
|
||||
renameTempPartAndAdd(loaded_parts[i], transaction, lock);
|
||||
transaction.commit(&lock);
|
||||
}
|
||||
@ -2313,11 +2313,12 @@ std::optional<CheckResult> StorageMergeTree::checkDataNext(DataValidationTasksPt
|
||||
{
|
||||
/// If the checksums file is not present, calculate the checksums and write them to disk.
|
||||
static constexpr auto checksums_path = "checksums.txt";
|
||||
bool noop;
|
||||
if (part->isStoredOnDisk() && !part->getDataPartStorage().exists(checksums_path))
|
||||
{
|
||||
try
|
||||
{
|
||||
auto calculated_checksums = checkDataPart(part, false);
|
||||
auto calculated_checksums = checkDataPart(part, false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true);
|
||||
calculated_checksums.checkEqual(part->checksums, true);
|
||||
|
||||
auto & part_mutable = const_cast<IMergeTreeDataPart &>(*part);
|
||||
@ -2338,7 +2339,7 @@ std::optional<CheckResult> StorageMergeTree::checkDataNext(DataValidationTasksPt
|
||||
{
|
||||
try
|
||||
{
|
||||
checkDataPart(part, true);
|
||||
checkDataPart(part, true, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true);
|
||||
return CheckResult(part->name, true, "");
|
||||
}
|
||||
catch (...)
|
||||
@ -2481,4 +2482,12 @@ void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock
|
||||
part->setName(part->getNewName(part->info));
|
||||
}
|
||||
|
||||
void StorageMergeTree::fillNewPartNameAndResetLevel(MutableDataPartPtr & part, DataPartsLock &)
|
||||
{
|
||||
part->info.min_block = part->info.max_block = increment.get();
|
||||
part->info.mutation = 0;
|
||||
part->info.level = 0;
|
||||
part->setName(part->getNewName(part->info));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -260,6 +260,7 @@ private:
|
||||
std::set<String> * mutation_ids = nullptr, bool from_another_mutation = false) const;
|
||||
|
||||
void fillNewPartName(MutableDataPartPtr & part, DataPartsLock & lock);
|
||||
void fillNewPartNameAndResetLevel(MutableDataPartPtr & part, DataPartsLock & lock);
|
||||
|
||||
void startBackgroundMovesIfNeeded() override;
|
||||
|
||||
|
@ -8990,12 +8990,11 @@ IStorage::DataValidationTasksPtr StorageReplicatedMergeTree::getCheckTaskList(
|
||||
|
||||
std::optional<CheckResult> StorageReplicatedMergeTree::checkDataNext(DataValidationTasksPtr & check_task_list)
|
||||
{
|
||||
|
||||
if (auto part = assert_cast<DataValidationTasks *>(check_task_list.get())->next())
|
||||
{
|
||||
try
|
||||
{
|
||||
return CheckResult(part_check_thread.checkPartAndFix(part->name));
|
||||
return part_check_thread.checkPartAndFix(part->name, /* recheck_after */nullptr, /* throw_on_broken_projection */true);
|
||||
}
|
||||
catch (const Exception & ex)
|
||||
{
|
||||
|
@@ -84,8 +84,11 @@ StorageSystemProjectionParts::StorageSystemProjectionParts(const StorageID & tab
{"rows_where_ttl_info.expression", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "The TTL expression."},
{"rows_where_ttl_info.min", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."},
{"rows_where_ttl_info.max", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."},
}
)

{"is_broken", std::make_shared<DataTypeUInt8>(), "Whether projection part is broken"},
{"exception_code", std::make_shared<DataTypeInt32>(), "Exception message explaining broken state of the projection part"},
{"exception", std::make_shared<DataTypeString>(), "Exception code explaining broken state of the projection part"},
})
{
}
||||
@ -272,12 +275,38 @@ void StorageSystemProjectionParts::processNextStorage(
|
||||
add_ttl_info_map(part->ttl_infos.moves_ttl);
|
||||
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc()));
|
||||
{
|
||||
if (part->default_codec)
|
||||
columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc()));
|
||||
else
|
||||
columns[res_index++]->insertDefault();
|
||||
}
|
||||
|
||||
add_ttl_info_map(part->ttl_infos.recompression_ttl);
|
||||
add_ttl_info_map(part->ttl_infos.group_by_ttl);
|
||||
add_ttl_info_map(part->ttl_infos.rows_where_ttl);
|
||||
|
||||
{
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(part->is_broken.load(std::memory_order_relaxed));
|
||||
|
||||
if (part->is_broken)
|
||||
{
|
||||
std::lock_guard lock(part->broken_reason_mutex);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(part->exception_code);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(part->exception);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insertDefault();
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insertDefault();
|
||||
}
|
||||
}
|
||||
|
||||
/// _state column should be the latest.
|
||||
/// Do not use part->getState*, it can be changed from different thread
|
||||
if (has_state_column)
|
||||
|
@@ -1,6 +1,4 @@
00725_memory_tracking
01155_rename_move_materialized_view
01624_soft_constraints
02354_vector_search_queries
# Check after constants refactoring
02901_parallel_replicas_rollup
@ -1033,22 +1033,6 @@ def _print_results(result: Any, outfile: Optional[str], pretty: bool = False) ->
|
||||
raise AssertionError(f"Unexpected type for 'res': {type(result)}")
|
||||
|
||||
|
||||
def _check_and_update_for_early_style_check(jobs_data: dict, docker_data: dict) -> None:
|
||||
"""
|
||||
This is temporary hack to start style check before docker build if possible
|
||||
FIXME: need better solution to do style check as soon as possible and as fast as possible w/o dependency on docker job
|
||||
"""
|
||||
jobs_to_do = jobs_data.get("jobs_to_do", [])
|
||||
docker_to_build = docker_data.get("missing_multi", [])
|
||||
if (
|
||||
JobNames.STYLE_CHECK in jobs_to_do
|
||||
and docker_to_build
|
||||
and "clickhouse/style-test" not in docker_to_build
|
||||
):
|
||||
index = jobs_to_do.index(JobNames.STYLE_CHECK)
|
||||
jobs_to_do[index] = "Style check early"
|
||||
|
||||
|
||||
def _update_config_for_docs_only(jobs_data: dict) -> None:
|
||||
DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK]
|
||||
print(f"NOTE: Will keep only docs related jobs: [{DOCS_CHECK_JOBS}]")
|
||||
@ -1306,6 +1290,12 @@ def _configure_jobs(
|
||||
if params["num_batches"] > 1:
|
||||
params["batches"] = list(requested_batches)
|
||||
|
||||
if pr_info.is_merge_queue():
|
||||
# FIXME: Quick support for MQ workflow which is only StyleCheck for now
|
||||
jobs_to_do = [JobNames.STYLE_CHECK]
|
||||
jobs_to_skip = []
|
||||
print(f"NOTE: This is Merge Queue CI: set jobs to do: [{jobs_to_do}]")
|
||||
|
||||
return {
|
||||
"digests": digests,
|
||||
"jobs_to_do": jobs_to_do,
|
||||
@ -1752,11 +1742,6 @@ def main() -> int:
|
||||
else {}
|
||||
)
|
||||
|
||||
# # FIXME: Early style check manipulates with job names might be not robust with await feature
|
||||
# if pr_info.number != 0:
|
||||
# # FIXME: it runs style check before docker build if possible (style-check images is not changed)
|
||||
# # find a way to do style check always before docker build and others
|
||||
# _check_and_update_for_early_style_check(jobs_data, docker_data)
|
||||
if not args.skip_jobs and pr_info.has_changes_in_documentation_only():
|
||||
_update_config_for_docs_only(jobs_data)
|
||||
|
||||
|
@ -220,7 +220,7 @@ class JobConfig:
|
||||
digest: DigestConfig = field(default_factory=DigestConfig)
|
||||
# will be triggered for the job if omited in CI workflow yml
|
||||
run_command: str = ""
|
||||
# job timeout
|
||||
# job timeout, seconds
|
||||
timeout: Optional[int] = None
|
||||
# sets number of batches for multi-batch job
|
||||
num_batches: int = 1
|
||||
@ -517,10 +517,11 @@ clickbench_test_params = {
|
||||
),
|
||||
"run_command": 'clickbench.py "$CHECK_NAME"',
|
||||
}
|
||||
install_test_params = {
|
||||
"digest": install_check_digest,
|
||||
"run_command": 'install_check.py "$CHECK_NAME"',
|
||||
}
|
||||
install_test_params = JobConfig(
|
||||
digest=install_check_digest,
|
||||
run_command='install_check.py "$CHECK_NAME"',
|
||||
timeout=900,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -1105,10 +1106,10 @@ CI_CONFIG = CIConfig(
|
||||
},
|
||||
test_configs={
|
||||
JobNames.INSTALL_TEST_AMD: TestConfig(
|
||||
Build.PACKAGE_RELEASE, job_config=JobConfig(**install_test_params) # type: ignore
|
||||
Build.PACKAGE_RELEASE, job_config=install_test_params
|
||||
),
|
||||
JobNames.INSTALL_TEST_ARM: TestConfig(
|
||||
Build.PACKAGE_AARCH64, job_config=JobConfig(**install_test_params) # type: ignore
|
||||
Build.PACKAGE_AARCH64, job_config=install_test_params
|
||||
),
|
||||
JobNames.STATEFUL_TEST_ASAN: TestConfig(
|
||||
Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore
|
||||
|
@ -1,5 +1,6 @@
|
||||
from contextlib import contextmanager
|
||||
import os
|
||||
import signal
|
||||
from typing import Any, List, Union, Iterator
|
||||
from pathlib import Path
|
||||
|
||||
@ -48,3 +49,14 @@ class GHActions:
|
||||
for line in lines:
|
||||
print(line)
|
||||
print("::endgroup::")
|
||||
|
||||
|
||||
def set_job_timeout():
|
||||
def timeout_handler(_signum, _frame):
|
||||
print("Timeout expired")
|
||||
raise TimeoutError("Job's KILL_TIMEOUT expired")
|
||||
|
||||
kill_timeout = int(os.getenv("KILL_TIMEOUT", "0"))
|
||||
assert kill_timeout > 0, "kill timeout must be provided in KILL_TIMEOUT env"
|
||||
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
signal.alarm(kill_timeout)
|
||||
|
@ -14,10 +14,11 @@ from build_download_helper import download_builds_filter
|
||||
|
||||
from compress_files import compress_fast
|
||||
from docker_images_helper import DockerImage, pull_image, get_docker_image
|
||||
from env_helper import REPORT_PATH, TEMP_PATH as TEMP
|
||||
from env_helper import CI, REPORT_PATH, TEMP_PATH as TEMP
|
||||
from report import JobReport, TestResults, TestResult, FAILURE, FAIL, OK, SUCCESS
|
||||
from stopwatch import Stopwatch
|
||||
from tee_popen import TeePopen
|
||||
from ci_utils import set_job_timeout
|
||||
|
||||
|
||||
RPM_IMAGE = "clickhouse/install-rpm-test"
|
||||
@ -255,6 +256,9 @@ def main():
|
||||
|
||||
args = parse_args()
|
||||
|
||||
if CI:
|
||||
set_job_timeout()
|
||||
|
||||
TEMP_PATH.mkdir(parents=True, exist_ok=True)
|
||||
LOGS_PATH.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
@ -215,6 +215,7 @@ class PRInfo:
|
||||
.replace("{base}", base_sha)
|
||||
.replace("{head}", self.sha)
|
||||
)
|
||||
self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
|
||||
|
||||
elif "commits" in github_event:
|
||||
self.event_type = EventType.PUSH
|
||||
|
@ -0,0 +1,13 @@
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<backups>
|
||||
<type>local</type>
|
||||
<path>/var/lib/clickhouse/disks/backups/</path>
|
||||
</backups>
|
||||
</disks>
|
||||
</storage_configuration>
|
||||
<backups>
|
||||
<allowed_disk>backups</allowed_disk>
|
||||
</backups>
|
||||
</clickhouse>
576
tests/integration/test_broken_projections/test.py
Normal file
@@ -0,0 +1,576 @@
import time
import pytest
import logging
import string
import random
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)


@pytest.fixture(scope="module")
def cluster():
    try:
        cluster = ClickHouseCluster(__file__)
        cluster.add_instance(
            "node",
            main_configs=["config.d/backups.xml"],
            stay_alive=True,
            with_zookeeper=True,
        )

        logging.info("Starting cluster...")
        cluster.start()
        logging.info("Cluster started")

        yield cluster
    finally:
        cluster.shutdown()

def create_table(node, table, replica, data_prefix="", aggressive_merge=True):
|
||||
if data_prefix == "":
|
||||
data_prefix = table
|
||||
|
||||
if aggressive_merge:
|
||||
vertical_merge_algorithm_min_rows_to_activate = 1
|
||||
vertical_merge_algorithm_min_columns_to_activate = 1
|
||||
max_parts_to_merge_at_once = 3
|
||||
else:
|
||||
vertical_merge_algorithm_min_rows_to_activate = 100000
|
||||
vertical_merge_algorithm_min_columns_to_activate = 100
|
||||
max_parts_to_merge_at_once = 3
|
||||
|
||||
node.query(
|
||||
f"""
|
||||
DROP TABLE IF EXISTS {table} SYNC;
|
||||
CREATE TABLE {table}
|
||||
(
|
||||
a String,
|
||||
b String,
|
||||
c Int64,
|
||||
d Int64,
|
||||
e Int64,
|
||||
PROJECTION proj1
|
||||
(
|
||||
SELECT c ORDER BY d
|
||||
),
|
||||
PROJECTION proj2
|
||||
(
|
||||
SELECT d ORDER BY c
|
||||
)
|
||||
)
|
||||
ENGINE = ReplicatedMergeTree('/test_broken_projection_{data_prefix}/data/', '{replica}') ORDER BY a
|
||||
SETTINGS min_bytes_for_wide_part = 0,
|
||||
max_parts_to_merge_at_once={max_parts_to_merge_at_once},
|
||||
enable_vertical_merge_algorithm=0,
|
||||
vertical_merge_algorithm_min_rows_to_activate = {vertical_merge_algorithm_min_rows_to_activate},
|
||||
vertical_merge_algorithm_min_columns_to_activate = {vertical_merge_algorithm_min_columns_to_activate},
|
||||
compress_primary_key=0;
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def insert(node, table, offset, size):
|
||||
node.query(
|
||||
f"""
|
||||
INSERT INTO {table}
|
||||
SELECT number, number, number, number, number%2 FROM numbers({offset}, {size})
|
||||
SETTINGS insert_keeper_fault_injection_probability=0.0;
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def get_parts(node, table):
|
||||
return (
|
||||
node.query(
|
||||
f"""
|
||||
SELECT name
|
||||
FROM system.parts
|
||||
WHERE table='{table}' AND database=currentDatabase() AND active = 1
|
||||
ORDER BY name;
|
||||
"""
|
||||
)
|
||||
.strip()
|
||||
.split("\n")
|
||||
)
|
||||
|
||||
|
||||
def bash(node, command):
|
||||
node.exec_in_container(["bash", "-c", command], privileged=True, user="root")
|
||||
|
||||
|
||||
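# Corrupt projection 'part' of 'parent_part' on disk: remove its data files ("data"),
# its columns.txt ("metadata"), or the whole projection directory ("part").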
def break_projection(node, table, part, parent_part, break_type):
|
||||
part_path = node.query(
|
||||
f"""
|
||||
SELECT path
|
||||
FROM system.projection_parts
|
||||
WHERE table='{table}'
|
||||
AND database=currentDatabase()
|
||||
AND active=1
|
||||
AND part_name='{part}'
|
||||
AND parent_name='{parent_part}'
|
||||
ORDER BY modification_time DESC
|
||||
LIMIT 1;
|
||||
"""
|
||||
).strip()
|
||||
|
||||
node.query(
|
||||
f"select throwIf(substring('{part_path}', 1, 1) != '/', 'Path is relative: {part_path}')"
|
||||
)
|
||||
|
||||
if break_type == "data":
|
||||
bash(node, f"rm '{part_path}/d.bin'")
|
||||
bash(node, f"rm '{part_path}/c.bin'")
|
||||
elif break_type == "metadata":
|
||||
bash(node, f"rm '{part_path}/columns.txt'")
|
||||
elif break_type == "part":
|
||||
bash(node, f"rm -r '{part_path}'")
|
||||
|
||||
|
||||
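# Corrupt a regular data part on disk by removing its columns.txt file.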
def break_part(node, table, part):
|
||||
part_path = node.query(
|
||||
f"""
|
||||
SELECT path
|
||||
FROM system.parts
|
||||
WHERE table='{table}'
|
||||
AND database=currentDatabase()
|
||||
AND active=1
|
||||
AND part_name='{part}'
|
||||
ORDER BY modification_time DESC
|
||||
LIMIT 1;
|
||||
"""
|
||||
).strip()
|
||||
|
||||
node.query(
|
||||
f"select throwIf(substring('{part_path}', 1, 1) != '/', 'Path is relative: {part_path}')"
|
||||
)
|
||||
bash(node, f"rm '{part_path}/columns.txt'")
|
||||
|
||||
|
||||
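# Return (parent part, projection name, error name) rows for projection parts marked broken,
# resolving exception_code via system.errors.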
def get_broken_projections_info(node, table):
|
||||
return node.query(
|
||||
f"""
|
||||
SELECT parent_name, name, errors.name FROM
|
||||
(
|
||||
SELECT parent_name, name, exception_code
|
||||
FROM system.projection_parts
|
||||
WHERE table='{table}'
|
||||
AND database=currentDatabase()
|
||||
AND is_broken = 1
|
||||
) AS parts_info
|
||||
INNER JOIN system.errors AS errors
|
||||
ON parts_info.exception_code = errors.code
|
||||
ORDER BY parent_name, name
|
||||
"""
|
||||
).strip()
|
||||
|
||||
|
||||
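# Return (parent part, projection name, is_broken) for all active projection parts of the table.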
def get_projections_info(node, table):
|
||||
return node.query(
|
||||
f"""
|
||||
SELECT parent_name, name, is_broken
|
||||
FROM system.projection_parts
|
||||
WHERE table='{table}'
|
||||
AND active = 1
|
||||
AND database=currentDatabase()
|
||||
ORDER BY parent_name, name
|
||||
"""
|
||||
).strip()
|
||||
|
||||
|
||||
def optimize(node, table, final, no_wait):
|
||||
query = f"OPTIMIZE TABLE {table}"
|
||||
if final:
|
||||
query += " FINAL"
|
||||
if no_wait:
|
||||
query += " SETTINGS alter_sync=0"
|
||||
node.query(query)
|
||||
|
||||
|
||||
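# Reload the table's parts from disk by detaching and re-attaching it.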
def reattach(node, table):
|
||||
node.query(
|
||||
f"""
|
||||
DETACH TABLE {table};
|
||||
ATTACH TABLE {table};
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def materialize_projection(node, table, proj):
|
||||
node.query(
|
||||
f"ALTER TABLE {table} MATERIALIZE PROJECTION {proj} SETTINGS mutations_sync=2"
|
||||
)
|
||||
|
||||
|
||||
def check_table_full(node, table):
|
||||
return node.query(
|
||||
f"CHECK TABLE {table} SETTINGS check_query_single_value_result = 0;"
|
||||
).strip()
|
||||
|
||||
|
||||
def random_str(length=6):
|
||||
alphabet = string.ascii_lowercase + string.digits
|
||||
return "".join(random.SystemRandom().choice(alphabet) for _ in range(length))
|
||||
|
||||
|
||||
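# Query both projections and verify behaviour: if a projection is expected to be broken,
# the corresponding SELECT must fail with expected_error; otherwise the query_log must show
# that the projection was used. Finally compare CHECK TABLE output with check_result.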
def check(node, table, check_result, expect_broken_part="", expected_error=""):
|
||||
if expect_broken_part == "proj1":
|
||||
assert expected_error in node.query_and_get_error(
|
||||
f"SELECT c FROM '{table}' WHERE d == 12 ORDER BY c"
|
||||
)
|
||||
else:
|
||||
query_id = node.query(
|
||||
f"SELECT queryID() FROM (SELECT c FROM '{table}' WHERE d == 12 ORDER BY c)"
|
||||
).strip()
|
||||
node.query("SYSTEM FLUSH LOGS")
|
||||
res = node.query(
|
||||
f"""
|
||||
SELECT query, splitByChar('.', arrayJoin(projections))[-1]
|
||||
FROM system.query_log
|
||||
WHERE query_id='{query_id}' AND type='QueryFinish'
|
||||
"""
|
||||
)
|
||||
if res == "":
|
||||
res = node.query(
|
||||
"""
|
||||
SELECT query_id, query, splitByChar('.', arrayJoin(projections))[-1]
|
||||
FROM system.query_log ORDER BY query_start_time_microseconds DESC
|
||||
"""
|
||||
)
|
||||
print(f"LOG: {res}")
|
||||
assert False
|
||||
assert "proj1" in res
|
||||
|
||||
if expect_broken_part == "proj2":
|
||||
assert expected_error in node.query_and_get_error(
|
||||
f"SELECT d FROM '{table}' WHERE c == 12 ORDER BY d"
|
||||
)
|
||||
else:
|
||||
query_id = node.query(
|
||||
f"SELECT queryID() FROM (SELECT d FROM '{table}' WHERE c == 12 ORDER BY d)"
|
||||
).strip()
|
||||
node.query("SYSTEM FLUSH LOGS")
|
||||
res = node.query(
|
||||
f"""
|
||||
SELECT query, splitByChar('.', arrayJoin(projections))[-1]
|
||||
FROM system.query_log
|
||||
WHERE query_id='{query_id}' AND type='QueryFinish'
|
||||
"""
|
||||
)
|
||||
if res == "":
|
||||
res = node.query(
|
||||
"""
|
||||
SELECT query_id, query, splitByChar('.', arrayJoin(projections))[-1]
|
||||
FROM system.query_log ORDER BY query_start_time_microseconds DESC
|
||||
"""
|
||||
)
|
||||
print(f"LOG: {res}")
|
||||
assert False
|
||||
assert "proj2" in res
|
||||
|
||||
assert check_result == int(node.query(f"CHECK TABLE {table}"))
|
||||
|
||||
|
||||
def test_broken_ignored(cluster):
|
||||
node = cluster.instances["node"]
|
||||
|
||||
table_name = "test1"
|
||||
create_table(node, table_name, 1)
|
||||
|
||||
insert(node, table_name, 0, 5)
|
||||
insert(node, table_name, 5, 5)
|
||||
insert(node, table_name, 10, 5)
|
||||
insert(node, table_name, 15, 5)
|
||||
|
||||
assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
# Break metadata (columns.txt) file of projection 'proj1'
|
||||
break_projection(node, table_name, "proj1", "all_2_2_0", "metadata")
|
||||
|
||||
# Run a SELECT and then a "check table" query.
|
||||
# Select works because it does not read columns.txt.
|
||||
# But expect check table result as 0.
|
||||
check(node, table_name, 0)
|
||||
|
||||
# Projection 'proj1' from part all_2_2_0 will now appear in broken parts info
|
||||
# because it was marked broken during "check table" query.
|
||||
assert "all_2_2_0\tproj1\tFILE_DOESNT_EXIST" in get_broken_projections_info(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
# Check table query will also show a list of parts which have broken projections.
|
||||
assert "all_2_2_0" in check_table_full(node, table_name)
|
||||
|
||||
# Break data file of projection 'proj2' for part all_2_2_0
|
||||
break_projection(node, table_name, "proj2", "all_2_2_0", "data")
|
||||
|
||||
# It will not yet appear in broken projections info.
|
||||
assert "proj2" not in get_broken_projections_info(node, table_name)
|
||||
|
||||
# Select now fails with error "File doesn't exist"
|
||||
check(node, table_name, 0, "proj2", "FILE_DOESNT_EXIST")
|
||||
|
||||
# Projection 'proj2' from part all_2_2_0 will now appear in broken parts info.
|
||||
assert "all_2_2_0\tproj2\tNO_FILE_IN_DATA_PART" in get_broken_projections_info(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
# Second select works, because projection is now marked as broken.
|
||||
check(node, table_name, 0)
|
||||
|
||||
# Break data file of projection 'proj2' for part all_3_3_0
|
||||
break_projection(node, table_name, "proj2", "all_3_3_0", "data")
|
||||
|
||||
# It will not yet appear in broken projections info.
|
||||
assert "all_3_3_0" not in get_broken_projections_info(node, table_name)
|
||||
|
||||
insert(node, table_name, 20, 5)
|
||||
insert(node, table_name, 25, 5)
|
||||
|
||||
# Part all_3_3_0 has 'proj1' and 'proj2' projections, but 'proj2' is broken and the server does NOT know it yet.
|
||||
# Parts all_4_4_0 and all_5_5_0 have both non-broken projections.
|
||||
# So a merge will be created for the future part all_3_5_1.
|
||||
# During merge it will fail to read from 'proj2' of part all_3_3_0 and proj2 will be marked broken.
|
||||
# Merge will be retried and on second attempt it will succeed.
|
||||
# The result part all_3_5_1 will have only 1 projection - 'proj1', because
|
||||
# it will skip 'proj2' as it will see that one part does not have it anymore in the set of valid projections.
|
||||
optimize(node, table_name, 0, 1)
|
||||
time.sleep(5)
|
||||
|
||||
# table_uuid=node.query(f"SELECT uuid FROM system.tables WHERE table='{table_name}' and database=currentDatabase()").strip()
|
||||
# assert 0 < int(
|
||||
# node.query(
|
||||
# f"""
|
||||
# SYSTEM FLUSH LOGS;
|
||||
# SELECT count() FROM system.text_log
|
||||
# WHERE level='Error'
|
||||
# AND logger_name='MergeTreeBackgroundExecutor'
|
||||
# AND message like 'Exception while executing background task %{table_uuid}:all_3_5_1%%Cannot open file%proj2.proj/c.bin%'
|
||||
# """)
|
||||
# )
|
||||
|
||||
assert "all_3_3_0" in get_broken_projections_info(node, table_name)
|
||||
check(node, table_name, 0)
|
||||
|
||||
|
||||
def test_materialize_broken_projection(cluster):
|
||||
node = cluster.instances["node"]
|
||||
|
||||
table_name = "test2"
|
||||
create_table(node, table_name, 1)
|
||||
|
||||
insert(node, table_name, 0, 5)
|
||||
insert(node, table_name, 5, 5)
|
||||
insert(node, table_name, 10, 5)
|
||||
insert(node, table_name, 15, 5)
|
||||
|
||||
assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
break_projection(node, table_name, "proj1", "all_1_1_0", "metadata")
|
||||
reattach(node, table_name)
|
||||
|
||||
assert "all_1_1_0\tproj1\tNO_FILE_IN_DATA_PART" in get_broken_projections_info(
|
||||
node, table_name
|
||||
)
|
||||
assert "Part all_1_1_0 has a broken projection proj1" in check_table_full(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
break_projection(node, table_name, "proj2", "all_1_1_0", "data")
|
||||
reattach(node, table_name)
|
||||
|
||||
assert "all_1_1_0\tproj2\tFILE_DOESNT_EXIST" in get_broken_projections_info(
|
||||
node, table_name
|
||||
)
|
||||
assert "Part all_1_1_0 has a broken projection proj2" in check_table_full(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
materialize_projection(node, table_name, "proj1")
|
||||
|
||||
assert "has a broken projection" not in check_table_full(node, table_name)
|
||||
|
||||
|
||||
def test_broken_ignored_replicated(cluster):
|
||||
node = cluster.instances["node"]
|
||||
|
||||
table_name = "test3"
|
||||
table_name2 = "test3_replica"
|
||||
create_table(node, table_name, 1)
|
||||
|
||||
insert(node, table_name, 0, 5)
|
||||
insert(node, table_name, 5, 5)
|
||||
insert(node, table_name, 10, 5)
|
||||
insert(node, table_name, 15, 5)
|
||||
|
||||
check(node, table_name, 1)
|
||||
|
||||
create_table(node, table_name2, 2, table_name)
|
||||
check(node, table_name2, 1)
|
||||
|
||||
break_projection(node, table_name, "proj1", "all_0_0_0", "data")
|
||||
assert "Part all_0_0_0 has a broken projection proj1" in check_table_full(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
break_part(node, table_name, "all_0_0_0")
|
||||
node.query(f"SYSTEM SYNC REPLICA {table_name}")
|
||||
assert "has a broken projection" not in check_table_full(node, table_name)
|
||||
|
||||
|
||||
def get_random_string(string_length=8):
|
||||
alphabet = string.ascii_letters + string.digits
|
||||
return "".join((random.choice(alphabet) for _ in range(string_length)))
|
||||
|
||||
|
||||
def test_broken_projections_in_backups_1(cluster):
|
||||
node = cluster.instances["node"]
|
||||
|
||||
table_name = "test4"
|
||||
create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name)
|
||||
|
||||
node.query("SYSTEM STOP MERGES")
|
||||
|
||||
insert(node, table_name, 0, 5)
|
||||
insert(node, table_name, 5, 5)
|
||||
insert(node, table_name, 10, 5)
|
||||
insert(node, table_name, 15, 5)
|
||||
|
||||
assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
check(node, table_name, 1)
|
||||
|
||||
break_projection(node, table_name, "proj1", "all_2_2_0", "data")
|
||||
check(node, table_name, 0, "proj1", "FILE_DOESNT_EXIST")
|
||||
|
||||
assert "all_2_2_0\tproj1\tNO_FILE_IN_DATA_PART" in get_broken_projections_info(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
backup_name = f"b1-{get_random_string()}"
|
||||
assert "BACKUP_CREATED" in node.query(
|
||||
f"""
|
||||
set backup_restore_keeper_fault_injection_probability=0.0;
|
||||
backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false;
|
||||
"""
|
||||
)
|
||||
|
||||
assert "RESTORED" in node.query(
|
||||
f"""
|
||||
drop table {table_name} sync;
|
||||
set backup_restore_keeper_fault_injection_probability=0.0;
|
||||
restore table {table_name} from Disk('backups', '{backup_name}');
|
||||
"""
|
||||
)
|
||||
|
||||
node.query("SYSTEM STOP MERGES")
|
||||
|
||||
check(node, table_name, 1)
|
||||
assert "" == get_broken_projections_info(node, table_name)
|
||||
|
||||
|
||||
def test_broken_projections_in_backups_2(cluster):
|
||||
node = cluster.instances["node"]
|
||||
|
||||
table_name = "test5"
|
||||
create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name)
|
||||
|
||||
insert(node, table_name, 0, 5)
|
||||
insert(node, table_name, 5, 5)
|
||||
insert(node, table_name, 10, 5)
|
||||
insert(node, table_name, 15, 5)
|
||||
|
||||
assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
check(node, table_name, 1)
|
||||
break_projection(node, table_name, "proj2", "all_2_2_0", "part")
|
||||
check(node, table_name, 0, "proj2", "ErrnoException")
|
||||
|
||||
assert "all_2_2_0\tproj2\tFILE_DOESNT_EXIST" == get_broken_projections_info(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
assert "FILE_DOESNT_EXIST" in node.query_and_get_error(
|
||||
f"""
|
||||
set backup_restore_keeper_fault_injection_probability=0.0;
|
||||
backup table {table_name} to Disk('backups', 'b2')
|
||||
"""
|
||||
)
|
||||
|
||||
materialize_projection(node, table_name, "proj2")
|
||||
check(node, table_name, 1)
|
||||
|
||||
backup_name = f"b3-{get_random_string()}"
|
||||
assert "BACKUP_CREATED" in node.query(
|
||||
f"""
|
||||
set backup_restore_keeper_fault_injection_probability=0.0;
|
||||
backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false;
|
||||
"""
|
||||
)
|
||||
|
||||
assert "RESTORED" in node.query(
|
||||
f"""
|
||||
drop table {table_name} sync;
|
||||
set backup_restore_keeper_fault_injection_probability=0.0;
|
||||
restore table {table_name} from Disk('backups', '{backup_name}');
|
||||
"""
|
||||
)
|
||||
check(node, table_name, 1)
|
||||
|
||||
|
||||
def test_broken_projections_in_backups_3(cluster):
|
||||
node = cluster.instances["node"]
|
||||
|
||||
table_name = "test6"
|
||||
create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name)
|
||||
|
||||
node.query("SYSTEM STOP MERGES")
|
||||
|
||||
insert(node, table_name, 0, 5)
|
||||
insert(node, table_name, 5, 5)
|
||||
insert(node, table_name, 10, 5)
|
||||
insert(node, table_name, 15, 5)
|
||||
|
||||
assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
check(node, table_name, 1)
|
||||
|
||||
break_projection(node, table_name, "proj1", "all_1_1_0", "part")
|
||||
assert "Part all_1_1_0 has a broken projection proj1" in check_table_full(
|
||||
node, table_name
|
||||
)
|
||||
assert "all_1_1_0\tproj1\tFILE_DOESNT_EXIST" == get_broken_projections_info(
|
||||
node, table_name
|
||||
)
|
||||
|
||||
backup_name = f"b4-{get_random_string()}"
|
||||
assert "BACKUP_CREATED" in node.query(
|
||||
f"""
|
||||
set backup_restore_keeper_fault_injection_probability=0.0;
|
||||
backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false, allow_backup_broken_projections=true;
|
||||
"""
|
||||
)
|
||||
|
||||
assert "RESTORED" in node.query(
|
||||
f"""
|
||||
drop table {table_name} sync;
|
||||
set backup_restore_keeper_fault_injection_probability=0.0;
|
||||
restore table {table_name} from Disk('backups', '{backup_name}');
|
||||
"""
|
||||
)
|
||||
|
||||
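# The broken projection was included in the backup, so after restore it is still reported as broken.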
check(node, table_name, 0)
|
||||
assert "all_1_1_0\tproj1\tNO_FILE_IN_DATA_PART" == get_broken_projections_info(
|
||||
node, table_name
|
||||
)
|
@ -5,7 +5,7 @@ import time
|
||||
import pytz
|
||||
import uuid
|
||||
import grpc
|
||||
from helpers.cluster import ClickHouseCluster, run_and_check
|
||||
from helpers.cluster import ClickHouseCluster, is_arm, run_and_check
|
||||
from threading import Thread
|
||||
import gzip
|
||||
import lz4.frame
|
||||
@ -20,6 +20,10 @@ import clickhouse_grpc_pb2, clickhouse_grpc_pb2_grpc # Execute pb2/generate.py
|
||||
GRPC_PORT = 9100
|
||||
DEFAULT_ENCODING = "utf-8"
|
||||
|
||||
# GRPC is disabled on ARM build - skip tests
|
||||
if is_arm():
|
||||
pytestmark = pytest.mark.skip
|
||||
|
||||
|
||||
# Utilities
|
||||
|
||||
|
@ -5,9 +5,13 @@ from string import Template
|
||||
import pymysql.cursors
|
||||
import pytest
|
||||
from helpers.client import QueryRuntimeException
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
from helpers.cluster import ClickHouseCluster, is_arm
|
||||
from helpers.network import PartitionManager
|
||||
|
||||
|
||||
if is_arm():
|
||||
pytestmark = pytest.mark.skip
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
clickhouse_node = cluster.add_instance(
|
||||
"node1",
|
||||
|
@ -67,8 +67,8 @@ def test_rollback_unfinished_on_restart1(start_cluster):
|
||||
tx(1, "insert into mt values (5, 50)")
|
||||
tx(1, "alter table mt update m = m+n in partition id '1' where 1")
|
||||
|
||||
# check that uncommitted insert will be rolled back on restart
|
||||
tx(3, "begin transaction")
|
||||
# check that uncommitted insert will be rolled back on restart (using `START TRANSACTION` syntax)
|
||||
tx(3, "start transaction")
|
||||
tid5 = tx(3, "select transactionID()").strip()
|
||||
tx(3, "insert into mt values (6, 70)")
|
||||
|
||||
|
@ -69,6 +69,7 @@ hello
|
||||
(3333.6,'test')
|
||||
(3333.6333333333,'test')
|
||||
(3333.6333333333,'test')
|
||||
\N
|
||||
123456.1234 Decimal(20, 4)
|
||||
123456.1234 Decimal(20, 4)
|
||||
123456789012345.12 Decimal(30, 4)
|
||||
|
@ -81,6 +81,7 @@ SELECT JSONExtract('{"a":3333.6333333333333333333333, "b":"test"}', 'Tuple(a Dec
|
||||
SELECT JSONExtract('{"a":"3333.6333333333333333333333", "b":"test"}', 'Tuple(a Decimal(10,1), b LowCardinality(String))');
|
||||
SELECT JSONExtract('{"a":3333.6333333333333333333333, "b":"test"}', 'Tuple(a Decimal(20,10), b LowCardinality(String))');
|
||||
SELECT JSONExtract('{"a":"3333.6333333333333333333333", "b":"test"}', 'Tuple(a Decimal(20,10), b LowCardinality(String))');
|
||||
SELECT JSONExtract(materialize('{"string_value":null}'), materialize('string_value'), 'LowCardinality(Nullable(String))');
|
||||
SELECT JSONExtract('{"a":123456.123456}', 'a', 'Decimal(20, 4)') as a, toTypeName(a);
|
||||
SELECT JSONExtract('{"a":"123456.123456"}', 'a', 'Decimal(20, 4)') as a, toTypeName(a);
|
||||
SELECT JSONExtract('{"a":"123456789012345.12"}', 'a', 'Decimal(30, 4)') as a, toTypeName(a);
|
||||
@ -326,3 +327,4 @@ SELECT JSONExtract('[]', JSONExtract('0', 'UInt256'), 'UInt256'); -- { serverErr
|
||||
|
||||
SELECT '--show error: key of map type should be String';
|
||||
SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(Int64, Array(Float64))'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
SELECT JSONExtract(materialize(toLowCardinality('{"string_value":null}')), materialize('string_value'), 'LowCardinality(Nullable(String))'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
|
@ -6,3 +6,11 @@
|
||||
10007555
|
||||
10007554
|
||||
10001780
|
||||
111195.05197522942
|
||||
111195.05197522942
|
||||
110567.32686882635
|
||||
111699.2516454354
|
||||
10007554.677770648
|
||||
10007554.677770648
|
||||
10007554.677770648
|
||||
10001780.1
|
||||
|
@ -1,3 +1,19 @@
|
||||
SET geo_distance_returns_float64_on_float64_arguments = 0;
|
||||
|
||||
SELECT greatCircleDistance(0., 0., 0., 1.);
|
||||
SELECT greatCircleDistance(0., 89., 0, 90.);
|
||||
|
||||
SELECT geoDistance(0., 0., 0., 1.);
|
||||
SELECT geoDistance(0., 89., 0., 90.);
|
||||
|
||||
SELECT greatCircleDistance(0., 0., 90., 0.);
|
||||
SELECT greatCircleDistance(0., 0., 0., 90.);
|
||||
|
||||
SELECT geoDistance(0., 0., 90., 0.);
|
||||
SELECT geoDistance(0., 0., 0., 90.);
|
||||
|
||||
SET geo_distance_returns_float64_on_float64_arguments = 1;
|
||||
|
||||
SELECT greatCircleDistance(0., 0., 0., 1.);
|
||||
SELECT greatCircleDistance(0., 89., 0, 90.);
|
||||
|
||||
|
@ -3,3 +3,8 @@
|
||||
0.7135
|
||||
10007555
|
||||
10007554
|
||||
0.1224
|
||||
0.7071
|
||||
0.7135
|
||||
10007555
|
||||
10007554
|
||||
|
@ -1,3 +1,14 @@
|
||||
SET geo_distance_returns_float64_on_float64_arguments = 0;
|
||||
|
||||
SELECT round(greatCircleAngle(0, 45, 0.1, 45.1), 4);
|
||||
SELECT round(greatCircleAngle(0, 45, 1, 45), 4);
|
||||
SELECT round(greatCircleAngle(0, 45, 1, 45.1), 4);
|
||||
|
||||
SELECT round(greatCircleDistance(0, 0, 0, 90), 4);
|
||||
SELECT round(greatCircleDistance(0, 0, 90, 0), 4);
|
||||
|
||||
SET geo_distance_returns_float64_on_float64_arguments = 1;
|
||||
|
||||
SELECT round(greatCircleAngle(0, 45, 0.1, 45.1), 4);
|
||||
SELECT round(greatCircleAngle(0, 45, 1, 45), 4);
|
||||
SELECT round(greatCircleAngle(0, 45, 1, 45.1), 4);
|
||||
|
@ -688,6 +688,9 @@ CREATE TABLE system.projection_parts
|
||||
`rows_where_ttl_info.expression` Array(String),
|
||||
`rows_where_ttl_info.min` Array(DateTime),
|
||||
`rows_where_ttl_info.max` Array(DateTime),
|
||||
`is_broken` UInt8,
|
||||
`exception_code` Int32,
|
||||
`exception` String,
|
||||
`bytes` UInt64 ALIAS bytes_on_disk,
|
||||
`marks_size` UInt64 ALIAS marks_bytes,
|
||||
`part_name` String ALIAS name
|
||||
|
@ -57,6 +57,7 @@ URLPathHierarchy
|
||||
UUIDNumToString
|
||||
UUIDStringToNum
|
||||
_CAST
|
||||
__actionName
|
||||
__bitBoolMaskAnd
|
||||
__bitBoolMaskOr
|
||||
__bitSwapLastTwo
|
||||
|
@ -8,3 +8,10 @@
|
||||
\0\0\0\0\0
|
||||
131231
|
||||
131231
|
||||
1234
|
||||
1234
|
||||
{"b":131231}
|
||||
\0\0\0\0
|
||||
1234567890
|
||||
18446744073709551615
|
||||
-9223372036854775807
|
||||
|
@ -6,3 +6,10 @@ SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(5)');
|
||||
SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(6)');
|
||||
SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(5))') FROM numbers(2);
|
||||
SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(6))') FROM numbers(2);
|
||||
SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))');
|
||||
SELECT JSONExtract(materialize('{"a": 131231, "b": "1234"}'), 'b', 'LowCardinality(FixedString(4))');
|
||||
SELECT JSONExtract(materialize('{"a": {"b": 131231} }'), 'a', 'LowCardinality(FixedString(12))');
|
||||
SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(4))');
|
||||
SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(10))');
|
||||
SELECT JSONExtract(materialize('{"a": 18446744073709551615}'), 'a', 'LowCardinality(FixedString(20))');
|
||||
SELECT JSONExtract(materialize('{"a": -9223372036854775807}'), 'a', 'LowCardinality(FixedString(20))');
|
||||
|
@ -72,7 +72,7 @@ FROM (
|
||||
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
|
||||
USING (a)
|
||||
) ORDER BY 1;
|
||||
1 Int32
|
||||
1 Int64
|
||||
SELECT *, * APPLY toTypeName
|
||||
FROM (
|
||||
SELECT t2.*
|
||||
@ -80,7 +80,7 @@ FROM (
|
||||
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
|
||||
USING (a)
|
||||
) ORDER BY 1;
|
||||
\N Nullable(UInt32)
|
||||
\N Nullable(Int64)
|
||||
SELECT *, * APPLY toTypeName
|
||||
FROM (
|
||||
SELECT *
|
||||
@ -209,7 +209,7 @@ FROM (
|
||||
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
|
||||
USING (a)
|
||||
) ORDER BY 1;
|
||||
2 Nullable(Int32)
|
||||
2 Nullable(Int64)
|
||||
SELECT *, * APPLY toTypeName
|
||||
FROM (
|
||||
SELECT t2.*
|
||||
@ -217,7 +217,7 @@ FROM (
|
||||
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
|
||||
USING (a)
|
||||
) ORDER BY 1;
|
||||
2 UInt32
|
||||
2 Int64
|
||||
SELECT *, * APPLY toTypeName
|
||||
FROM (
|
||||
SELECT *
|
||||
@ -354,8 +354,8 @@ FROM (
|
||||
FULL JOIN (SELECT 2 :: UInt32 as a) t2
|
||||
USING (a)
|
||||
) ORDER BY 1;
|
||||
1 Nullable(Int32)
|
||||
2 Nullable(Int32)
|
||||
1 Nullable(Int64)
|
||||
2 Nullable(Int64)
|
||||
SELECT *, * APPLY toTypeName
|
||||
FROM (
|
||||
SELECT t2.*
|
||||
@ -363,8 +363,8 @@ FROM (
|
||||
FULL JOIN (SELECT 2 :: UInt32 as a) t2
|
||||
USING (a)
|
||||
) ORDER BY 1;
|
||||
2 Nullable(UInt32)
|
||||
\N Nullable(UInt32)
|
||||
2 Nullable(Int64)
|
||||
\N Nullable(Int64)
|
||||
SELECT *, * APPLY toTypeName
|
||||
FROM (
|
||||
SELECT *
|
||||
|
@ -12,8 +12,10 @@ OK
|
||||
2
|
||||
2
|
||||
OK
|
||||
1
|
||||
===== MaterializedView =====
|
||||
OK
|
||||
1
|
||||
0
|
||||
0
|
||||
OK
|
||||
|
@ -1,18 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-replicated-database
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
|
||||
user1="user02884_1_$RANDOM$RANDOM"
|
||||
user2="user02884_2_$RANDOM$RANDOM"
|
||||
user3="user02884_3_$RANDOM$RANDOM"
|
||||
db="db02884_$RANDOM$RANDOM"
|
||||
user1="user02884_1_${CLICKHOUSE_DATABASE}_$RANDOM"
|
||||
user2="user02884_2_${CLICKHOUSE_DATABASE}_$RANDOM"
|
||||
user3="user02884_3_${CLICKHOUSE_DATABASE}_$RANDOM"
|
||||
db=${CLICKHOUSE_DATABASE}
|
||||
|
||||
${CLICKHOUSE_CLIENT} --multiquery <<EOF
|
||||
DROP DATABASE IF EXISTS $db;
|
||||
CREATE DATABASE $db;
|
||||
CREATE TABLE $db.test_table (s String) ENGINE = MergeTree ORDER BY s;
|
||||
|
||||
DROP USER IF EXISTS $user1, $user2, $user3;
|
||||
@ -92,6 +91,7 @@ ${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_view_10
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $db.test_view_10 MODIFY SQL SECURITY INVOKER"
|
||||
(( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_view_10" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED"
|
||||
${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE $db.test_view_10" | grep -c "SQL SECURITY INVOKER"
|
||||
|
||||
|
||||
echo "===== MaterializedView ====="
|
||||
@ -136,6 +136,7 @@ ${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_5 TO $user2"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $db.test_mv_5 MODIFY SQL SECURITY NONE"
|
||||
${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_5"
|
||||
${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE $db.test_mv_5" | grep -c "SQL SECURITY NONE"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_1 TO $user2"
|
||||
${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_3 TO $user2"
|
||||
@ -221,6 +222,4 @@ EOF
|
||||
|
||||
${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_row_2"
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS $db;"
|
||||
${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS $user1, $user2, $user3";
|
||||
|
@ -0,0 +1,4 @@
|
||||
all_0_1_1 1
|
||||
all_2_3_1 1
|
||||
0
|
||||
40 1580
|
@ -0,0 +1,47 @@
|
||||
-- Tags: no-parallel
|
||||
|
||||
create database if not exists shard_0;
|
||||
create database if not exists shard_1;
|
||||
|
||||
drop table if exists shard_0.from_1;
|
||||
drop table if exists shard_1.from_1;
|
||||
drop table if exists shard_0.to;
|
||||
drop table if exists shard_1.to;
|
||||
|
||||
create table shard_0.from_1 (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/from_1_' || currentDatabase(), '0') order by x settings old_parts_lifetime=1, max_cleanup_delay_period=1, cleanup_delay_period=1;
|
||||
create table shard_1.from_1 (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/from_1_' || currentDatabase(), '1') order by x settings old_parts_lifetime=1, max_cleanup_delay_period=1, cleanup_delay_period=1;
|
||||
|
||||
system stop merges shard_0.from_1;
|
||||
system stop merges shard_1.from_1;
|
||||
insert into shard_0.from_1 select number + 20 from numbers(10);
|
||||
insert into shard_0.from_1 select number + 30 from numbers(10);
|
||||
|
||||
insert into shard_0.from_1 select number + 40 from numbers(10);
|
||||
insert into shard_0.from_1 select number + 50 from numbers(10);
|
||||
|
||||
system sync replica shard_1.from_1;
|
||||
|
||||
create table shard_0.to (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/to_' || currentDatabase(), '0') order by x settings old_parts_lifetime=1, max_cleanup_delay_period=1, cleanup_delay_period=1, max_parts_to_merge_at_once=2, shared_merge_tree_disable_merges_and_mutations_assignment=1;
|
||||
|
||||
create table shard_1.to (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/to_' || currentDatabase(), '1') order by x settings old_parts_lifetime=1, max_cleanup_delay_period=1, cleanup_delay_period=1, max_parts_to_merge_at_once=2;
|
||||
|
||||
detach table shard_1.to;
|
||||
|
||||
alter table shard_0.from_1 on cluster test_cluster_two_shards_different_databases move partition tuple() to table shard_0.to format Null settings distributed_ddl_output_mode='never_throw', distributed_ddl_task_timeout = 1;
|
||||
|
||||
drop table if exists shard_0.from_1;
|
||||
drop table if exists shard_1.from_1;
|
||||
OPTIMIZE TABLE shard_0.to;
|
||||
OPTIMIZE TABLE shard_0.to;
|
||||
select name, active from system.parts where database='shard_0' and table='to' and active order by name;
|
||||
|
||||
system restart replica shard_0.to;
|
||||
|
||||
select sleep(3);
|
||||
|
||||
attach table shard_1.to;
|
||||
system sync replica shard_1.to;
|
||||
select count(), sum(x) from shard_1.to;
|
||||
|
||||
drop table if exists shard_0.to;
|
||||
drop table if exists shard_1.to;
|
@ -1,2 +0,0 @@
|
||||
1
|
||||
1
|
@ -0,0 +1,92 @@
|
||||
{% for column_expression_type in ['ALIAS', 'MATERIALIZED'] -%}
|
||||
{{ column_expression_type }}
|
||||
1
|
||||
1
|
||||
369 124 123 b
|
||||
369 124 123 b
|
||||
124
|
||||
3693 1231 a 1231
|
||||
3693 1232 1231 1231 a
|
||||
a
|
||||
-- { echoOn }
|
||||
-- USING alias column contains default in old analyzer (but both queries below should have the same result)
|
||||
SELECT y * 2, s || 'a' FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
|
||||
738 ba
|
||||
7386 aa
|
||||
13332 a
|
||||
SELECT y * 2, s || 'a' FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
738 ba
|
||||
7386 aa
|
||||
13332 a
|
||||
SELECT (1, *) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
|
||||
(1,369,123,'b',124)
|
||||
(1,3693,1231,'a',0)
|
||||
(1,6666,0,'',48)
|
||||
SELECT (1, *) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
|
||||
(1,369,'b')
|
||||
(1,3693,'a')
|
||||
(1,6666,'')
|
||||
SELECT (1, t1.*) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL;
|
||||
(1,0,'')
|
||||
(1,123,'b')
|
||||
(1,1231,'a')
|
||||
SELECT (1, t1.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
(1,'',6666)
|
||||
(1,'a',3693)
|
||||
(1,'b',369)
|
||||
SELECT (1, t1.*, t2.*) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL;
|
||||
(1,0,'',6666,48)
|
||||
(1,123,'b',369,124)
|
||||
(1,1231,'a',0,0)
|
||||
SELECT (1, t1.*, t2.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
(1,'',0,6666)
|
||||
(1,'a',3693,0)
|
||||
(1,'b',369,369)
|
||||
SELECT t1.z, t2.z, t3.z FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1;
|
||||
0 0 43
|
||||
0 48 0
|
||||
124 124 0
|
||||
1232 0 1232
|
||||
SELECT * FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1;
|
||||
126 0 0 42
|
||||
369 123 b 124 0
|
||||
3693 1231 a 0 1231
|
||||
6666 0 48 0
|
||||
SELECT t1.*, t2.*, t3.* FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1;
|
||||
0 126 0 42
|
||||
0 6666 48 0
|
||||
123 b 369 124 0
|
||||
1231 a 3693 0 1231
|
||||
SELECT (1, t1.*, t2.*, t3.*) FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1 SETTINGS allow_experimental_analyzer = 1;
|
||||
(1,0,'',126,0,42)
|
||||
(1,0,'',6666,48,0)
|
||||
(1,123,'b',369,124,0)
|
||||
(1,1231,'a',3693,0,1231)
|
||||
SELECT y FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
|
||||
369
|
||||
3693
|
||||
6666
|
||||
SELECT y FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
369
|
||||
3693
|
||||
6666
|
||||
SELECT s FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL;
|
||||
|
||||
a
|
||||
b
|
||||
SELECT s FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
|
||||
a
|
||||
b
|
||||
SELECT y FROM t1 FULL JOIN t2 USING (y) PREWHERE y * 2 > 2 ORDER BY ALL SETTINGS allow_experimental_analyzer = 1, join_use_nulls = 0;
|
||||
369
|
||||
3693
|
||||
6666
|
||||
SELECT y FROM t1 FULL JOIN t2 USING (y) PREWHERE y * 2 > 2 ORDER BY ALL SETTINGS allow_experimental_analyzer = 1, join_use_nulls = 1;
|
||||
369
|
||||
3693
|
||||
6666
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS t2;
|
||||
DROP TABLE IF EXISTS t3;
|
||||
{% endfor -%}
|
@ -1,12 +0,0 @@
|
||||
CREATE TABLE t1 (x Int16, y ALIAS x + x * 2) ENGINE=MergeTree() ORDER BY x;
|
||||
CREATE TABLE t2 (y Int16, z Int16) ENGINE=MergeTree() ORDER BY y;
|
||||
|
||||
INSERT INTO t1 VALUES (1231), (123);
|
||||
INSERT INTO t2 VALUES (6666, 48);
|
||||
INSERT INTO t2 VALUES (369, 50);
|
||||
|
||||
SELECT count() FROM t1 INNER JOIN t2 USING (y);
|
||||
SELECT count() FROM t2 INNER JOIN t1 USING (y);
|
||||
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS t2;
|
@ -0,0 +1,67 @@
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS t2;
|
||||
DROP TABLE IF EXISTS t3;
|
||||
|
||||
{% for column_expression_type in ['ALIAS', 'MATERIALIZED'] %}
|
||||
|
||||
-- { echoOff }
|
||||
|
||||
SELECT '{{ column_expression_type }}';
|
||||
|
||||
CREATE TABLE t1 (x Int16, y Int64 {{ column_expression_type }} x + x * 2, z {{ column_expression_type }} x + 1, s String) ENGINE=MergeTree() ORDER BY x;
|
||||
CREATE TABLE t2 (y Int128, z Int16) ENGINE=MergeTree() ORDER BY y;
|
||||
|
||||
CREATE TABLE t3 (x Int16, y Int64 {{ column_expression_type }} x + x * 2, z {{ column_expression_type }} x + 1) ENGINE=MergeTree() ORDER BY x;
|
||||
|
||||
INSERT INTO t1 VALUES (1231, 'a'), (123, 'b');
|
||||
|
||||
INSERT INTO t2 VALUES (6666, 48);
|
||||
INSERT INTO t2 VALUES (369, 124);
|
||||
|
||||
INSERT INTO t3 VALUES (1231), (42);
|
||||
|
||||
SELECT count() FROM t1 INNER JOIN t2 USING (y);
|
||||
SELECT count() FROM t2 INNER JOIN t1 USING (y);
|
||||
|
||||
-- `SELECT *` works differently for ALIAS columns with analyzer
|
||||
SELECT * FROM t1 INNER JOIN t2 USING (y, z) SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT * FROM t2 INNER JOIN t1 USING (y, z) SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT t2.z FROM t1 INNER JOIN t2 USING (y);
|
||||
|
||||
SELECT * FROM t1 INNER JOIN t3 USING (y) SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT * FROM t3 INNER JOIN t1 USING (y, z) SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT s FROM t1 INNER JOIN t3 USING (y);
|
||||
|
||||
-- { echoOn }
|
||||
-- USING alias column contains default in old analyzer (but both queries below should have the same result)
|
||||
SELECT y * 2, s || 'a' FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT y * 2, s || 'a' FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
|
||||
SELECT (1, *) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT (1, *) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
|
||||
|
||||
SELECT (1, t1.*) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL;
|
||||
SELECT (1, t1.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
|
||||
SELECT (1, t1.*, t2.*) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL;
|
||||
SELECT (1, t1.*, t2.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
|
||||
SELECT t1.z, t2.z, t3.z FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT * FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT t1.*, t2.*, t3.* FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT (1, t1.*, t2.*, t3.*) FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1 SETTINGS allow_experimental_analyzer = 1;
|
||||
|
||||
SELECT y FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
|
||||
SELECT y FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
|
||||
SELECT s FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL;
|
||||
SELECT s FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
|
||||
|
||||
SELECT y FROM t1 FULL JOIN t2 USING (y) PREWHERE y * 2 > 2 ORDER BY ALL SETTINGS allow_experimental_analyzer = 1, join_use_nulls = 0;
|
||||
SELECT y FROM t1 FULL JOIN t2 USING (y) PREWHERE y * 2 > 2 ORDER BY ALL SETTINGS allow_experimental_analyzer = 1, join_use_nulls = 1;
|
||||
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS t2;
|
||||
DROP TABLE IF EXISTS t3;
|
||||
|
||||
{% endfor %}
|
@ -0,0 +1,14 @@
|
||||
--
|
||||
3
|
||||
--
|
||||
3
|
||||
--
|
||||
0
|
||||
--
|
||||
\N \N
|
||||
--
|
||||
a
|
||||
a a
|
||||
--
|
||||
a a
|
||||
\N \N
|
@ -0,0 +1,81 @@
|
||||
SET join_use_nulls = 1;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
select c FROM (
|
||||
select
|
||||
d2.c
|
||||
from ( select 1 as a, 2 as b ) d1
|
||||
FULL join ( select 1 as a, 3 as c ) d2
|
||||
on (d1.a = d2.a)
|
||||
)
|
||||
;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
with d1 as (
|
||||
select
|
||||
1 as a,
|
||||
2 as b
|
||||
),
|
||||
d2 as (
|
||||
select
|
||||
1 as a,
|
||||
3 as c
|
||||
),
|
||||
joined as (
|
||||
select
|
||||
d1.*,
|
||||
d2.c
|
||||
from d1
|
||||
inner join d2
|
||||
on (d1.a = d2.a)
|
||||
)
|
||||
select c
|
||||
from joined;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
WITH
|
||||
a AS ( SELECT 0 AS key, 'a' AS acol ),
|
||||
b AS ( SELECT 2 AS key )
|
||||
SELECT a.key
|
||||
FROM b
|
||||
LEFT JOIN a ON 1
|
||||
LEFT JOIN a AS a1 ON 1
|
||||
;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
WITH
|
||||
a AS ( SELECT 0 AS key, 'a' AS acol ),
|
||||
b AS ( SELECT 2 AS key )
|
||||
SELECT a.acol, a1.acol
|
||||
FROM b
|
||||
LEFT JOIN a ON a.key = b.key
|
||||
LEFT JOIN a AS a1 ON a1.key = a.key
|
||||
;
|
||||
SELECT '--';
|
||||
|
||||
WITH
|
||||
a AS ( SELECT 0 AS key, 'a' AS acol ),
|
||||
b AS ( SELECT 2 AS key )
|
||||
SELECT a.acol, a1.acol
|
||||
FROM b
|
||||
FULL JOIN a ON a.key = b.key
|
||||
FULL JOIN a AS a1 ON a1.key = a.key
|
||||
ORDER BY 1
|
||||
SETTINGS join_use_nulls = 0
|
||||
;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
WITH
|
||||
a AS ( SELECT 0 AS key, 'a' AS acol ),
|
||||
b AS ( SELECT 2 AS key )
|
||||
SELECT a.acol, a1.acol
|
||||
FROM b
|
||||
FULL JOIN a ON a.key = b.key
|
||||
FULL JOIN a AS a1 ON a1.key = a.key
|
||||
ORDER BY 1
|
||||
;
|
@ -0,0 +1,22 @@
|
||||
-- {echoOn}
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (a Int) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO test VALUES (1), (2), (3);
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase();
|
||||
all_1_1_1
|
||||
ALTER TABLE test DETACH PART 'all_1_1_1';
|
||||
ALTER TABLE test ATTACH PART 'all_1_1_1';
|
||||
SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase();
|
||||
all_2_2_0
|
||||
-- Same as above, but with attach partition (different code path, should be tested as well)
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (a Int) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO test VALUES (1), (2), (3);
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase();
|
||||
all_1_1_1
|
||||
ALTER TABLE test DETACH PART 'all_1_1_1';
|
||||
ALTER TABLE test ATTACH PARTITION tuple();
|
||||
SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase();
|
||||
all_2_2_0
|
@ -0,0 +1,21 @@
|
||||
-- Tags: no-shared-merge-tree
|
||||
SET alter_sync = 2;
|
||||
-- {echoOn}
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (a Int) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO test VALUES (1), (2), (3);
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase();
|
||||
ALTER TABLE test DETACH PART 'all_1_1_1';
|
||||
ALTER TABLE test ATTACH PART 'all_1_1_1';
|
||||
SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase();
|
||||
|
||||
-- Same as above, but with attach partition (different code path, should be tested as well)
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (a Int) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO test VALUES (1), (2), (3);
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase();
|
||||
ALTER TABLE test DETACH PART 'all_1_1_1';
|
||||
ALTER TABLE test ATTACH PARTITION tuple();
|
||||
SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase();
|
@ -0,0 +1,6 @@
|
||||
[] 0 ['2']
|
||||
['0'] 2 ['0']
|
||||
['0'] 2 ['0']
|
||||
['1'] 1 []
|
||||
|
||||
[] 3 []
|
@ -0,0 +1,14 @@
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT
|
||||
([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id,
|
||||
count()
|
||||
FROM numbers(3)
|
||||
GROUP BY item_id WITH TOTALS
|
||||
) AS l FULL JOIN (
|
||||
SELECT
|
||||
([toString((number % 2) * 2)] :: Array(String)) AS item_id
|
||||
FROM numbers(3)
|
||||
) AS r
|
||||
ON l.item_id = r.item_id
|
||||
ORDER BY 1,2,3;
|
@ -1,5 +1,6 @@
|
||||
v24.2.2.71-stable 2024-03-15
|
||||
v24.2.1.2248-stable 2024-02-29
|
||||
v24.1.8.22-stable 2024-03-26
|
||||
v24.1.7.18-stable 2024-03-15
|
||||
v24.1.6.52-stable 2024-03-07
|
||||
v24.1.5.6-stable 2024-02-14
|
||||
@ -7,6 +8,7 @@ v24.1.4.20-stable 2024-02-14
|
||||
v24.1.3.31-stable 2024-02-09
|
||||
v24.1.2.5-stable 2024-02-02
|
||||
v24.1.1.2048-stable 2024-01-30
|
||||
v23.12.6.19-stable 2024-03-26
|
||||
v23.12.5.81-stable 2024-03-15
|
||||
v23.12.4.15-stable 2024-02-09
|
||||
v23.12.3.40-stable 2024-02-02
|
||||
@ -29,6 +31,7 @@ v23.9.4.11-stable 2023-11-08
|
||||
v23.9.3.12-stable 2023-10-31
|
||||
v23.9.2.56-stable 2023-10-19
|
||||
v23.9.1.1854-stable 2023-09-29
|
||||
v23.8.12.13-lts 2024-03-26
|
||||
v23.8.11.28-lts 2024-03-15
|
||||
v23.8.10.43-lts 2024-03-05
|
||||
v23.8.9.54-lts 2024-01-05
|
||||
@ -60,6 +63,7 @@ v23.4.4.16-stable 2023-06-17
|
||||
v23.4.3.48-stable 2023-06-12
|
||||
v23.4.2.11-stable 2023-05-02
|
||||
v23.4.1.1943-stable 2023-04-27
|
||||
v23.3.22.3-lts 2024-03-26
|
||||
v23.3.21.26-lts 2024-03-15
|
||||
v23.3.20.27-lts 2024-03-05
|
||||
v23.3.19.32-lts 2024-01-05
|