diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 2dddde9aa14..ff0adee1443 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -6,6 +6,7 @@ env: PYTHONUNBUFFERED: 1 on: # yamllint disable-line rule:truthy + merge_group: pull_request: types: - synchronize @@ -29,6 +30,7 @@ jobs: fetch-depth: 0 # to get version filter: tree:0 - name: Labels check + if: ${{ github.event_name != 'merge_group' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 run_check.py @@ -56,16 +58,9 @@ jobs: echo 'EOF' } >> "$GITHUB_OUTPUT" - name: Re-create GH statuses for skipped jobs if any + if: ${{ github.event_name != 'merge_group' }} run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses - - name: Style check early - # hack to run style check before the docker build job if possible (style-check image not changed) - if: contains(fromJson(steps.runconfig.outputs.CI_DATA).jobs_data.jobs_to_do, 'Style check early') - run: | - DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n') - export DOCKER_TAG=$DOCKER_TAG - python3 ./tests/ci/style_check.py --no-push - python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check' BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 9df678d4b9a..e576fb447c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,184 @@ ### Table of Contents +**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**
**[ClickHouse release v24.2, 2024-02-29](#242)**
**[ClickHouse release v24.1, 2024-01-30](#241)**
**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**
# 2024 Changelog +### ClickHouse release 24.3 LTS, 2024-03-26 + +#### Upgrade Notes +* The setting `allow_experimental_analyzer` is enabled by default and it switches query analysis to a new implementation, which has better compatibility and feature completeness. The feature "analyzer" is considered beta instead of experimental. You can switch back to the old behavior by setting `compatibility` to `24.2` or by disabling the `allow_experimental_analyzer` setting. Watch the [video on YouTube](https://www.youtube.com/watch?v=zhrOYQpgvkk). +* ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow data type to use for the ClickHouse String data type: String or Binary. This is controlled by the settings `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`, `output_format_arrow_string_as_string`. While Binary would be more correct and compatible, using String by default corresponds to user expectations in most cases. Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools lack support for the faster `lz4` compression method, which is why we set `zstd` by default. This is controlled by the settings `output_format_parquet_compression_method`, `output_format_orc_compression_method`, and `output_format_arrow_compression_method`. We changed the default to `zstd` for Parquet and ORC, but not for Arrow (it is intended for low-level use cases). [#61817](https://github.com/ClickHouse/ClickHouse/pull/61817) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In the new ClickHouse version, the functions `geoDistance`, `greatCircleDistance`, and `greatCircleAngle` will use the 64-bit double precision floating point data type for internal calculations and the return type if all the arguments are Float64. This closes [#58476](https://github.com/ClickHouse/ClickHouse/issues/58476). In previous versions, the function always used Float32. You can switch to the old behavior by setting `geo_distance_returns_float64_on_float64_arguments` to `false` or setting `compatibility` to `24.2` or earlier. [#61848](https://github.com/ClickHouse/ClickHouse/pull/61848) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Co-authored with [Geet Patel](https://github.com/geetptl). +* The obsolete in-memory data parts have been deprecated since version 23.5 and have not been supported since version 23.10. Now the remaining code is removed. Continuation of [#55186](https://github.com/ClickHouse/ClickHouse/issues/55186) and [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). It is unlikely that you have used in-memory data parts because they were available only before version 23.5 and only when you enabled them manually by specifying the corresponding SETTINGS for a MergeTree table. To check if you have in-memory data parts, run the following query: `SELECT part_type, count() FROM system.parts GROUP BY part_type ORDER BY part_type`. To disable the usage of in-memory data parts, do `ALTER TABLE ... MODIFY SETTING min_bytes_for_compact_part = DEFAULT, min_rows_for_compact_part = DEFAULT`. Before upgrading from old ClickHouse releases, first check that you don't have in-memory data parts. If there are in-memory data parts, disable them first, then wait until there are no in-memory data parts left and continue the upgrade. 
[#61127](https://github.com/ClickHouse/ClickHouse/pull/61127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Changed the column name from `duration_ms` to `duration_microseconds` in the `system.zookeeper` table to reflect that the duration is measured with microsecond resolution. [#60774](https://github.com/ClickHouse/ClickHouse/pull/60774) ([Duc Canh Le](https://github.com/canhld94)). +* Reject incoming INSERT queries when the query-level settings `async_insert` and `deduplicate_blocks_in_dependent_materialized_views` are enabled together. This behaviour is controlled by the setting `throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert`, which is enabled by default. This is a continuation of https://github.com/ClickHouse/ClickHouse/pull/59699 needed to unblock https://github.com/ClickHouse/ClickHouse/pull/59915. [#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* The `clickhouse-copier` utility has been moved to a separate repository on GitHub: https://github.com/ClickHouse/copier. It is no longer included in the bundle but is still available as a separate download. This closes: [#60734](https://github.com/ClickHouse/ClickHouse/issues/60734), [#60540](https://github.com/ClickHouse/ClickHouse/issues/60540), [#60250](https://github.com/ClickHouse/ClickHouse/issues/60250), [#52917](https://github.com/ClickHouse/ClickHouse/issues/52917), [#51140](https://github.com/ClickHouse/ClickHouse/issues/51140), [#47517](https://github.com/ClickHouse/ClickHouse/issues/47517), [#47189](https://github.com/ClickHouse/ClickHouse/issues/47189), [#46598](https://github.com/ClickHouse/ClickHouse/issues/46598), [#40257](https://github.com/ClickHouse/ClickHouse/issues/40257), [#36504](https://github.com/ClickHouse/ClickHouse/issues/36504), [#35485](https://github.com/ClickHouse/ClickHouse/issues/35485), [#33702](https://github.com/ClickHouse/ClickHouse/issues/33702), [#26702](https://github.com/ClickHouse/ClickHouse/issues/26702). +* To increase compatibility with MySQL, the compatibility alias `locate` now accepts arguments `(needle, haystack[, start_pos])` by default. The previous behavior `(haystack, needle[, start_pos])` can be restored by setting `function_locate_has_mysql_compatible_argument_order = 0`. [#61092](https://github.com/ClickHouse/ClickHouse/pull/61092) ([Robert Schulze](https://github.com/rschu1ze)). +* Forbid `SimpleAggregateFunction` in `ORDER BY` of `MergeTree` tables by default, similar to how `AggregateFunction` is forbidden (which is forbidden because it is not comparable); use `allow_suspicious_primary_key` to allow them. [#61399](https://github.com/ClickHouse/ClickHouse/pull/61399) ([Azat Khuzhin](https://github.com/azat)). +* The `Ordinary` database engine is deprecated. You will receive a warning in clickhouse-client if your server is using it. This closes [#52229](https://github.com/ClickHouse/ClickHouse/issues/52229). [#56942](https://github.com/ClickHouse/ClickHouse/pull/56942) ([shabroo](https://github.com/shabroo)). + +#### New Feature +* Support reading and writing backups as `tar` (in addition to `zip`). [#59535](https://github.com/ClickHouse/ClickHouse/pull/59535) ([josh-hildred](https://github.com/josh-hildred)). +* Implemented support for S3 Express buckets. 
[#59965](https://github.com/ClickHouse/ClickHouse/pull/59965) ([Nikita Taranov](https://github.com/nickitat)). +* Allow attaching parts from a different disk (using copy instead of hard link). [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112) ([Unalian](https://github.com/Unalian)). +* Size-capped `Memory` tables, controlled by the settings `min_bytes_to_keep`, `max_bytes_to_keep`, `min_rows_to_keep`, and `max_rows_to_keep`. [#60612](https://github.com/ClickHouse/ClickHouse/pull/60612) ([Jake Bamrah](https://github.com/JakeBamrah)). +* Separate limits on the number of waiting and executing queries. Added a new server setting `max_waiting_queries` that limits the number of queries waiting due to `async_load_databases`. Existing limits on the number of executing queries no longer count waiting queries. [#61053](https://github.com/ClickHouse/ClickHouse/pull/61053) ([Sergei Trifonov](https://github.com/serxa)). +* Added a table `system.keywords` which contains all the keywords from the parser. It is mostly needed for, and will be used in, better fuzzing and syntax highlighting. [#51808](https://github.com/ClickHouse/ClickHouse/pull/51808) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add support for `ATTACH PARTITION ALL`. [#61107](https://github.com/ClickHouse/ClickHouse/pull/61107) ([Kirill Nikiforov](https://github.com/allmazz)). +* Add a new function, `getClientHTTPHeader`. This closes [#54665](https://github.com/ClickHouse/ClickHouse/issues/54665). Co-authored with @lingtaolf. [#61820](https://github.com/ClickHouse/ClickHouse/pull/61820) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `generate_series` as a table function (a PostgreSQL-compatibility alias for the existing `numbers` function). This function generates a table with an arithmetic progression of natural numbers; a short example is shown below. [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([divanik](https://github.com/divanik)). +* Added a mode for `topK`/`topKWeighted` that returns the count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)). +* Added function `toMillisecond` which returns the millisecond component for values of type `DateTime` or `DateTime64`. [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)). +* Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. [#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Performance Improvement +* Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)). +* 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)). +* If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value in at least 0.9 of all cases, the columns after it will not be loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
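The following is a small illustration of the `generate_series` table function and the `toMillisecond` function from the New Feature list above. It is a minimal sketch; the values in the comments are illustrative and not taken from the release notes.

```sql
-- generate_series(start, stop[, step]) is a PostgreSQL-style alias over `numbers`,
-- producing an inclusive arithmetic progression.
SELECT * FROM generate_series(1, 10, 3);   -- 1, 4, 7, 10

-- toMillisecond extracts the millisecond component of a DateTime64 value.
SELECT toMillisecond(toDateTime64('2024-03-26 12:00:00.123', 3));   -- 123
```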
+* Improve the performance of the serialized aggregation method when multiple `Nullable` columns are involved. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)). +* Lazily build JSON output to improve the performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)). +* Make HTTP/HTTPS connections to external services, such as AWS S3, reusable for all use cases, even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)). +* Improvements to aggregate functions `argMin` / `argMax` / `any` / `anyLast` / `anyHeavy`, as well as `ORDER BY {u8/u16/u32/u64/i8/i16/i32/i64} LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)). +* Trivial optimization for column filtering. Peak memory can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)). +* Execute the `multiIf` function in a columnar fashion when the result type's underlying type is a number. [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)). +* Faster (almost 2x) mutexes. [#60823](https://github.com/ClickHouse/ClickHouse/pull/60823) ([Azat Khuzhin](https://github.com/azat)). +* Drain multiple connections in parallel when a distributed query is finishing. [#60845](https://github.com/ClickHouse/ClickHouse/pull/60845) ([lizhuoyu5](https://github.com/lzydmxy)). +* Optimize data movement between columns of a Nullable number or a Nullable string, which improves some micro-benchmarks. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)). +* Operations with the filesystem cache will suffer less from lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Optimize array join and other JOINs by preventing a wrong compiler optimization. Close [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074). [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)). +* If a query with a syntax error contained the `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put into the AST. But the letter A in AST means "abstract", which means it should not contain heavyweight objects. Parts of the AST can be created and discarded during parsing, including a large amount of backtracking. This led to slowness on the parsing side and consequently allowed DoS by a readonly user. But the main problem is that it prevented progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a new analyzer pass to optimize the IN operator for a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)). +* DNSResolver shuffles the set of resolved IPs, which is needed to uniformly utilize multiple endpoints of AWS S3. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)). + +#### Experimental Feature +* Support parallel reading for Azure blob storage. 
This improves the performance of the experimental Azure object storage. [#61503](https://github.com/ClickHouse/ClickHouse/pull/61503) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add an asynchronous WriteBuffer for Azure blob storage, similar to S3. This improves the performance of the experimental Azure object storage. [#59929](https://github.com/ClickHouse/ClickHouse/pull/59929) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Use managed identity for backup IO when using Azure Blob Storage. Add a setting to prevent ClickHouse from attempting to create a non-existent container, which requires permissions at the storage account level. [#61785](https://github.com/ClickHouse/ClickHouse/pull/61785) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Add a setting `parallel_replicas_allow_in_with_subquery = 1` which allows subqueries for IN to work with parallel replicas. [#60950](https://github.com/ClickHouse/ClickHouse/pull/60950) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* A change for "zero-copy" replication: all zero-copy locks related to a table have to be dropped when the table is dropped. The directory which contains these locks has to be removed as well. [#57575](https://github.com/ClickHouse/ClickHouse/pull/57575) ([Sema Checherinda](https://github.com/CheSema)). + +#### Improvement +* Use `MergeTree` as a default table engine. [#60524](https://github.com/ClickHouse/ClickHouse/pull/60524) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable `output_format_pretty_row_numbers` by default. It is better for usability. [#61791](https://github.com/ClickHouse/ClickHouse/pull/61791) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In the previous version, some numbers in Pretty formats were not pretty enough. [#61794](https://github.com/ClickHouse/ClickHouse/pull/61794) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* A long value in Pretty formats won't be cut if it is the only value in the result set, such as in the result of the `SHOW CREATE TABLE` query. [#61795](https://github.com/ClickHouse/ClickHouse/pull/61795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Similarly to `clickhouse-local`, `clickhouse-client` will accept the `--output-format` option as a synonym for the `--format` option. This closes [#59848](https://github.com/ClickHouse/ClickHouse/issues/59848). [#61797](https://github.com/ClickHouse/ClickHouse/pull/61797) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* If stdout is a terminal and the output format is not specified, `clickhouse-client` and similar tools will use `PrettyCompact` by default, similarly to the interactive mode. `clickhouse-client` and `clickhouse-local` will handle command line arguments for input and output formats in a unified fashion. This closes [#61272](https://github.com/ClickHouse/ClickHouse/issues/61272). [#61800](https://github.com/ClickHouse/ClickHouse/pull/61800) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Underscore digit groups in Pretty formats for better readability. This is controlled by a new setting, `output_format_pretty_highlight_digit_groups`. [#61802](https://github.com/ClickHouse/ClickHouse/pull/61802) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the ability to override initial INSERT settings via `SYSTEM FLUSH DISTRIBUTED`; a sketch is shown below. [#61832](https://github.com/ClickHouse/ClickHouse/pull/61832) ([Azat Khuzhin](https://github.com/azat)). 
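To make the last entry concrete, here is a hedged sketch of overriding the initial INSERT settings when flushing a Distributed table. The table name `dist_table` and the chosen setting are hypothetical; the exact syntax should be verified against the documentation for [#61832](https://github.com/ClickHouse/ClickHouse/pull/61832).

```sql
-- Flush the pending INSERT blocks of a Distributed table, overriding the settings
-- that were captured when the blocks were originally inserted (hypothetical example).
SYSTEM FLUSH DISTRIBUTED dist_table SETTINGS max_insert_threads = 4;
```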
+* Enable processors profiling (time spent and in/out bytes for sorting, aggregation, ...) by default. [#61096](https://github.com/ClickHouse/ClickHouse/pull/61096) ([Azat Khuzhin](https://github.com/azat)). +* Support files without a format extension in the Filesystem database. [#60795](https://github.com/ClickHouse/ClickHouse/pull/60795) ([Kruglov Pavel](https://github.com/Avogar)). +* Make all format names case insensitive, like Tsv, or TSV, or tsv, or even rowbinary. [#60420](https://github.com/ClickHouse/ClickHouse/pull/60420) ([豪肥肥](https://github.com/HowePa)). I would appreciate it if you continue to write it correctly, e.g., `JSON` 😇, not `Json` 🤮, but we don't mind if you spell it as you prefer. +* Added `none_only_active` mode for the `distributed_ddl_output_mode` setting. [#60340](https://github.com/ClickHouse/ClickHouse/pull/60340) ([Alexander Tokmakov](https://github.com/tavplubix)). +* The advanced dashboard has slightly better colors for multi-line graphs. [#60391](https://github.com/ClickHouse/ClickHouse/pull/60391) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The advanced dashboard now keeps its controls visible while scrolling. This allows you to add a new chart without scrolling up. [#60692](https://github.com/ClickHouse/ClickHouse/pull/60692) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* While running the `MODIFY COLUMN` query for materialized views, check the inner table's structure to ensure every column exists. [#47427](https://github.com/ClickHouse/ClickHouse/pull/47427) ([sunny](https://github.com/sunny19930321)). +* String types and Enums can be used in the same context, such as arrays, UNION queries, and conditional expressions. This closes [#60726](https://github.com/ClickHouse/ClickHouse/issues/60726). [#60727](https://github.com/ClickHouse/ClickHouse/pull/60727) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow declaring Enums in the structure of external data for query processing (this is an immediate temporary table that you can provide for your query). [#57857](https://github.com/ClickHouse/ClickHouse/pull/57857) ([Duc Canh Le](https://github.com/canhld94)). +* Consider lightweight deleted rows when selecting parts to merge, so the disk size of the resulting part will be estimated better. [#58223](https://github.com/ClickHouse/ClickHouse/pull/58223) ([Zhuo Qiu](https://github.com/jewelzqiu)). +* Added comments for columns in more system tables. Continuation of https://github.com/ClickHouse/ClickHouse/pull/58356. [#59016](https://github.com/ClickHouse/ClickHouse/pull/59016) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Improved overall usability of virtual columns. Virtual columns can now be used in `PREWHERE`, which is worthwhile for non-const virtual columns like `_part_offset`. [#59033](https://github.com/ClickHouse/ClickHouse/pull/59033) ([Amos Bird](https://github.com/amosbird)). Built-in documentation is now available for virtual columns as a column comment in the `DESCRIBE` query when the setting `describe_include_virtual_columns` is enabled; a short example is shown below. [#60205](https://github.com/ClickHouse/ClickHouse/pull/60205) ([Anton Popov](https://github.com/CurtizJ)). +* Instead of using a constant key, object storage now generates a key for determining the remove-objects capability. [#59495](https://github.com/ClickHouse/ClickHouse/pull/59495) ([Sema Checherinda](https://github.com/CheSema)). 
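A minimal sketch of the virtual-column improvements mentioned above, assuming a MergeTree table named `t` (the table name is hypothetical):

```sql
-- List virtual columns (such as _part and _part_offset) alongside regular columns.
DESCRIBE TABLE t SETTINGS describe_include_virtual_columns = 1;

-- Use a non-const virtual column in PREWHERE.
SELECT count() FROM t PREWHERE _part_offset < 1000;
```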
+* Allow "local" as the object storage type instead of "local_blob_storage". [#60165](https://github.com/ClickHouse/ClickHouse/pull/60165) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Parallel flush of pending INSERT blocks of the Distributed engine on `DETACH`/server shutdown and `SYSTEM FLUSH DISTRIBUTED` (parallelism will work only if you have a multi-disk policy for the table, like everything in the Distributed engine right now). [#60225](https://github.com/ClickHouse/ClickHouse/pull/60225) ([Azat Khuzhin](https://github.com/azat)). +* Add a setting to force read-through cache for merges. [#60308](https://github.com/ClickHouse/ClickHouse/pull/60308) ([Kseniia Sumarokova](https://github.com/kssenii)). +* An improvement for the MySQL compatibility protocol. The issue [#57598](https://github.com/ClickHouse/ClickHouse/issues/57598) mentions a divergent behaviour regarding transaction handling: an issued COMMIT/ROLLBACK when no transaction is active was reported as an error, contrary to MySQL behaviour. [#60338](https://github.com/ClickHouse/ClickHouse/pull/60338) ([PapaToemmsn](https://github.com/PapaToemmsn)). +* Function `substring` now has a new alias `byteSlice`. [#60494](https://github.com/ClickHouse/ClickHouse/pull/60494) ([Robert Schulze](https://github.com/rschu1ze)). +* Renamed server setting `dns_cache_max_size` to `dns_cache_max_entries` to reduce ambiguity. [#60500](https://github.com/ClickHouse/ClickHouse/pull/60500) ([Kirill Nikiforov](https://github.com/allmazz)). +* `SHOW INDEX | INDEXES | INDICES | KEYS` no longer sorts by the primary key columns (which was unintuitive). [#60514](https://github.com/ClickHouse/ClickHouse/pull/60514) ([Robert Schulze](https://github.com/rschu1ze)). +* Keeper improvement: abort during startup if an invalid snapshot is detected, to avoid data loss. [#60537](https://github.com/ClickHouse/ClickHouse/pull/60537) ([Antonio Andelic](https://github.com/antonio2368)). +* Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)). +* Keeper improvement: support `leadership_expiry_ms` in Keeper's settings. [#60806](https://github.com/ClickHouse/ClickHouse/pull/60806) ([Brokenice0415](https://github.com/Brokenice0415)). +* Always infer exponential numbers in JSON formats regardless of the setting `input_format_try_infer_exponent_floats`. Add setting `input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects` that allows using the String type for ambiguous paths instead of an exception during named Tuples inference from JSON objects. [#60808](https://github.com/ClickHouse/ClickHouse/pull/60808) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for the `START TRANSACTION` syntax typically used in MySQL, resolving https://github.com/ClickHouse/ClickHouse/discussions/60865. [#60886](https://github.com/ClickHouse/ClickHouse/pull/60886) ([Zach Naimon](https://github.com/ArctypeZach)). +* Add a flag for the full-sorting merge join algorithm to treat NULL as the biggest/smallest value, so the behavior can be compatible with other SQL systems, like Apache Spark. [#60896](https://github.com/ClickHouse/ClickHouse/pull/60896) ([loudongfeng](https://github.com/loudongfeng)). +* Support detecting the output format by file extension in `clickhouse-client` and `clickhouse-local`. [#61036](https://github.com/ClickHouse/ClickHouse/pull/61036) ([豪肥肥](https://github.com/HowePa)). +* Update the memory limit at runtime when the Linux cgroups value changes. 
[#61049](https://github.com/ClickHouse/ClickHouse/pull/61049) ([Han Fei](https://github.com/hanfei1991)). +* Add the function `toUInt128OrZero`, which was missed by mistake (the mistake is related to https://github.com/ClickHouse/ClickHouse/pull/945). The compatibility aliases `FROM_UNIXTIME` and `DATE_FORMAT` (they are not ClickHouse-native and only exist for MySQL compatibility) have been made case-insensitive, as expected for SQL-compatibility aliases. [#61114](https://github.com/ClickHouse/ClickHouse/pull/61114) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improvements for the access checks, allowing revocation of unpossessed rights in case the target user doesn't have the grants being revoked either. Example: `GRANT SELECT ON *.* TO user1; REVOKE SELECT ON system.* FROM user1;`. [#61115](https://github.com/ClickHouse/ClickHouse/pull/61115) ([pufit](https://github.com/pufit)). +* Fix `has()` function with `Nullable` column (fixes [#60214](https://github.com/ClickHouse/ClickHouse/issues/60214)). [#61249](https://github.com/ClickHouse/ClickHouse/pull/61249) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Now it's possible to specify the attribute `merge="true"` in config substitutions for subtrees ``. If this attribute is specified, ClickHouse will merge the subtree with the existing configuration; otherwise the default behavior is to append the new content to the configuration. [#61299](https://github.com/ClickHouse/ClickHouse/pull/61299) ([alesapin](https://github.com/alesapin)). +* Add async metrics for virtual memory mappings: `VMMaxMapCount` & `VMNumMaps`. Closes [#60662](https://github.com/ClickHouse/ClickHouse/issues/60662). [#61354](https://github.com/ClickHouse/ClickHouse/pull/61354) ([Tuan Pham Anh](https://github.com/tuanpavn)). +* Use the `temporary_files_codec` setting in all places where we create temporary data, for example external memory sorting and external memory GROUP BY. Before, it worked only in the `partial_merge` JOIN algorithm. [#61456](https://github.com/ClickHouse/ClickHouse/pull/61456) ([Maksim Kita](https://github.com/kitaisreal)). +* Add a new setting `max_parser_backtracks` which allows limiting the complexity of query parsing. [#61502](https://github.com/ClickHouse/ClickHouse/pull/61502) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less contention during dynamic resize of the filesystem cache. [#61524](https://github.com/ClickHouse/ClickHouse/pull/61524) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disallow sharded mode of StorageS3 queue, because it will be rewritten. [#61537](https://github.com/ClickHouse/ClickHouse/pull/61537) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed typo: from `use_leagcy_max_level` to `use_legacy_max_level`. [#61545](https://github.com/ClickHouse/ClickHouse/pull/61545) ([William Schoeffel](https://github.com/wiledusc)). +* Remove some duplicate entries in `system.blob_storage_log`. [#61622](https://github.com/ClickHouse/ClickHouse/pull/61622) ([YenchangChan](https://github.com/YenchangChan)). +* Added `current_user` function as a compatibility alias for MySQL. [#61770](https://github.com/ClickHouse/ClickHouse/pull/61770) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix inconsistent floating point aggregate function states in mixed x86-64 / ARM clusters. [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### Build/Testing/Packaging Improvement +* The real-time query profiler now works on AArch64. 
In previous versions, it worked only when a program didn't spend time inside a syscall. [#60807](https://github.com/ClickHouse/ClickHouse/pull/60807) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse version has been added to docker labels. Closes [#54224](https://github.com/ClickHouse/ClickHouse/issues/54224). [#60949](https://github.com/ClickHouse/ClickHouse/pull/60949) ([Nikolay Monkov](https://github.com/nikmonkov)). +* Upgrade `prqlc` to 0.11.3. [#60616](https://github.com/ClickHouse/ClickHouse/pull/60616) ([Maximilian Roos](https://github.com/max-sixty)). +* Add generic query text fuzzer in `clickhouse-local`. [#61508](https://github.com/ClickHouse/ClickHouse/pull/61508) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix finished_mutations_to_keep=0 for MergeTree (as docs says 0 is to keep everything) [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)). +* Something was wrong with the FINAL optimization, here is how the author describes it: "PartsSplitter invalid ranges for the same part". [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)). +* Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)). +* An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)). +* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't allow to set max_parallel_replicas to 0 as it doesn't make sense [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)). +* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)). +* Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Something was wrong with experimental KQL (Kusto) support: fix `max_query_size_for_kql_compound_operator`: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Keeper fix: add timeouts when waiting for commit logs [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)). +* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)). +* Fix reading from MergeTree with non-deterministic functions in filter [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix logical error on bad compatibility setting value type [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)). 
+* fix(prql): Robust panic handler [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)). +* Fix `intDiv` for decimal and date arguments [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix: expand CTE in alter modify query [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory) [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)). +* Fix "Invalid storage definition in metadata file" for parameterized views [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)). +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove wrong assertion in aggregate function quantileGK [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)). +* Fix insert-select + insert_deduplication_token bug by setting streams to 1 [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)). +* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Fix toStartOfInterval [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible stuck on error in HashedDictionaryParallelLoader [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)). +* Fix async RESTORE with Replicated database (experimental feature) [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix deadlock in async inserts to `Log` tables via native protocol [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)). +* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)). +* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix usage of session_token in S3 engine [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bugs in show database [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)). +* Fix logical error in RabbitMQ storage with MATERIALIZED columns [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)). +* Fix CREATE OR REPLACE DICTIONARY [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). +* fix issue of actions dag split [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)). +* Fix finishing a failed RESTORE [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)). +* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)). +* Allow queuing in restore pool [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)). +* Fix an inconsistency when reading system.parts using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)). +* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)). +* Fix a crash in window view (experimental feature) [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `repeat` with non-native integers [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix client's `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix crash in arrayPartialReverseSort [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)). +* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix addDays cause an error when used DateTime64 [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)). +* Disallow LowCardinality input type for JSONExtract [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix `system.part_log` for async insert with deduplication [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a `Non-ready set` exception for system.parts. [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix actual_part_name for REPLACE_RANGE (`Entry actual part isn't empty yet`) [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix a sanitizer report in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). +* Fix an observation that the RANGE frame is not supported for Nullable columns. [#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)). + ### ClickHouse release 24.2, 2024-02-29 #### Backward Incompatible Change diff --git a/README.md b/README.md index e00ce42a60b..2b97bd25d70 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,6 @@ curl https://clickhouse.com/ | sh * [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev. -* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Monthly Release & Community Call diff --git a/docs/changelogs/v23.12.6.19-stable.md b/docs/changelogs/v23.12.6.19-stable.md new file mode 100644 index 00000000000..4659532d3de --- /dev/null +++ b/docs/changelogs/v23.12.6.19-stable.md @@ -0,0 +1,24 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.12.6.19-stable (40080a3c2a4) FIXME as compared to v23.12.5.81-stable (a0fbe3ae813) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). +* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). +* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#61429](https://github.com/ClickHouse/ClickHouse/issues/61429):. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#61486](https://github.com/ClickHouse/ClickHouse/issues/61486): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)). +* Backported in [#61641](https://github.com/ClickHouse/ClickHouse/issues/61641):. [#61592](https://github.com/ClickHouse/ClickHouse/pull/61592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Backported in [#61811](https://github.com/ClickHouse/ClickHouse/issues/61811): ![Screenshot_20240323_025055](https://github.com/ClickHouse/ClickHouse/assets/18581488/ccaab212-a1d3-4dfb-8d56-b1991760b6bf). [#61801](https://github.com/ClickHouse/ClickHouse/pull/61801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v23.3.22.3-lts.md b/docs/changelogs/v23.3.22.3-lts.md new file mode 100644 index 00000000000..2900480e12d --- /dev/null +++ b/docs/changelogs/v23.3.22.3-lts.md @@ -0,0 +1,13 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.3.22.3-lts (04075bf96a1) FIXME as compared to v23.3.21.26-lts (d9672a3731f) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). + diff --git a/docs/changelogs/v23.8.12.13-lts.md b/docs/changelogs/v23.8.12.13-lts.md new file mode 100644 index 00000000000..dbb36fdc00e --- /dev/null +++ b/docs/changelogs/v23.8.12.13-lts.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.8.12.13-lts (bdbd0d87e5d) FIXME as compared to v23.8.11.28-lts (31879d2ab4c) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#61428](https://github.com/ClickHouse/ClickHouse/issues/61428):. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#61484](https://github.com/ClickHouse/ClickHouse/issues/61484): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/changelogs/v24.1.8.22-stable.md b/docs/changelogs/v24.1.8.22-stable.md new file mode 100644 index 00000000000..f780de41c40 --- /dev/null +++ b/docs/changelogs/v24.1.8.22-stable.md @@ -0,0 +1,32 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.1.8.22-stable (7fb8f96d3da) FIXME as compared to v24.1.7.18-stable (90925babd78) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). +* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug when reading system.parts using UUID (issue 61220). [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)). +* Fix client `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#61431](https://github.com/ClickHouse/ClickHouse/issues/61431):. [#61374](https://github.com/ClickHouse/ClickHouse/pull/61374) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#61488](https://github.com/ClickHouse/ClickHouse/issues/61488): ... [#61441](https://github.com/ClickHouse/ClickHouse/pull/61441) ([Max K.](https://github.com/maxknv)). +* Backported in [#61642](https://github.com/ClickHouse/ClickHouse/issues/61642):. [#61592](https://github.com/ClickHouse/ClickHouse/pull/61592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#61479](https://github.com/ClickHouse/ClickHouse/issues/61479) to 24.1: Fix bug when reading system.parts using UUID (issue 61220)."'. [#61775](https://github.com/ClickHouse/ClickHouse/pull/61775) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 21b9446aa66..a6c49f1f476 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -55,9 +55,7 @@ To build using Homebrew's vanilla Clang compiler (the only **recommended** way): cd ClickHouse mkdir build export PATH=$(brew --prefix llvm)/bin:$PATH -export CC=$(brew --prefix llvm)/bin/clang -export CXX=$(brew --prefix llvm)/bin/clang++ -cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -S . -B build +cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -S . -B build cmake --build build # The resulting binary will be created at: build/programs/clickhouse ``` diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index c7e461d15ae..3e411a51ff4 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -867,3 +867,31 @@ Default value: `Never` Persists virtual column `_block_number` on merges. Default value: false. + +## exclude_deleted_rows_for_part_size_in_merge {#exclude_deleted_rows_for_part_size_in_merge} + +If enabled, estimated actual size of data parts (i.e., excluding those rows that have been deleted through `DELETE FROM`) will be used when selecting parts to merge. Note that this behavior is only triggered for data parts affected by `DELETE FROM` executed after this setting is enabled. + +Possible values: + +- true, false + +Default value: false + +**See Also** + +- [load_existing_rows_count_for_old_parts](#load_existing_rows_count_for_old_parts) setting + +## load_existing_rows_count_for_old_parts {#load_existing_rows_count_for_old_parts} + +If enabled along with [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge), deleted rows count for existing data parts will be calculated during table starting up. 
Note that it may slow down table loading on startup. + +Possible values: + +- true, false + +Default value: false + +**See Also** + +- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md index cb89a091d68..09cdc192b03 100644 --- a/docs/en/sql-reference/transactions.md +++ b/docs/en/sql-reference/transactions.md @@ -127,7 +127,7 @@ See the [deployment](docs/en/deployment-guides/terminology.md) documentation for #### Verify that experimental transactions are enabled -Issue a `BEGIN TRANSACTION` followed by a `ROLLBACK` to verify that experimental transactions are enabled, and that ClickHouse Keeper is enabled as it is used to track transactions. +Issue a `BEGIN TRANSACTION` or `START TRANSACTION` followed by a `ROLLBACK` to verify that experimental transactions are enabled, and that ClickHouse Keeper is enabled as it is used to track transactions. ```sql BEGIN TRANSACTION diff --git a/packages/clickhouse-server.postinstall b/packages/clickhouse-server.postinstall index 41d4405a790..d3b49db758f 100644 --- a/packages/clickhouse-server.postinstall +++ b/packages/clickhouse-server.postinstall @@ -36,7 +36,7 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then fi /bin/systemctl daemon-reload - /bin/systemctl enable --now clickhouse-server + /bin/systemctl enable clickhouse-server else # If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server if [ -x "/etc/init.d/clickhouse-server" ]; then diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 6719504eac3..be783f207c2 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -777,7 +777,13 @@ struct IdentifierResolveScope std::unordered_map table_expression_node_to_data; QueryTreeNodePtrWithHashSet nullable_group_by_keys; - QueryTreeNodePtrWithHashMap nullable_join_columns; + + /** It's possible that after a JOIN, a column in the projection has a type different from the column in the source table. + * (For example, after join_use_nulls or a USING column cast to a supertype.) + * However, the column in the projection still refers to the table as its source. + * This map is used to revert these columns back to their original columns in the source table. 
+ */ + QueryTreeNodePtrWithHashMap join_columns_with_changed_types; /// Use identifier lookup to result cache bool use_identifier_lookup_to_result_cache = true; @@ -1308,7 +1314,8 @@ private: if (!resolved_expression->getResultType()->equals(*new_result_type)) resolved_expression = buildCastFunction(resolved_expression, new_result_type, scope.context, true); } - scope.nullable_join_columns[nullable_resolved_identifier] = resolved_identifier; + if (!nullable_resolved_identifier->isEqual(*resolved_identifier)) + scope.join_columns_with_changed_types[nullable_resolved_identifier] = resolved_identifier; return nullable_resolved_identifier; } return nullptr; @@ -1401,6 +1408,8 @@ private: const NamesAndTypes & matched_columns, const IdentifierResolveScope & scope); + void updateMatchedColumnsFromJoinUsing(QueryTreeNodesWithNames & result_matched_column_nodes_with_names, const QueryTreeNodePtr & source_table_expression, IdentifierResolveScope & scope); + QueryTreeNodesWithNames resolveQualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope); QueryTreeNodesWithNames resolveUnqualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope); @@ -2168,10 +2177,13 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden !nearest_query_scope) { auto constant_value = std::make_shared(std::move(scalar_value), scalar_type); - auto constant_node = std::make_shared(std::move(constant_value), node); + auto constant_node = std::make_shared(constant_value, node); if (constant_node->getValue().isNull()) + { node = buildCastFunction(constant_node, constant_node->getResultType(), context); + node = std::make_shared(std::move(constant_value), node); + } else node = std::move(constant_node); @@ -3309,6 +3321,78 @@ QueryTreeNodePtr checkIsMissedObjectJSONSubcolumn(const QueryTreeNodePtr & left_ return {}; } +/// Used to replace columns that changed type because of JOIN with their original type +class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap & replacement_map_, const ContextPtr & context_) + : replacement_map(replacement_map_) + , context(context_) + {} + + /// Apply the replacement transitively, because a column may change its type twice: once to have a supertype and then because of `join_use_nulls` + static QueryTreeNodePtr findTransitiveReplacement(QueryTreeNodePtr node, const QueryTreeNodePtrWithHashMap & replacement_map_) + { + auto it = replacement_map_.find(node); + QueryTreeNodePtr result_node = nullptr; + for (; it != replacement_map_.end(); it = replacement_map_.find(result_node)) + { + if (result_node && result_node->isEqual(*it->second)) + { + Strings map_dump; + for (const auto & [k, v]: replacement_map_) + map_dump.push_back(fmt::format("{} -> {} (is_equals: {}, is_same: {})", + k.node->dumpTree(), v->dumpTree(), k.node->isEqual(*v), k.node == v)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Infinite loop in query tree replacement map: {}", fmt::join(map_dump, "; ")); + } + chassert(it->second); + + result_node = it->second; + } + return result_node; + } + + void visitImpl(QueryTreeNodePtr & node) + { + if (auto replacement_node = findTransitiveReplacement(node, replacement_map)) + node = replacement_node; + + if (auto * function_node = node->as(); function_node && function_node->isResolved()) + rerunFunctionResolve(function_node, context); + } + + /// We want to re-run resolve for function _after_ its arguments are replaced + bool 
shouldTraverseTopToBottom() const { return false; } + + bool needChildVisit(QueryTreeNodePtr & /* parent */, QueryTreeNodePtr & child) + { + /// Visit only expressions, but not subqueries + return child->getNodeType() == QueryTreeNodeType::IDENTIFIER + || child->getNodeType() == QueryTreeNodeType::LIST + || child->getNodeType() == QueryTreeNodeType::FUNCTION + || child->getNodeType() == QueryTreeNodeType::COLUMN; + } + +private: + const QueryTreeNodePtrWithHashMap & replacement_map; + const ContextPtr & context; +}; + +/// Compare resolved identifiers considering columns that become nullable after JOIN +bool resolvedIdenfiersFromJoinAreEquals( + const QueryTreeNodePtr & left_resolved_identifier, + const QueryTreeNodePtr & right_resolved_identifier, + const IdentifierResolveScope & scope) +{ + auto left_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(left_resolved_identifier, scope.join_columns_with_changed_types); + const auto & left_resolved_to_compare = left_original_node ? left_original_node : left_resolved_identifier; + + auto right_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(right_resolved_identifier, scope.join_columns_with_changed_types); + const auto & right_resolved_to_compare = right_original_node ? right_original_node : right_resolved_identifier; + + return left_resolved_to_compare->isEqual(*right_resolved_to_compare, IQueryTreeNode::CompareOptions{.compare_aliases = false}); +} + QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) @@ -3443,9 +3527,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo auto & result_column = result_column_node->as(); result_column.setColumnType(using_column_node.getColumnType()); + const auto & join_using_left_column = using_expression_list.getNodes().at(0); + if (!result_column_node->isEqual(*join_using_left_column)) + scope.join_columns_with_changed_types[result_column_node] = join_using_left_column; + resolved_identifier = std::move(result_column_node); } - else if (left_resolved_identifier->isEqual(*right_resolved_identifier, IQueryTreeNode::CompareOptions{.compare_aliases = false})) + else if (resolvedIdenfiersFromJoinAreEquals(left_resolved_identifier, right_resolved_identifier, scope)) { const auto & identifier_path_part = identifier_lookup.identifier.front(); auto * left_resolved_identifier_column = left_resolved_identifier->as(); @@ -3521,6 +3609,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo auto left_resolved_column_clone = std::static_pointer_cast(left_resolved_column.clone()); left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType()); resolved_identifier = std::move(left_resolved_column_clone); + + if (!resolved_identifier->isEqual(*using_column_node_it->second)) + scope.join_columns_with_changed_types[resolved_identifier] = using_column_node_it->second; } } } @@ -3543,6 +3634,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo auto right_resolved_column_clone = std::static_pointer_cast(right_resolved_column.clone()); right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType()); resolved_identifier = std::move(right_resolved_column_clone); + if (!resolved_identifier->isEqual(*using_column_node_it->second)) + scope.join_columns_with_changed_types[resolved_identifier] = using_column_node_it->second; } } } 
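As a minimal sketch of the query shape this bookkeeping targets (hypothetical tables `t1` and `t2`): `JOIN USING` over columns of different types resolves the shared column to their supertype, and `join_use_nulls` additionally wraps right-side columns in `Nullable`, so the resolved identifiers no longer match the original source columns and have to be mapped back through `join_columns_with_changed_types`.

```sql
-- Hypothetical tables: `id` is UInt16 on the left and UInt32 on the right,
-- so JOIN USING (id) resolves `id` to the supertype UInt32.
CREATE TABLE t1 (id UInt16, a String) ENGINE = MergeTree ORDER BY id;
CREATE TABLE t2 (id UInt32, b String) ENGINE = MergeTree ORDER BY id;

-- With join_use_nulls = 1, columns taken from the right side (here `b`)
-- additionally become Nullable(String), so both `id` and `b` end up with
-- types that differ from the columns of the underlying tables.
SELECT id, a, b
FROM t1
LEFT JOIN t2 USING (id)
SETTINGS join_use_nulls = 1;
```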
@@ -3552,9 +3645,17 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo if (scope.join_use_nulls) { + auto projection_name_it = node_to_projection_name.find(resolved_identifier); auto nullable_resolved_identifier = convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side, scope); if (nullable_resolved_identifier) + { resolved_identifier = nullable_resolved_identifier; + /// Set the same projection name for new nullable node + if (projection_name_it != node_to_projection_name.end()) + { + node_to_projection_name.emplace(resolved_identifier, projection_name_it->second); + } + } } return resolved_identifier; @@ -4213,6 +4314,95 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::getMatchedColumnNodesWithN return matched_column_nodes_with_names; } +bool hasTableExpressionInJoinTree(const QueryTreeNodePtr & join_tree_node, const QueryTreeNodePtr & table_expression) +{ + QueryTreeNodes nodes_to_process; + nodes_to_process.push_back(join_tree_node); + + while (!nodes_to_process.empty()) + { + auto node_to_process = std::move(nodes_to_process.back()); + nodes_to_process.pop_back(); + if (node_to_process == table_expression) + return true; + + if (node_to_process->getNodeType() == QueryTreeNodeType::JOIN) + { + const auto & join_node = node_to_process->as(); + nodes_to_process.push_back(join_node.getLeftTableExpression()); + nodes_to_process.push_back(join_node.getRightTableExpression()); + } + } + return false; +} + +/// Columns that resolved from matcher can also match columns from JOIN USING. +/// In that case we update type to type of column in USING section. +/// TODO: It's not completely correct for qualified matchers, so t1.* should be resolved to left table column type. +/// But in planner we do not distinguish such cases. +void QueryAnalyzer::updateMatchedColumnsFromJoinUsing( + QueryTreeNodesWithNames & result_matched_column_nodes_with_names, + const QueryTreeNodePtr & source_table_expression, + IdentifierResolveScope & scope) +{ + auto * nearest_query_scope = scope.getNearestQueryScope(); + auto * nearest_query_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as() : nullptr; + + /// If there are no parent query scope or query scope does not have join tree + if (!nearest_query_scope_query_node || !nearest_query_scope_query_node->getJoinTree()) + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "There are no table sources. 
In scope {}", + scope.scope_node->formatASTForErrorMessage()); + } + + const auto & join_tree = nearest_query_scope_query_node->getJoinTree(); + + const auto * join_node = join_tree->as(); + if (join_node && join_node->isUsingJoinExpression()) + { + const auto & join_using_list = join_node->getJoinExpression()->as(); + const auto & join_using_nodes = join_using_list.getNodes(); + + for (auto & [matched_column_node, _] : result_matched_column_nodes_with_names) + { + auto & matched_column_node_typed = matched_column_node->as(); + const auto & matched_column_name = matched_column_node_typed.getColumnName(); + + for (const auto & join_using_node : join_using_nodes) + { + auto & join_using_column_node = join_using_node->as(); + const auto & join_using_column_name = join_using_column_node.getColumnName(); + + if (matched_column_name != join_using_column_name) + continue; + + const auto & join_using_column_nodes_list = join_using_column_node.getExpressionOrThrow()->as(); + const auto & join_using_column_nodes = join_using_column_nodes_list.getNodes(); + + auto it = node_to_projection_name.find(matched_column_node); + + if (hasTableExpressionInJoinTree(join_node->getLeftTableExpression(), source_table_expression)) + matched_column_node = join_using_column_nodes.at(0); + else if (hasTableExpressionInJoinTree(join_node->getRightTableExpression(), source_table_expression)) + matched_column_node = join_using_column_nodes.at(1); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot find column {} in JOIN USING section {}", + matched_column_node->dumpTree(), join_node->dumpTree()); + + matched_column_node = matched_column_node->clone(); + if (it != node_to_projection_name.end()) + node_to_projection_name.emplace(matched_column_node, it->second); + + matched_column_node->as().setColumnType(join_using_column_node.getResultType()); + if (!matched_column_node->isEqual(*join_using_column_nodes.at(0))) + scope.join_columns_with_changed_types[matched_column_node] = join_using_column_nodes.at(0); + } + } + } +} + /** Resolve qualified tree matcher. * * First try to match qualified identifier to expression. 
If qualified identifier matched expression node then @@ -4330,6 +4520,8 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu matched_columns, scope); + updateMatchedColumnsFromJoinUsing(result_matched_column_nodes_with_names, table_expression_node, scope); + return result_matched_column_nodes_with_names; } @@ -4465,6 +4657,8 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( matched_column_node = matched_column_node->clone(); matched_column_node->as().setColumnType(join_using_column_node.getResultType()); + if (!matched_column_node->isEqual(*join_using_column_nodes.at(0))) + scope.join_columns_with_changed_types[matched_column_node] = join_using_column_nodes.at(0); table_expression_column_names_to_skip.insert(join_using_column_name); matched_expression_nodes_with_column_names.emplace_back(std::move(matched_column_node), join_using_column_name); @@ -4584,7 +4778,9 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I node = nullable_node; /// Set the same projection name for new nullable node if (projection_name_it != node_to_projection_name.end()) + { node_to_projection_name.emplace(node, projection_name_it->second); + } } } } @@ -7609,29 +7805,6 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, scope.table_expressions_in_resolve_process.erase(join_tree_node.get()); } -class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor -{ -public: - explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap & replacement_map_, const ContextPtr & context_) - : replacement_map(replacement_map_) - , context(context_) - {} - - void visitImpl(QueryTreeNodePtr & node) - { - if (auto it = replacement_map.find(node); it != replacement_map.end()) - node = it->second; - if (auto * function_node = node->as()) - rerunFunctionResolve(function_node, context); - } - - bool shouldTraverseTopToBottom() const { return false; } - -private: - const QueryTreeNodePtrWithHashMap & replacement_map; - const ContextPtr & context; -}; - /** Resolve query. * This function modifies query node during resolve. It is caller responsibility to clone query node before resolve * if it is needed for later use. @@ -7823,19 +7996,17 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier { resolveExpressionNode(prewhere_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - if (scope.join_use_nulls) - { - /** Expression in PREWHERE with JOIN should not be modified by join_use_nulls. - * Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE b = 1 - * Column `a` should be resolved from table and should not change its type to Nullable. - * More complicated example when column is somewhere inside an expression: - * SELECT a + 1 as b FROM t1 JOIN t2 USING (id) PREWHERE b = 1 - * expression `a + 1 as b` in projection and in PREWHERE should have different `a`. - */ - prewhere_node = prewhere_node->clone(); - ReplaceColumnsVisitor replace_visitor(scope.nullable_join_columns, scope.context); - replace_visitor.visit(prewhere_node); - } + /** Expressions in PREWHERE with JOIN should not change their type. + * Example: SELECT * FROM t1 JOIN t2 USING (a) PREWHERE a = 1 + * Column `a` in PREWHERE should be resolved from the left table + * and should not change its type to Nullable or to the supertype of `a` from t1 and t2. 
+ * Here's a more complicated example where the column is somewhere inside an expression: + * SELECT a + 1 as b FROM t1 JOIN t2 USING (id) PREWHERE b = 1 + * The expression `a + 1 as b` in the projection and in PREWHERE should have different `a`. + */ + prewhere_node = prewhere_node->clone(); + ReplaceColumnsVisitor replace_visitor(scope.join_columns_with_changed_types, scope.context); + replace_visitor.visit(prewhere_node); } if (query_node_typed.getWhere()) diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 468e5651274..06f49dfa448 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -33,6 +33,8 @@ namespace ErrorCodes M(UInt64, shard_num) \ M(UInt64, replica_num) \ M(Bool, check_parts) \ + M(Bool, check_projection_parts) \ + M(Bool, allow_backup_broken_projections) \ M(Bool, internal) \ M(String, host_id) \ M(OptionalUUID, backup_uuid) diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index 13709ca11c6..eccf4e90ce7 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -65,6 +65,12 @@ struct BackupSettings /// Check checksums of the data parts before writing them to a backup. bool check_parts = true; + /// Check checksums of the projection data parts before writing them to a backup. + bool check_projection_parts = true; + + /// Allow to create backup with broken projections. + bool allow_backup_broken_projections = false; + /// Internal, should not be specified by user. /// Whether this backup is a part of a distributed backup created by BACKUP ON CLUSTER. bool internal = false; diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index eb98c3a5740..03df582de10 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 75ba9cff81e..af609fabb8f 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -591,6 +591,7 @@ M(710, FAULT_INJECTED) \ M(711, FILECACHE_ACCESS_DENIED) \ M(712, TOO_MANY_MATERIALIZED_VIEWS) \ + M(713, BROKEN_PROJECTION) \ M(714, UNEXPECTED_CLUSTER) \ M(715, CANNOT_DETECT_FORMAT) \ M(716, CANNOT_FORGET_PARTITION) \ diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index ef35002c45a..d6fc1656eca 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -1,11 +1,9 @@ #pragma once -#include #include -#include -#include -#include +#include #include +#include #include #include @@ -17,6 +15,14 @@ namespace ProfileEvents extern const Event ConnectionPoolIsFullMicroseconds; } +namespace DB +{ + namespace ErrorCodes + { + extern const int LOGICAL_ERROR; + } +} + /** A class from which you can inherit and get a pool of something. Used for database connection pools. * Descendant class must provide a method for creating a new object to place in the pool. */ @@ -29,22 +35,6 @@ public: using ObjectPtr = std::shared_ptr; using Ptr = std::shared_ptr>; - enum class BehaviourOnLimit - { - /** - * Default behaviour - when limit on pool size is reached, callers will wait until object will be returned back in pool. - */ - Wait, - - /** - * If no free objects in pool - allocate a new object, but not store it in pool. - * This behaviour is needed when we simply don't want to waste time waiting or if we cannot guarantee that query could be processed using fixed amount of connections. 
- * For example, when we read from table on s3, one GetObject request corresponds to the whole FileSystemCache segment. This segments are shared between different - * reading tasks, so in general case connection could be taken from pool by one task and returned back by another one. And these tasks are processed completely independently. - */ - AllocateNewBypassingPool, - }; - private: /** The object with the flag, whether it is currently used. */ @@ -99,53 +89,37 @@ public: Object & operator*() && = delete; const Object & operator*() const && = delete; - Object * operator->() & { return castToObjectPtr(); } - const Object * operator->() const & { return castToObjectPtr(); } - Object & operator*() & { return *castToObjectPtr(); } - const Object & operator*() const & { return *castToObjectPtr(); } + Object * operator->() & { return &*data->data.object; } + const Object * operator->() const & { return &*data->data.object; } + Object & operator*() & { return *data->data.object; } + const Object & operator*() const & { return *data->data.object; } /** * Expire an object to make it reallocated later. */ void expire() { - if (data.index() == 1) - std::get<1>(data)->data.is_expired = true; + data->data.is_expired = true; } - bool isNull() const { return data.index() == 0 ? !std::get<0>(data) : !std::get<1>(data); } + bool isNull() const { return data == nullptr; } + + PoolBase * getPool() const + { + if (!data) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Attempt to get pool from uninitialized entry"); + return &data->data.pool; + } private: - /** - * Plain object will be stored instead of PoolEntryHelper if fallback was made in get() (see BehaviourOnLimit::AllocateNewBypassingPool). - */ - std::variant> data; + std::shared_ptr data; - explicit Entry(ObjectPtr && object) : data(std::move(object)) { } - - explicit Entry(PooledObject & object) : data(std::make_shared(object)) { } - - auto castToObjectPtr() const - { - return std::visit( - [](const auto & ptr) - { - using T = std::decay_t; - if constexpr (std::is_same_v) - return ptr.get(); - else - return ptr->data.object.get(); - }, - data); - } + explicit Entry(PooledObject & object) : data(std::make_shared(object)) {} }; virtual ~PoolBase() = default; - /** Allocates the object. - * If 'behaviour_on_limit' is Wait - wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. - * If 'behaviour_on_limit' is AllocateNewBypassingPool and there is no free object - a new object will be created but not stored in the pool. - */ + /** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */ Entry get(Poco::Timespan::TimeDiff timeout) { std::unique_lock lock(mutex); @@ -176,9 +150,6 @@ public: return Entry(*items.back()); } - if (behaviour_on_limit == BehaviourOnLimit::AllocateNewBypassingPool) - return Entry(allocObject()); - Stopwatch blocked; if (timeout < 0) { @@ -213,8 +184,6 @@ private: /** The maximum size of the pool. */ unsigned max_items; - BehaviourOnLimit behaviour_on_limit; - /** Pool. 
*/ Objects items; @@ -225,8 +194,8 @@ private: protected: LoggerPtr log; - PoolBase(unsigned max_items_, LoggerPtr log_, BehaviourOnLimit behaviour_on_limit_ = BehaviourOnLimit::Wait) - : max_items(max_items_), behaviour_on_limit(behaviour_on_limit_), log(log_) + PoolBase(unsigned max_items_, LoggerPtr log_) + : max_items(max_items_), log(log_) { items.reserve(max_items); } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 55e4653b9c5..170fd4e9ca0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -868,6 +868,8 @@ class IColumn; M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \ M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0) \ M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \ + M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \ + M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ @@ -902,7 +904,6 @@ class IColumn; M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.", 0) \ M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ - M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. 
Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 530c5796e5e..6b31e9cd249 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -115,6 +115,7 @@ static std::map sett {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, + {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index d40e5f98aaa..4dffb16e486 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -71,6 +71,17 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo query->replace(ast_create_query.refresh_strategy, metadata.refresh); } + if (metadata.sql_security_type) + { + auto new_sql_security = std::make_shared(); + new_sql_security->type = metadata.sql_security_type; + + if (metadata.definer) + new_sql_security->definer = std::make_shared(*metadata.definer); + + ast_create_query.sql_security = std::move(new_sql_security); + } + /// MaterializedView, Dictionary are types of CREATE query without storage. if (ast_create_query.storage) { diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 2539fa1aeb4..8a193785f87 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -348,6 +348,7 @@ public: String getName() const override { return Name::name; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { @@ -469,9 +470,6 @@ public: else return_type = json_return_type; - /// Top-level LowCardinality columns are processed outside JSON parser. 
- json_return_type = removeLowCardinality(json_return_type); - DataTypes argument_types; argument_types.reserve(arguments.size()); for (const auto & argument : arguments) @@ -867,11 +865,9 @@ struct JSONExtractTree explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } bool insertResultToColumn(IColumn & dest, const Element & element) override { - // If element is an object we delegate the insertion to JSONExtractRawImpl - if (element.isObject()) + // For types other than string, delegate the insertion to JSONExtractRawImpl. + if (!element.isString()) return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); - else if (!element.isString()) - return false; auto str = element.getString(); if (str.size() > fixed_length) @@ -1486,9 +1482,6 @@ public: // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) { - if (element.getObject().size() > fixed_length) - return false; - ColumnFixedString::Chars chars; WriteBufferFromVector buf(chars, AppendModeTag()); traverse(element, buf); diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index d1d1a101187..01184f74b13 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -42,121 +41,6 @@ namespace ErrorCodes namespace { -constexpr double PI = std::numbers::pi_v; -constexpr float PI_F = std::numbers::pi_v; - -constexpr float RAD_IN_DEG = static_cast(PI / 180.0); -constexpr float RAD_IN_DEG_HALF = static_cast(PI / 360.0); - -constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063% -constexpr float COS_LUT_SIZE_F = 1024.0f; // maxerr 0.00063% -constexpr size_t ASIN_SQRT_LUT_SIZE = 512; -constexpr size_t METRIC_LUT_SIZE = 1024; - -/** Earth radius in meters using WGS84 authalic radius. - * We use this value to be consistent with H3 library. 
- */ -constexpr float EARTH_RADIUS = 6371007.180918475f; -constexpr float EARTH_DIAMETER = 2 * EARTH_RADIUS; - - -float cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table -float asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table - -float sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude -float sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude -float wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude - - -inline double sqr(double v) -{ - return v * v; -} - -inline float sqrf(float v) -{ - return v * v; -} - -void geodistInit() -{ - for (size_t i = 0; i <= COS_LUT_SIZE; ++i) - cos_lut[i] = static_cast(cos(2 * PI * i / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE] - - for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i) - asin_sqrt_lut[i] = static_cast(asin( - sqrt(static_cast(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE] - - for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i) - { - double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE] - - /// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees), - /// depending on the latitude (in radians). - - /// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67 - wgs84_metric_meters_lut[i * 2] = static_cast(sqr(111132.09 - 566.05 * cos(2 * latitude) + 1.20 * cos(4 * latitude))); - wgs84_metric_meters_lut[i * 2 + 1] = static_cast(sqr(111415.13 * cos(latitude) - 94.55 * cos(3 * latitude) + 0.12 * cos(5 * latitude))); - - sphere_metric_meters_lut[i] = static_cast(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude))); - - sphere_metric_lut[i] = static_cast(sqr(cos(latitude))); - } -} - -inline NO_SANITIZE_UNDEFINED size_t floatToIndex(float x) -{ - /// Implementation specific behaviour on overflow or infinite value. 
- return static_cast(x); -} - -inline float geodistDegDiff(float f) -{ - f = fabsf(f); - if (f > 180) - f = 360 - f; - return f; -} - -inline float geodistFastCos(float x) -{ - float y = fabsf(x) * (COS_LUT_SIZE_F / PI_F / 2.0f); - size_t i = floatToIndex(y); - y -= i; - i &= (COS_LUT_SIZE - 1); - return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; -} - -inline float geodistFastSin(float x) -{ - float y = fabsf(x) * (COS_LUT_SIZE_F / PI_F / 2.0f); - size_t i = floatToIndex(y); - y -= i; - i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2 - return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; -} - -/// fast implementation of asin(sqrt(x)) -/// max error in floats 0.00369%, in doubles 0.00072% -inline float geodistFastAsinSqrt(float x) -{ - if (x < 0.122f) - { - // distance under 4546 km, Taylor error under 0.00072% - float y = sqrtf(x); - return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f; - } - if (x < 0.948f) - { - // distance under 17083 km, 512-entry LUT error under 0.00072% - x *= ASIN_SQRT_LUT_SIZE; - size_t i = floatToIndex(x); - return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i); - } - return asinf(sqrtf(x)); // distance over 17083 km, just compute exact -} - - enum class Method { SPHERE_DEGREES, @@ -164,18 +48,117 @@ enum class Method WGS84_METERS, }; -} +constexpr size_t ASIN_SQRT_LUT_SIZE = 512; +constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063% +constexpr size_t METRIC_LUT_SIZE = 1024; + +/// Earth radius in meters using WGS84 authalic radius. +/// We use this value to be consistent with H3 library. +constexpr double EARTH_RADIUS = 6371007.180918475; +constexpr double EARTH_DIAMETER = 2.0 * EARTH_RADIUS; +constexpr double PI = std::numbers::pi_v; + +template +T sqr(T v) { return v * v; } + +template +struct Impl +{ + T cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table + T asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table + T sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude + T sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude + T wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude + + Impl() + { + for (size_t i = 0; i <= COS_LUT_SIZE; ++i) + cos_lut[i] = T(std::cos(2 * PI * static_cast(i) / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE] + + for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i) + asin_sqrt_lut[i] = T(std::asin(std::sqrt(static_cast(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE] + + for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i) + { + double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE] + + /// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees), + /// depending on the latitude (in radians). 
+ + /// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67 + wgs84_metric_meters_lut[i * 2] = T(sqr(111132.09 - 566.05 * std::cos(2.0 * latitude) + 1.20 * std::cos(4.0 * latitude))); + wgs84_metric_meters_lut[i * 2 + 1] = T(sqr(111415.13 * std::cos(latitude) - 94.55 * std::cos(3.0 * latitude) + 0.12 * std::cos(5.0 * latitude))); + sphere_metric_meters_lut[i] = T(sqr((EARTH_DIAMETER * PI / 360) * std::cos(latitude))); + + sphere_metric_lut[i] = T(sqr(std::cos(latitude))); + } + } + + static inline NO_SANITIZE_UNDEFINED size_t toIndex(T x) + { + /// Implementation specific behaviour on overflow or infinite value. + return static_cast(x); + } + + static inline T degDiff(T f) + { + f = std::abs(f); + if (f > 180) + f = 360 - f; + return f; + } + + inline T fastCos(T x) + { + T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); + size_t i = toIndex(y); + y -= i; + i &= (COS_LUT_SIZE - 1); + return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; + } + + inline T fastSin(T x) + { + T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); + size_t i = toIndex(y); + y -= i; + i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2 + return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; + } + + /// fast implementation of asin(sqrt(x)) + /// max error in floats 0.00369%, in doubles 0.00072% + inline T fastAsinSqrt(T x) + { + if (x < T(0.122)) + { + // distance under 4546 km, Taylor error under 0.00072% + T y = std::sqrt(x); + return y + x * y * T(0.166666666666666) + x * x * y * T(0.075) + x * x * x * y * T(0.044642857142857); + } + if (x < T(0.948)) + { + // distance under 17083 km, 512-entry LUT error under 0.00072% + x *= ASIN_SQRT_LUT_SIZE; + size_t i = toIndex(x); + return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i); + } + return std::asin(std::sqrt(x)); /// distance is over 17083 km, just compute exact + } +}; + +template Impl impl; DECLARE_MULTITARGET_CODE( namespace { -template -float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg) +template +T distance(T lon1deg, T lat1deg, T lon2deg, T lat2deg) { - float lat_diff = geodistDegDiff(lat1deg - lat2deg); - float lon_diff = geodistDegDiff(lon1deg - lon2deg); + T lat_diff = impl.degDiff(lat1deg - lat2deg); + T lon_diff = impl.degDiff(lon1deg - lon2deg); if (lon_diff < 13) { @@ -187,51 +170,54 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg) /// (Remember how a plane flies from Amsterdam to New York) /// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line. - float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes - size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1); + T latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes + size_t latitude_midpoint_index = impl.toIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1); /// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1". 
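/// Concretely, with i = latitude_midpoint_index and a lookup table lut, the interpolated coefficient is k = lut[i] + (lut[i + 1] - lut[i]) * (latitude_midpoint - i).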
- float k_lat{}; - float k_lon{}; + T k_lat{}; + T k_lon{}; if constexpr (method == Method::SPHERE_DEGREES) { k_lat = 1; - k_lon = sphere_metric_lut[latitude_midpoint_index] - + (sphere_metric_lut[latitude_midpoint_index + 1] - sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index); + k_lon = impl.sphere_metric_lut[latitude_midpoint_index] + + (impl.sphere_metric_lut[latitude_midpoint_index + 1] - impl.sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index); } else if constexpr (method == Method::SPHERE_METERS) { - k_lat = sqrf(EARTH_DIAMETER * PI_F / 360.0f); + k_lat = sqr(T(EARTH_DIAMETER) * T(PI) / T(360.0)); - k_lon = sphere_metric_meters_lut[latitude_midpoint_index] - + (sphere_metric_meters_lut[latitude_midpoint_index + 1] - sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index); + k_lon = impl.sphere_metric_meters_lut[latitude_midpoint_index] + + (impl.sphere_metric_meters_lut[latitude_midpoint_index + 1] - impl.sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index); } else if constexpr (method == Method::WGS84_METERS) { - k_lat = wgs84_metric_meters_lut[latitude_midpoint_index * 2] - + (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index); + k_lat = impl.wgs84_metric_meters_lut[latitude_midpoint_index * 2] + + (impl.wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - impl.wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index); - k_lon = wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1] - + (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index); + k_lon = impl.wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1] + + (impl.wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - impl.wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index); } /// Metric on a tangent plane: it differs from Euclidean metric only by scale of coordinates. - return sqrtf(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff); + return std::sqrt(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff); } else { - // points too far away; use haversine + /// Points are too far away: use Haversine. - float a = sqrf(geodistFastSin(lat_diff * RAD_IN_DEG_HALF)) - + geodistFastCos(lat1deg * RAD_IN_DEG) * geodistFastCos(lat2deg * RAD_IN_DEG) * sqrf(geodistFastSin(lon_diff * RAD_IN_DEG_HALF)); + static constexpr T RAD_IN_DEG = T(PI / 180.0); + static constexpr T RAD_IN_DEG_HALF = T(PI / 360.0); + + T a = sqr(impl.fastSin(lat_diff * RAD_IN_DEG_HALF)) + + impl.fastCos(lat1deg * RAD_IN_DEG) * impl.fastCos(lat2deg * RAD_IN_DEG) * sqr(impl.fastSin(lon_diff * RAD_IN_DEG_HALF)); if constexpr (method == Method::SPHERE_DEGREES) - return (360.0f / PI_F) * geodistFastAsinSqrt(a); + return (T(360.0) / T(PI)) * impl.fastAsinSqrt(a); else - return EARTH_DIAMETER * geodistFastAsinSqrt(a); + return T(EARTH_DIAMETER) * impl.fastAsinSqrt(a); } } @@ -241,13 +227,24 @@ template class FunctionGeoDistance : public IFunction { public: - static constexpr auto name = - (method == Method::SPHERE_DEGREES) ? "greatCircleAngle" - : ((method == Method::SPHERE_METERS) ? 
"greatCircleDistance" - : "geoDistance"); + explicit FunctionGeoDistance(ContextPtr context) + { + always_float32 = !context->getSettingsRef().geo_distance_returns_float64_on_float64_arguments; + } private: - String getName() const override { return name; } + bool always_float32; + + String getName() const override + { + if constexpr (method == Method::SPHERE_DEGREES) + return "greatCircleAngle"; + if constexpr (method == Method::SPHERE_METERS) + return "greatCircleDistance"; + else + return "geoDistance"; + } + size_t getNumberOfArguments() const override { return 4; } bool useDefaultImplementationForConstants() const override { return true; } @@ -255,22 +252,31 @@ private: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - for (const auto arg_idx : collections::range(0, arguments.size())) + bool has_float64 = false; + + for (size_t arg_idx = 0; arg_idx < 4; ++arg_idx) { - const auto * arg = arguments[arg_idx].get(); - if (!isNumber(WhichDataType(arg))) + WhichDataType which(arguments[arg_idx]); + + if (!isNumber(which)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. " - "Must be numeric", arg->getName(), std::to_string(arg_idx + 1), getName()); + "Must be numeric", arguments[arg_idx]->getName(), std::to_string(arg_idx + 1), getName()); + + if (which.isFloat64()) + has_float64 = true; } - return std::make_shared(); + if (has_float64 && !always_float32) + return std::make_shared(); + else + return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(input_rows_count); + bool returns_float64 = WhichDataType(result_type).isFloat64(); + + auto dst = result_type->createColumn(); auto arguments_copy = arguments; for (auto & argument : arguments_copy) @@ -280,10 +286,24 @@ private: argument.type = result_type; } - const auto * col_lon1 = convertArgumentColumnToFloat32(arguments_copy, 0); - const auto * col_lat1 = convertArgumentColumnToFloat32(arguments_copy, 1); - const auto * col_lon2 = convertArgumentColumnToFloat32(arguments_copy, 2); - const auto * col_lat2 = convertArgumentColumnToFloat32(arguments_copy, 3); + if (returns_float64) + run(arguments_copy, dst, input_rows_count); + else + run(arguments_copy, dst, input_rows_count); + + return dst; + } + + template + void run(const ColumnsWithTypeAndName & arguments, MutableColumnPtr & dst, size_t input_rows_count) const + { + const auto * col_lon1 = convertArgumentColumn(arguments, 0); + const auto * col_lat1 = convertArgumentColumn(arguments, 1); + const auto * col_lon2 = convertArgumentColumn(arguments, 2); + const auto * col_lat2 = convertArgumentColumn(arguments, 3); + + auto & dst_data = assert_cast &>(*dst).getData(); + dst_data.resize(input_rows_count); for (size_t row_num = 0; row_num < input_rows_count; ++row_num) { @@ -291,20 +311,20 @@ private: col_lon1->getData()[row_num], col_lat1->getData()[row_num], col_lon2->getData()[row_num], col_lat2->getData()[row_num]); } - - return dst; } - const ColumnFloat32 * convertArgumentColumnToFloat32(const ColumnsWithTypeAndName & arguments, size_t argument_index) const + template + const ColumnVector * convertArgumentColumn(const ColumnsWithTypeAndName & arguments, size_t argument_index) const { - const auto * column_typed = checkAndGetColumn(arguments[argument_index].column.get()); + const auto * column_typed = 
checkAndGetColumn>(arguments[argument_index].column.get()); if (!column_typed) throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Illegal type {} of argument {} of function {}. Must be Float32.", + "Illegal type {} of argument {} of function {}. Must be {}.", arguments[argument_index].type->getName(), argument_index + 1, - getName()); + getName(), + TypeName); return column_typed; } @@ -316,18 +336,19 @@ template class FunctionGeoDistance : public TargetSpecific::Default::FunctionGeoDistance { public: - explicit FunctionGeoDistance(ContextPtr context) : selector(context) + explicit FunctionGeoDistance(ContextPtr context) + : TargetSpecific::Default::FunctionGeoDistance(context), selector(context) { selector.registerImplementation>(); + TargetSpecific::Default::FunctionGeoDistance>(context); #if USE_MULTITARGET_CODE selector.registerImplementation>(); + TargetSpecific::AVX::FunctionGeoDistance>(context); selector.registerImplementation>(); + TargetSpecific::AVX2::FunctionGeoDistance>(context); selector.registerImplementation>(); + TargetSpecific::AVX512F::FunctionGeoDistance>(context); #endif } @@ -345,12 +366,13 @@ private: ImplementationSelector selector; }; +} + REGISTER_FUNCTION(GeoDistance) { - geodistInit(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction("greatCircleAngle", [](ContextPtr context) { return std::make_shared>(std::move(context)); }); + factory.registerFunction("greatCircleDistance", [](ContextPtr context) { return std::make_shared>(std::move(context)); }); + factory.registerFunction("geoDistance", [](ContextPtr context) { return std::make_shared>(std::move(context)); }); } } diff --git a/src/Functions/identity.cpp b/src/Functions/identity.cpp index 43cca76c801..2541e715cb1 100644 --- a/src/Functions/identity.cpp +++ b/src/Functions/identity.cpp @@ -14,4 +14,9 @@ REGISTER_FUNCTION(ScalarSubqueryResult) factory.registerFunction(); } +REGISTER_FUNCTION(ActionName) +{ + factory.registerFunction(); +} + } diff --git a/src/Functions/identity.h b/src/Functions/identity.h index c753625caa7..3422342e20b 100644 --- a/src/Functions/identity.h +++ b/src/Functions/identity.h @@ -42,4 +42,18 @@ struct ScalarSubqueryResultName using FunctionIdentity = FunctionIdentityBase; using FunctionScalarSubqueryResult = FunctionIdentityBase; +struct ActionNameName +{ + static constexpr auto name = "__actionName"; +}; + +class FunctionActionName : public FunctionIdentityBase +{ +public: + using FunctionIdentityBase::FunctionIdentityBase; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + size_t getNumberOfArguments() const override { return 2; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } +}; + } diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 12a86ac55f6..9e72705341d 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -79,7 +79,7 @@ inline char * writeVarInt(Int64 x, char * ostr) return writeVarUInt(static_cast((x << 1) ^ (x >> 63)), ostr); } -namespace impl +namespace varint_impl { template @@ -106,8 +106,8 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { if (istr.buffer().end() - istr.position() >= 10) - return impl::readVarUInt(x, istr); - return impl::readVarUInt(x, istr); + return varint_impl::readVarUInt(x, istr); + return varint_impl::readVarUInt(x, istr); } inline void readVarUInt(UInt64 & x, std::istream & istr) diff --git a/src/Interpreters/ActionsDAG.cpp 
b/src/Interpreters/ActionsDAG.cpp index 37ef217cb6d..09e9364a3f1 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -741,7 +741,7 @@ Block ActionsDAG::updateHeader(Block header) const catch (Exception & e) { if (e.code() == ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK) - e.addMessage(" in block {}", header.dumpStructure()); + e.addMessage("in block {}", header.dumpStructure()); throw; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 086ed333366..7030522dd2a 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1219,7 +1219,7 @@ void Context::addWarningMessageAboutDatabaseOrdinary(const String & database_nam /// We don't use getFlagsPath method, because it takes a shared lock. auto convert_databases_flag = fs::path(shared->flags_path) / "convert_ordinary_to_atomic"; auto message = fmt::format("Server has databases (for example `{}`) with Ordinary engine, which was deprecated. " - "To convert this database to a new Atomic engine, please create a forcing flag {} and make sure that ClickHouse has write permission for it. " + "To convert this database to a new Atomic engine, create a flag {} and make sure that ClickHouse has write permission for it. " "Example: sudo touch '{}' && sudo chmod 666 '{}'", database_name, convert_databases_flag.string(), convert_databases_flag.string(), convert_databases_flag.string()); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 8b777a758f6..d90d2446fca 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1881,7 +1881,7 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_attach, bool is_materialized_view) { /// If no SQL security is specified, apply default from default_*_view_sql_security setting. - if (!sql_security.type.has_value()) + if (!sql_security.type) { SQLSecurityType default_security; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 45ca9afce7c..222447ca650 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -341,6 +341,11 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const return part && part->hasProjection(name); } +bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const +{ + return part && part->hasBrokenProjection(name); +} + bool MutationsInterpreter::Source::isCompactPart() const { return part && part->getType() == MergeTreeDataPartType::Compact; @@ -802,7 +807,7 @@ void MutationsInterpreter::prepare(bool dry_run) { mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); const auto & projection = projections_desc.get(command.projection_name); - if (!source.hasProjection(projection.name)) + if (!source.hasProjection(projection.name) || source.hasBrokenProjection(projection.name)) { for (const auto & column : projection.required_columns) dependencies.emplace(column, ColumnDependency::PROJECTION); @@ -989,6 +994,13 @@ void MutationsInterpreter::prepare(bool dry_run) if (!source.hasProjection(projection.name)) continue; + /// Always rebuild broken projections. 
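+ /// A projection is marked broken when its part fails to load (see IMergeTreeDataPart::loadProjections); inserting it into materialized_projections makes this mutation rebuild the projection from scratch instead of keeping the unusable part.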
+ if (source.hasBrokenProjection(projection.name)) + { + materialized_projections.insert(projection.name); + continue; + } + if (need_rebuild_projections) { materialized_projections.insert(projection.name); diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index eda94190185..4c35ec34b58 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -126,6 +126,7 @@ public: bool materializeTTLRecalculateOnly() const; bool hasSecondaryIndex(const String & name) const; bool hasProjection(const String & name) const; + bool hasBrokenProjection(const String & name) const; bool isCompactPart() const; void read( diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index de5eb40837f..0403dc33164 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -14,7 +14,7 @@ namespace DB void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - if (!type.has_value()) + if (!type) return; if (definer || is_definer_current_user) diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index b2a07bc06bf..49964b5c728 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -299,6 +299,7 @@ namespace DB MR_MACROS(MOD, "MOD") \ MR_MACROS(MODIFY_COLUMN, "MODIFY COLUMN") \ MR_MACROS(MODIFY_COMMENT, "MODIFY COMMENT") \ + MR_MACROS(MODIFY_DEFINER, "MODIFY DEFINER") \ MR_MACROS(MODIFY_ORDER_BY, "MODIFY ORDER BY") \ MR_MACROS(MODIFY_QUERY, "MODIFY QUERY") \ MR_MACROS(MODIFY_REFRESH, "MODIFY REFRESH") \ @@ -445,6 +446,7 @@ namespace DB MR_MACROS(SPATIAL, "SPATIAL") \ MR_MACROS(SQL_SECURITY, "SQL SECURITY") \ MR_MACROS(SS, "SS") \ + MR_MACROS(START_TRANSACTION, "START TRANSACTION") \ MR_MACROS(STATISTIC, "STATISTIC") \ MR_MACROS(STEP, "STEP") \ MR_MACROS(STORAGE, "STORAGE") \ @@ -554,7 +556,7 @@ namespace DB MR_MACROS(SSH_KEY, "SSH_KEY") \ MR_MACROS(SSL_CERTIFICATE, "SSL_CERTIFICATE") \ MR_MACROS(STRICTLY_ASCENDING, "STRICTLY_ASCENDING") \ - MR_MACROS(WITH_ITEMINDEX, "with_itemindex") \ + MR_MACROS(WITH_ITEMINDEX, "WITH_ITEMINDEX") \ enum class Keyword : size_t { diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index c0c43381585..1baff45113b 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -41,6 +41,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_reset_setting(Keyword::RESET_SETTING); ParserKeyword s_modify_query(Keyword::MODIFY_QUERY); ParserKeyword s_modify_sql_security(Keyword::MODIFY_SQL_SECURITY); + ParserKeyword s_modify_definer(Keyword::MODIFY_DEFINER); ParserKeyword s_modify_refresh(Keyword::MODIFY_REFRESH); ParserKeyword s_add_index(Keyword::ADD_INDEX); @@ -862,11 +863,16 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; command->type = ASTAlterCommand::MODIFY_QUERY; } - else if (s_modify_sql_security.ignore(pos, expected)) + else if (s_modify_sql_security.checkWithoutMoving(pos, expected)) { - /// This is a hack so we can reuse parser from create and don't have to write `MODIFY SQL SECURITY SQL SECURITY INVOKER` - --pos; - --pos; + s_modify.ignore(pos, expected); + if (!sql_security_p.parse(pos, command_sql_security, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_SQL_SECURITY; + } + else if (s_modify_definer.checkWithoutMoving(pos, expected)) + { + s_modify.ignore(pos, expected); if 
(!sql_security_p.parse(pos, command_sql_security, expected)) return false; command->type = ASTAlterCommand::MODIFY_SQL_SECURITY; diff --git a/src/Parsers/ParserTransactionControl.cpp b/src/Parsers/ParserTransactionControl.cpp index 43d28850246..62ed02c779f 100644 --- a/src/Parsers/ParserTransactionControl.cpp +++ b/src/Parsers/ParserTransactionControl.cpp @@ -14,6 +14,8 @@ bool ParserTransactionControl::parseImpl(Pos & pos, ASTPtr & node, Expected & ex if (ParserKeyword(Keyword::BEGIN_TRANSACTION).ignore(pos, expected)) action = ASTTransactionControl::BEGIN; + else if (ParserKeyword(Keyword::START_TRANSACTION).ignore(pos, expected)) + action = ASTTransactionControl::BEGIN; else if (ParserKeyword(Keyword::COMMIT).ignore(pos, expected)) action = ASTTransactionControl::COMMIT; else if (ParserKeyword(Keyword::ROLLBACK).ignore(pos, expected)) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index af23e684f23..326dd683343 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -157,6 +157,12 @@ public: case QueryTreeNodeType::FUNCTION: { const auto & function_node = node->as(); + if (function_node.getFunctionName() == "__actionName") + { + result = toString(function_node.getArguments().getNodes().at(1)->as()->getValue()); + break; + } + String in_function_second_argument_node_name; if (isNameOfInFunction(function_node.getFunctionName())) diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 8333f5e857b..3009460a468 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -223,7 +223,7 @@ bool analyzeProjectionCandidate( { const auto & created_projections = part_with_ranges.data_part->getProjectionParts(); auto it = created_projections.find(candidate.projection->name); - if (it != created_projections.end()) + if (it != created_projections.end() && !it->second->is_broken) { projection_parts.push_back(it->second); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index ff9941ee808..18e4c87b298 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -335,7 +335,9 @@ void DataPartStorageOnDiskBase::backup( const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs) const + TemporaryFilesOnDisks * temp_dirs, + bool is_projection_part, + bool allow_backup_broken_projection) const { fs::path part_path_on_disk = fs::path{root_path} / part_dir; fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir; @@ -377,7 +379,7 @@ void DataPartStorageOnDiskBase::backup( bool copy_encrypted = !backup_settings.decrypt_files_from_encrypted_disks; - for (const auto & filepath : files_to_backup) + auto backup_file = [&](const String & filepath) { auto filepath_on_disk = part_path_on_disk / filepath; auto filepath_in_backup = part_path_in_backup / filepath; @@ -385,8 +387,10 @@ void DataPartStorageOnDiskBase::backup( if (files_without_checksums.contains(filepath)) { backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, read_settings, copy_encrypted)); - continue; + return; } + else if (is_projection_part && allow_backup_broken_projection && !disk->exists(filepath_on_disk)) + return; if 
(make_temporary_hard_links) { @@ -411,6 +415,31 @@ void DataPartStorageOnDiskBase::backup( backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner); backup_entries.emplace_back(filepath_in_backup, std::move(backup_entry)); + }; + + auto * log = &Poco::Logger::get("DataPartStorageOnDiskBase::backup"); + + for (const auto & filepath : files_to_backup) + { + if (is_projection_part && allow_backup_broken_projection) + { + try + { + backup_file(filepath); + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::FILE_DOESNT_EXIST) + throw; + + LOG_ERROR(log, "Cannot backup file {} of projection part {}. Will try to ignore it", filepath, part_dir); + continue; + } + } + else + { + backup_file(filepath); + } } } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 52dc850c7fd..75bf3d6f93c 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -58,7 +58,9 @@ public: const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs) const override; + TemporaryFilesOnDisks * temp_dirs, + bool is_projection_part, + bool allow_backup_broken_projection) const override; MutableDataPartStoragePtr freeze( const std::string & to, diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 5899ef58cd5..d06d9791a53 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -223,7 +223,9 @@ public: const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs) const = 0; + TemporaryFilesOnDisks * temp_dirs, + bool is_projection_part, + bool allow_backup_broken_projection) const = 0; /// Creates hardlinks into 'to/dir_path' for every file in data part. /// Callback is called after hardlinks are created, but before 'delete-on-destroy.txt' marker is removed. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 57f951ea987..a26e2b725be 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -705,13 +705,14 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. loadExistingRowsCount(); /// Must be called after loadRowsCount() as it uses the value of `rows_count`. 
loadPartitionAndMinMaxIndex(); + bool has_broken_projections = false; if (!parent_part) { loadTTLInfos(); - loadProjections(require_columns_checksums, check_consistency, false /* if_not_loaded */); + loadProjections(require_columns_checksums, check_consistency, has_broken_projections, false /* if_not_loaded */); } - if (check_consistency) + if (check_consistency && !has_broken_projections) checkConsistency(require_columns_checksums); loadDefaultCompressionCodec(); @@ -776,7 +777,7 @@ void IMergeTreeDataPart::addProjectionPart( projection_parts[projection_name] = std::move(projection_part); } -void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded) +void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded) { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); for (const auto & projection : metadata_snapshot->projections) @@ -793,10 +794,36 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch else { auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build(); - part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); + + try + { + part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); + } + catch (...) + { + if (isRetryableException(std::current_exception())) + throw; + + auto message = getCurrentExceptionMessage(true); + LOG_ERROR(&Poco::Logger::get("IMergeTreeDataPart"), + "Cannot load projection {}, will consider it broken. Reason: {}", projection.name, message); + + has_broken_projection = true; + part->setBrokenReason(message, getCurrentExceptionCode()); + } + addProjectionPart(projection.name, std::move(part)); } } + else if (check_consistency && checksums.has(path)) + { + auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build(); + part->setBrokenReason( + "Projection directory " + path + " does not exist while loading projections. Stacktrace: " + StackTrace().toString(), + ErrorCodes::NO_FILE_IN_DATA_PART); + addProjectionPart(projection.name, std::move(part)); + has_broken_projection = true; + } } } @@ -1216,7 +1243,8 @@ void IMergeTreeDataPart::loadChecksums(bool require) /// Check the data while we are at it. LOG_WARNING(storage.log, "Checksums for part {} not found. 
Will calculate them from data on disk.", name); - checksums = checkDataPart(shared_from_this(), false); + bool noop; + checksums = checkDataPart(shared_from_this(), false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */false); writeChecksums(checksums, {}); bytes_on_disk = checksums.getTotalSizeOnDisk(); @@ -1352,8 +1380,9 @@ void IMergeTreeDataPart::loadExistingRowsCount() if (existing_rows_count.has_value()) return; - if (!rows_count || !storage.getSettings()->load_existing_rows_count_for_old_parts || !supportLightweightDeleteMutate() - || !hasLightweightDelete()) + if (!rows_count || !supportLightweightDeleteMutate() || !hasLightweightDelete() + || !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge + || !storage.getSettings()->load_existing_rows_count_for_old_parts) existing_rows_count = rows_count; else existing_rows_count = readExistingRowsCount(); @@ -2337,6 +2366,32 @@ std::optional IMergeTreeDataPart::getStreamNameForColumn( return getStreamNameOrHash(stream_name, extension, storage_); } +void IMergeTreeDataPart::markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const +{ + auto it = projection_parts.find(projection_name); + if (it == projection_parts.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no projection part '{}'", projection_name); + it->second->setBrokenReason(message, code); +} + +bool IMergeTreeDataPart::hasBrokenProjection(const String & projection_name) const +{ + auto it = projection_parts.find(projection_name); + if (it == projection_parts.end()) + return false; + return it->second->is_broken; +} + +void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const +{ + std::lock_guard lock(broken_reason_mutex); + if (is_broken) + return; + is_broken = true; + exception = message; + exception_code = code; +} + bool isCompactPart(const MergeTreeDataPartPtr & data_part) { return (data_part && data_part->getType() == MergeTreeDataPartType::Compact); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 03095e69a71..7519980a7a3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -265,6 +265,12 @@ public: /// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table. mutable std::atomic is_frozen {false}; + /// If it is a projection part, it can be broken sometimes. + mutable std::atomic is_broken {false}; + mutable std::string exception; + mutable int exception_code = 0; + mutable std::mutex broken_reason_mutex; + /// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper mutable bool is_unexpected_local_part = false; @@ -428,9 +434,16 @@ public: void addProjectionPart(const String & projection_name, std::shared_ptr && projection_part); + void markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const; + bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); } - void loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded = false); + bool hasBrokenProjection(const String & projection_name) const; + + /// Return true, if all projections were loaded successfully and none was marked as broken. 
+ void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false); + + void setBrokenReason(const String & message, int code) const; /// Return set of metadata file names without checksums. For example, /// columns.txt or checksums.txt itself. @@ -593,7 +606,7 @@ protected: const IMergeTreeDataPart * parent_part; String parent_part_name; - std::map> projection_parts; + mutable std::map> projection_parts; mutable PartMetadataManagerPtr metadata_manager; @@ -673,7 +686,8 @@ private: /// For the older format version calculates rows count from the size of a column with a fixed size. void loadRowsCount(); - /// Load existing rows count from _row_exists column if load_existing_rows_count_for_old_parts is true. + /// Load existing rows count from _row_exists column + /// if load_existing_rows_count_for_old_parts and exclude_deleted_rows_for_part_size_in_merge are both enabled. void loadExistingRowsCount(); static void appendFilesOfRowsCount(Strings & files); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index bc49a505d8b..1776d1da27c 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -730,8 +730,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c MergeTreeData::DataPartsVector projection_parts; for (const auto & part : global_ctx->future_part->parts) { - auto it = part->getProjectionParts().find(projection.name); - if (it != part->getProjectionParts().end()) + auto actual_projection_parts = part->getProjectionParts(); + auto it = actual_projection_parts.find(projection.name); + if (it != actual_projection_parts.end() && !it->second->is_broken) projection_parts.push_back(it->second); } if (projection_parts.size() < global_ctx->future_part->parts.size()) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6cc8063d90a..08a2ff89e7b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5302,7 +5302,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( if (hold_table_lock && !table_lock) table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - if (backup_settings.check_parts) + if (backup_settings.check_projection_parts) part->checkConsistencyWithProjections(/* require_part_metadata= */ true); BackupEntries backup_entries_from_part; @@ -5314,7 +5314,8 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( read_settings, make_temporary_hard_links, backup_entries_from_part, - &temp_dirs); + &temp_dirs, + false, false); auto projection_parts = part->getProjectionParts(); for (const auto & [projection_name, projection_part] : projection_parts) @@ -5327,7 +5328,9 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( read_settings, make_temporary_hard_links, backup_entries_from_part, - &temp_dirs); + &temp_dirs, + projection_part->is_broken, + backup_settings.allow_backup_broken_projections); } if (hold_storage_and_part_ptrs) @@ -7786,21 +7789,39 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason) { - if (left->getProjectionParts().size() != right->getProjectionParts().size()) + auto remove_broken_parts_from_consideration = [](auto & parts) + { + std::set 
broken_projection_parts; + for (const auto & [name, part] : parts) + { + if (part->is_broken) + broken_projection_parts.emplace(name); + } + for (const auto & name : broken_projection_parts) + parts.erase(name); + }; + + auto left_projection_parts = left->getProjectionParts(); + auto right_projection_parts = right->getProjectionParts(); + + remove_broken_parts_from_consideration(left_projection_parts); + remove_broken_parts_from_consideration(right_projection_parts); + + if (left_projection_parts.size() != right_projection_parts.size()) { out_reason = fmt::format( "Parts have different number of projections: {} in part '{}' and {} in part '{}'", - left->getProjectionParts().size(), + left_projection_parts.size(), left->name, - right->getProjectionParts().size(), + right_projection_parts.size(), right->name ); return false; } - for (const auto & [name, _] : left->getProjectionParts()) + for (const auto & [name, _] : left_projection_parts) { - if (!right->hasProjection(name)) + if (!right_projection_parts.contains(name)) { out_reason = fmt::format( "The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 10777cced7d..046376be474 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -464,8 +464,13 @@ public: struct ProjectionPartsVector { - DataPartsVector projection_parts; DataPartsVector data_parts; + + DataPartsVector projection_parts; + DataPartStateVector projection_parts_states; + + DataPartsVector broken_projection_parts; + DataPartStateVector broken_projection_parts_states; }; /// Returns a copy of the list so that the caller shouldn't worry about locks. @@ -480,7 +485,7 @@ public: const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const; /// Same as above but only returns projection parts ProjectionPartsVector getProjectionPartsVectorForInternalUsage( - const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const; + const DataPartStates & affordable_states, MergeTreeData::DataPartStateVector * out_states) const; /// Returns absolutely all parts (and snapshot of their states) diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 837b940e354..d4980a67a43 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -54,6 +54,8 @@ struct MergeTreeDataPartChecksums bool has(const String & file_name) const { return files.find(file_name) != files.end(); } + bool remove(const String & file_name) { return files.erase(file_name); } + bool empty() const { return files.empty(); } /// Checks that the set of columns and their checksums are the same. If not, throws an exception. 
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3c0f42b618d..f67e9484598 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -592,7 +592,9 @@ static std::set getProjectionsToRecalculate( { bool need_recalculate = materialized_projections.contains(projection.name) - || (!is_full_part_storage && source_part->hasProjection(projection.name)); + || (!is_full_part_storage + && source_part->hasProjection(projection.name) + && !source_part->hasBrokenProjection(projection.name)); if (need_recalculate) projections_to_recalc.insert(&projection); @@ -936,7 +938,8 @@ void finalizeMutatedPart( new_data_part->modification_time = time(nullptr); /// Load rest projections which are hardlinked - new_data_part->loadProjections(false, false, true /* if_not_loaded */); + bool noop; + new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */); /// All information about sizes is stored in checksums. /// It doesn't make sense to touch filesystem for sizes. @@ -1534,7 +1537,9 @@ private: bool need_recalculate = ctx->materialized_projections.contains(projection.name) - || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name)); + || (!is_full_part_storage + && ctx->source_part->hasProjection(projection.name) + && !ctx->source_part->hasBrokenProjection(projection.name)); if (need_recalculate) { @@ -1671,8 +1676,9 @@ private: void finalize() { + bool noop; ctx->new_data_part->minmax_idx = std::move(ctx->minmax_idx); - ctx->new_data_part->loadProjections(false, false, true /* if_not_loaded */); + ctx->new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */); ctx->mutating_executor.reset(); ctx->mutating_pipeline.reset(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 054c576cfc5..7693f34cc1e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -93,6 +93,7 @@ struct ReplicatedMergeTreeLogEntryData MergeTreeDataPartFormat new_part_format; String block_id; /// For parts of level zero, the block identifier for deduplication (node name in /blocks/). mutable String actual_new_part_name; /// GET_PART could actually fetch a part covering 'new_part_name'. 
+ mutable std::unordered_set replace_range_actual_new_part_names; /// Same as above, but for REPLACE_RANGE UUID new_part_uuid = UUIDHelpers::Nil; Strings source_parts; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 156c41563ec..bc0b4f73a31 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -63,7 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t if (parts_set.contains(name)) return; - LOG_TRACE(log, "Enqueueing {} for check after after {}s", name, delay_to_check_seconds); + LOG_TRACE(log, "Enqueueing {} for check after {}s", name, delay_to_check_seconds); parts_queue.emplace_back(name, std::chrono::steady_clock::now() + std::chrono::seconds(delay_to_check_seconds)); parts_set.insert(name); task->schedule(); @@ -274,7 +274,7 @@ std::pair ReplicatedMergeTreePartCheckThread::findLo return std::make_pair(exists_in_zookeeper, part); } -ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name) +ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name, bool throw_on_broken_projection) { ReplicatedCheckResult result; auto [exists_in_zookeeper, part] = findLocalPart(part_name); @@ -341,6 +341,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St /// before the ReplicatedMergeTreePartHeader was introduced. String part_path = storage.replica_path + "/parts/" + part_name; String part_znode = zookeeper->get(part_path); + bool is_broken_projection = false; try { @@ -362,8 +363,10 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St checkDataPart( part, - true, - [this] { return need_stop.load(); }); + /* require_checksums */true, + is_broken_projection, + [this] { return need_stop.load(); }, + throw_on_broken_projection); if (need_stop) { @@ -382,14 +385,27 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St if (isRetryableException(std::current_exception())) throw; - tryLogCurrentException(log, __PRETTY_FUNCTION__); + PreformattedMessage message; + if (is_broken_projection) + { + WriteBufferFromOwnString wb; + message = PreformattedMessage::create( + "Part {} has broken projections. They will be ignored. Broken projections info: {}", + part_name, getCurrentExceptionMessage(false)); + LOG_DEBUG(log, message); + result.action = ReplicatedCheckResult::DoNothing; + } + else + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); - auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); - LOG_ERROR(log, message); + message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); + LOG_ERROR(log, message); + result.action = ReplicatedCheckResult::TryFetchMissing; + } /// Part is broken, let's try to find it and fetch. 
result.status = {part_name, false, message}; - result.action = ReplicatedCheckResult::TryFetchMissing; return result; } @@ -419,12 +435,12 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St } -CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional * recheck_after) +CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional * recheck_after, bool throw_on_broken_projection) { LOG_INFO(log, "Checking part {}", part_name); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); - ReplicatedCheckResult result = checkPartImpl(part_name); + ReplicatedCheckResult result = checkPartImpl(part_name, throw_on_broken_projection); switch (result.action) { case ReplicatedCheckResult::None: UNREACHABLE(); @@ -577,7 +593,7 @@ void ReplicatedMergeTreePartCheckThread::run() } std::optional recheck_after; - checkPartAndFix(selected->name, &recheck_after); + checkPartAndFix(selected->name, &recheck_after, /* throw_on_broken_projection */false); if (need_stop) return; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index f2e26b3d324..9091f698546 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -65,9 +65,9 @@ public: size_t size() const; /// Check part by name - CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr); + CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr, bool throw_on_broken_projection = true); - ReplicatedCheckResult checkPartImpl(const String & part_name); + ReplicatedCheckResult checkPartImpl(const String & part_name, bool throw_on_broken_projection); std::unique_lock pausePartsCheck(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 42f564f40da..ee4ed87d456 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -342,6 +342,11 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( /// NOTE actual_new_part_name is very confusing and error-prone. This approach must be fixed. 
removeCoveredPartsFromMutations(entry->actual_new_part_name, /*remove_part = */ false, /*remove_covered_parts = */ true); } + for (const auto & actual_part : entry->replace_range_actual_new_part_names) + { + LOG_TEST(log, "Entry {} has actual new part name {}, removing it from mutations", entry->znode_name, actual_part); + removeCoveredPartsFromMutations(actual_part, /*remove_part = */ false, /*remove_covered_parts = */ true); + } LOG_TEST(log, "Adding parts [{}] to current parts", fmt::join(entry_virtual_parts, ", ")); @@ -1180,9 +1185,9 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry if (entry_for_same_part_it != future_parts.end()) { const LogEntry & another_entry = *entry_for_same_part_it->second; - constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} " + constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} (actual part {})" "because another log entry {} of type {} for the same part ({}) is being processed."; - LOG_INFO(LogToStr(out_reason, log), fmt_string, entry.znode_name, entry.type, entry.new_part_name, + LOG_INFO(LogToStr(out_reason, log), fmt_string, entry.znode_name, entry.type, entry.new_part_name, new_part_name, another_entry.znode_name, another_entry.type, another_entry.new_part_name); return true; @@ -1198,6 +1203,7 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry auto result_part = MergeTreePartInfo::fromPartName(new_part_name, format_version); /// It can slow down when the size of `future_parts` is large. But it can not be large, since background pool is limited. + /// (well, it can actually, thanks to REPLACE_RANGE, but it's a rare case) for (const auto & future_part_elem : future_parts) { auto future_part = MergeTreePartInfo::fromPartName(future_part_elem.first, format_version); @@ -1608,26 +1614,39 @@ void ReplicatedMergeTreeQueue::CurrentlyExecuting::setActualPartName( std::unique_lock & state_lock, std::vector & covered_entries_to_wait) { - if (!entry.actual_new_part_name.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry actual part isn't empty yet. This is a bug."); + if (actual_part_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Actual part name is empty"); - entry.actual_new_part_name = actual_part_name; + if (!entry.actual_new_part_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} actual part isn't empty yet: '{}'. This is a bug.", + entry.znode_name, entry.actual_new_part_name); + + auto actual_part_info = MergeTreePartInfo::fromPartName(actual_part_name, queue.format_version); + for (const auto & other_part_name : entry.replace_range_actual_new_part_names) + if (!MergeTreePartInfo::fromPartName(other_part_name, queue.format_version).isDisjoint(actual_part_info)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} already has actual part {} non-disjoint with {}. This is a bug.", + entry.actual_new_part_name, other_part_name, actual_part_name); /// Check if it is the same (and already added) part. - if (entry.actual_new_part_name == entry.new_part_name) + if (actual_part_name == entry.new_part_name) return; - if (!queue.future_parts.emplace(entry.actual_new_part_name, entry.shared_from_this()).second) + if (!queue.future_parts.emplace(actual_part_name, entry.shared_from_this()).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attaching already existing future part {}. This is a bug. 
" "It happened on attempt to execute {}: {}", - entry.actual_new_part_name, entry.znode_name, entry.toString()); + actual_part_name, entry.znode_name, entry.toString()); + + if (entry.type == LogEntry::REPLACE_RANGE) + entry.replace_range_actual_new_part_names.insert(actual_part_name); + else + entry.actual_new_part_name = actual_part_name; for (LogEntryPtr & covered_entry : covered_entries_to_wait) { if (&entry == covered_entry.get()) continue; - LOG_TRACE(queue.log, "Waiting for {} producing {} to finish before executing {} producing not disjoint part {}", - covered_entry->znode_name, covered_entry->new_part_name, entry.znode_name, entry.new_part_name); + LOG_TRACE(queue.log, "Waiting for {} producing {} to finish before executing {} producing not disjoint part {} (actual part {})", + covered_entry->znode_name, covered_entry->new_part_name, entry.znode_name, entry.new_part_name, actual_part_name); covered_entry->execution_complete.wait(state_lock, [&covered_entry] { return !covered_entry->currently_executing; }); } } @@ -1646,25 +1665,27 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::~CurrentlyExecuting() entry->currently_executing = false; entry->execution_complete.notify_all(); - for (const String & new_part_name : entry->getVirtualPartNames(queue.format_version)) + auto erase_and_check = [this](const String & part_name) { - if (!queue.future_parts.erase(new_part_name)) + if (!queue.future_parts.erase(part_name)) { - LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", new_part_name); + LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", part_name); assert(false); } - } + }; + + for (const String & new_part_name : entry->getVirtualPartNames(queue.format_version)) + erase_and_check(new_part_name); if (!entry->actual_new_part_name.empty()) - { - if (entry->actual_new_part_name != entry->new_part_name && !queue.future_parts.erase(entry->actual_new_part_name)) - { - LOG_ERROR(queue.log, "Untagging already untagged future part {}. 
This is a bug.", entry->actual_new_part_name); - assert(false); - } + erase_and_check(entry->actual_new_part_name); - entry->actual_new_part_name.clear(); - } + entry->actual_new_part_name.clear(); + + for (const auto & actual_part : entry->replace_range_actual_new_part_names) + erase_and_check(actual_part); + + entry->replace_range_actual_new_part_names.clear(); } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 2d7727b57cb..0a1057916d6 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -42,6 +42,7 @@ namespace ErrorCodes extern const int NO_FILE_IN_DATA_PART; extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; + extern const int BROKEN_PROJECTION; } @@ -116,7 +117,9 @@ static IMergeTreeDataPart::Checksums checkDataPart( const NameSet & files_without_checksums, const ReadSettings & read_settings, bool require_checksums, - std::function is_cancelled) + std::function is_cancelled, + bool & is_broken_projection, + bool throw_on_broken_projection) { /** Responsibility: * - read list of columns from columns.txt; @@ -125,6 +128,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( */ CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedChecks}; + Poco::Logger * log = &Poco::Logger::get("checkDataPart"); NamesAndTypesList columns_txt; @@ -274,17 +278,55 @@ static IMergeTreeDataPart::Checksums checkDataPart( } } + std::string broken_projections_message; for (const auto & [name, projection] : data_part->getProjectionParts()) { if (is_cancelled()) return {}; auto projection_file = name + ".proj"; - auto projection_checksums = checkDataPart( - projection, *data_part_storage.getProjection(projection_file), - projection->getColumns(), projection->getType(), - projection->getFileNamesWithoutChecksums(), - read_settings, require_checksums, is_cancelled); + if (!throw_on_broken_projection && projection->is_broken) + { + projections_on_disk.erase(projection_file); + checksums_txt.remove(projection_file); + } + + IMergeTreeDataPart::Checksums projection_checksums; + try + { + bool noop; + projection_checksums = checkDataPart( + projection, *data_part_storage.getProjection(projection_file), + projection->getColumns(), projection->getType(), + projection->getFileNamesWithoutChecksums(), + read_settings, require_checksums, is_cancelled, noop, /* throw_on_broken_projection */false); + } + catch (...) 
+ { + if (isRetryableException(std::current_exception())) + throw; + + if (!projection->is_broken) + { + LOG_TEST(log, "Marking projection {} as broken ({})", name, projection_file); + projection->setBrokenReason(getCurrentExceptionMessage(false), getCurrentExceptionCode()); + } + + is_broken_projection = true; + if (throw_on_broken_projection) + { + if (!broken_projections_message.empty()) + broken_projections_message += "\n"; + + broken_projections_message += fmt::format( + "Part {} has a broken projection {} (error: {})", + data_part->name, name, getCurrentExceptionMessage(false)); + continue; + } + + projections_on_disk.erase(projection_file); + checksums_txt.remove(projection_file); + } checksums_data.files[projection_file] = IMergeTreeDataPart::Checksums::Checksum( projection_checksums.getTotalSizeOnDisk(), @@ -293,6 +335,11 @@ static IMergeTreeDataPart::Checksums checkDataPart( projections_on_disk.erase(projection_file); } + if (throw_on_broken_projection && !broken_projections_message.empty()) + { + throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message); + } + if (require_checksums && !projections_on_disk.empty()) { throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, @@ -312,7 +359,9 @@ static IMergeTreeDataPart::Checksums checkDataPart( IMergeTreeDataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, bool require_checksums, - std::function is_cancelled) + bool & is_broken_projection, + std::function is_cancelled, + bool throw_on_broken_projection) { /// If check of part has failed and it is stored on disk with cache /// try to drop cache and check it once again because maybe the cache @@ -351,7 +400,9 @@ IMergeTreeDataPart::Checksums checkDataPart( data_part->getFileNamesWithoutChecksums(), read_settings, require_checksums, - is_cancelled); + is_cancelled, + is_broken_projection, + throw_on_broken_projection); }; try @@ -365,7 +416,9 @@ IMergeTreeDataPart::Checksums checkDataPart( data_part->getFileNamesWithoutChecksums(), read_settings, require_checksums, - is_cancelled); + is_cancelled, + is_broken_projection, + throw_on_broken_projection); } catch (...) 
{ diff --git a/src/Storages/MergeTree/checkDataPart.h b/src/Storages/MergeTree/checkDataPart.h index ada89a54dcc..2f9894bc70f 100644 --- a/src/Storages/MergeTree/checkDataPart.h +++ b/src/Storages/MergeTree/checkDataPart.h @@ -10,7 +10,9 @@ namespace DB IMergeTreeDataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, bool require_checksums, - std::function is_cancelled = []{ return false; }); + bool & is_broken_projection, + std::function is_cancelled = []{ return false; }, + bool throw_on_broken_projection = false); bool isNotEnoughMemoryErrorCode(int code); bool isRetryableException(std::exception_ptr exception_ptr); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 8e5195d497f..1ac739f03fd 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -94,6 +94,8 @@ void StorageInMemoryMetadata::setSQLSecurity(const ASTSQLSecurity & sql_security { if (sql_security.definer) definer = sql_security.definer->toString(); + else + definer = std::nullopt; sql_security_type = sql_security.type; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 3d1ca0c76b6..caec03c95b3 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -902,15 +902,27 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); - table_name_node->setAlias("_table"); - column_name_to_node.emplace("_table", table_name_node); + auto table_name_alias = std::make_shared("__table1._table"); + + auto function_node = std::make_shared("__actionName"); + function_node->getArguments().getNodes().push_back(std::move(table_name_node)); + function_node->getArguments().getNodes().push_back(std::move(table_name_alias)); + function_node->resolveAsFunction(FunctionFactory::instance().get("__actionName", context)); + + column_name_to_node.emplace("_table", function_node); } if (!storage_snapshot_->tryGetColumn(get_column_options, "_database")) { auto database_name_node = std::make_shared(current_storage_id.database_name); - database_name_node->setAlias("_database"); - column_name_to_node.emplace("_database", database_name_node); + auto database_name_alias = std::make_shared("__table1._database"); + + auto function_node = std::make_shared("__actionName"); + function_node->getArguments().getNodes().push_back(std::move(database_name_node)); + function_node->getArguments().getNodes().push_back(std::move(database_name_alias)); + function_node->resolveAsFunction(FunctionFactory::instance().get("__actionName", context)); + + column_name_to_node.emplace("_database", function_node); } auto storage_columns = storage_snapshot_->metadata->getColumns(); @@ -1061,7 +1073,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? 
"_table" : table_alias + "._table"; if (has_database_virtual_column && common_header.has(database_column) - && (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + database_name + "'_String"))) + && storage_stage == QueryProcessingStage::FetchColumns && !pipe_header.has(database_column)) { ColumnWithTypeAndName column; column.name = database_column; @@ -1077,7 +1089,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( } if (has_table_virtual_column && common_header.has(table_column) - && (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + table_name + "'_String"))) + && storage_stage == QueryProcessingStage::FetchColumns && !pipe_header.has(table_column)) { ColumnWithTypeAndName column; column.name = table_column; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 788f250e6a9..aad4fc36a1b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2042,7 +2042,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get()); { auto lock = lockParts(); - fillNewPartName(loaded_parts[i], lock); + fillNewPartNameAndResetLevel(loaded_parts[i], lock); renameTempPartAndAdd(loaded_parts[i], transaction, lock); transaction.commit(&lock); } @@ -2313,11 +2313,12 @@ std::optional StorageMergeTree::checkDataNext(DataValidationTasksPt { /// If the checksums file is not present, calculate the checksums and write them to disk. static constexpr auto checksums_path = "checksums.txt"; + bool noop; if (part->isStoredOnDisk() && !part->getDataPartStorage().exists(checksums_path)) { try { - auto calculated_checksums = checkDataPart(part, false); + auto calculated_checksums = checkDataPart(part, false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true); calculated_checksums.checkEqual(part->checksums, true); auto & part_mutable = const_cast(*part); @@ -2338,7 +2339,7 @@ std::optional StorageMergeTree::checkDataNext(DataValidationTasksPt { try { - checkDataPart(part, true); + checkDataPart(part, true, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true); return CheckResult(part->name, true, ""); } catch (...) 
@@ -2481,4 +2482,12 @@ void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock part->setName(part->getNewName(part->info)); } +void StorageMergeTree::fillNewPartNameAndResetLevel(MutableDataPartPtr & part, DataPartsLock &) +{ + part->info.min_block = part->info.max_block = increment.get(); + part->info.mutation = 0; + part->info.level = 0; + part->setName(part->getNewName(part->info)); +} + } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index c384a391291..d864b3e626c 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -260,6 +260,7 @@ private: std::set * mutation_ids = nullptr, bool from_another_mutation = false) const; void fillNewPartName(MutableDataPartPtr & part, DataPartsLock & lock); + void fillNewPartNameAndResetLevel(MutableDataPartPtr & part, DataPartsLock & lock); void startBackgroundMovesIfNeeded() override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ce6735d9176..905473302ba 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8990,12 +8990,11 @@ IStorage::DataValidationTasksPtr StorageReplicatedMergeTree::getCheckTaskList( std::optional StorageReplicatedMergeTree::checkDataNext(DataValidationTasksPtr & check_task_list) { - if (auto part = assert_cast(check_task_list.get())->next()) { try { - return CheckResult(part_check_thread.checkPartAndFix(part->name)); + return part_check_thread.checkPartAndFix(part->name, /* recheck_after */nullptr, /* throw_on_broken_projection */true); } catch (const Exception & ex) { diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 4bdcea67313..c44c1fd1ea5 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -84,8 +84,11 @@ StorageSystemProjectionParts::StorageSystemProjectionParts(const StorageID & tab {"rows_where_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, {"rows_where_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, {"rows_where_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. 
Used to understand whether we have all rows with expired TTL."}, - } - ) + + {"is_broken", std::make_shared(), "Whether projection part is broken"}, + {"exception_code", std::make_shared(), "Exception code explaining broken state of the projection part"}, + {"exception", std::make_shared(), "Exception message explaining broken state of the projection part"}, + }) { } @@ -272,12 +275,38 @@ void StorageSystemProjectionParts::processNextStorage( add_ttl_info_map(part->ttl_infos.moves_ttl); if (columns_mask[src_index++]) - columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc())); + { + if (part->default_codec) + columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc())); + else + columns[res_index++]->insertDefault(); + } add_ttl_info_map(part->ttl_infos.recompression_ttl); add_ttl_info_map(part->ttl_infos.group_by_ttl); add_ttl_info_map(part->ttl_infos.rows_where_ttl); + { + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->is_broken.load(std::memory_order_relaxed)); + + if (part->is_broken) + { + std::lock_guard lock(part->broken_reason_mutex); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->exception_code); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->exception); + } + else + { + if (columns_mask[src_index++]) + columns[res_index++]->insertDefault(); + if (columns_mask[src_index++]) + columns[res_index++]->insertDefault(); + } + } + /// _state column should be the latest. /// Do not use part->getState*, it can be changed from different thread if (has_state_column) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 1cffdd5211d..16f153253d6 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,6 +1,4 @@ 00725_memory_tracking -01155_rename_move_materialized_view 01624_soft_constraints 02354_vector_search_queries -# Check after constants refactoring 02901_parallel_replicas_rollup diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 514189a8b8a..a3cea281a56 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1033,22 +1033,6 @@ def _print_results(result: Any, outfile: Optional[str], pretty: bool = False) -> raise AssertionError(f"Unexpected type for 'res': {type(result)}") -def _check_and_update_for_early_style_check(jobs_data: dict, docker_data: dict) -> None: - """ - This is temporary hack to start style check before docker build if possible - FIXME: need better solution to do style check as soon as possible and as fast as possible w/o dependency on docker job - """ - jobs_to_do = jobs_data.get("jobs_to_do", []) - docker_to_build = docker_data.get("missing_multi", []) - if ( - JobNames.STYLE_CHECK in jobs_to_do - and docker_to_build - and "clickhouse/style-test" not in docker_to_build - ): - index = jobs_to_do.index(JobNames.STYLE_CHECK) - jobs_to_do[index] = "Style check early" - - def _update_config_for_docs_only(jobs_data: dict) -> None: DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] print(f"NOTE: Will keep only docs related jobs: [{DOCS_CHECK_JOBS}]") @@ -1306,6 +1290,12 @@ def _configure_jobs( if params["num_batches"] > 1: params["batches"] = list(requested_batches) + if pr_info.is_merge_queue(): + # FIXME: Quick support for MQ workflow which is only StyleCheck for now + jobs_to_do = [JobNames.STYLE_CHECK] + jobs_to_skip = [] + print(f"NOTE: This is Merge Queue CI: set jobs to do: [{jobs_to_do}]") + return { "digests": digests, "jobs_to_do": jobs_to_do, @@ -1752,11 +1742,6 @@ def main() -> int: else {} ) - # # 
FIXME: Early style check manipulates with job names might be not robust with await feature - # if pr_info.number != 0: - # # FIXME: it runs style check before docker build if possible (style-check images is not changed) - # # find a way to do style check always before docker build and others - # _check_and_update_for_early_style_check(jobs_data, docker_data) if not args.skip_jobs and pr_info.has_changes_in_documentation_only(): _update_config_for_docs_only(jobs_data) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 573976f1f84..b4fe6b516fc 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -220,7 +220,7 @@ class JobConfig: digest: DigestConfig = field(default_factory=DigestConfig) # will be triggered for the job if omited in CI workflow yml run_command: str = "" - # job timeout + # job timeout, seconds timeout: Optional[int] = None # sets number of batches for multi-batch job num_batches: int = 1 @@ -517,10 +517,11 @@ clickbench_test_params = { ), "run_command": 'clickbench.py "$CHECK_NAME"', } -install_test_params = { - "digest": install_check_digest, - "run_command": 'install_check.py "$CHECK_NAME"', -} +install_test_params = JobConfig( + digest=install_check_digest, + run_command='install_check.py "$CHECK_NAME"', + timeout=900, +) @dataclass @@ -1105,10 +1106,10 @@ CI_CONFIG = CIConfig( }, test_configs={ JobNames.INSTALL_TEST_AMD: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**install_test_params) # type: ignore + Build.PACKAGE_RELEASE, job_config=install_test_params ), JobNames.INSTALL_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**install_test_params) # type: ignore + Build.PACKAGE_AARCH64, job_config=install_test_params ), JobNames.STATEFUL_TEST_ASAN: TestConfig( Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 2967ec2f309..97d42f9845b 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,5 +1,6 @@ from contextlib import contextmanager import os +import signal from typing import Any, List, Union, Iterator from pathlib import Path @@ -48,3 +49,14 @@ class GHActions: for line in lines: print(line) print("::endgroup::") + + +def set_job_timeout(): + def timeout_handler(_signum, _frame): + print("Timeout expired") + raise TimeoutError("Job's KILL_TIMEOUT expired") + + kill_timeout = int(os.getenv("KILL_TIMEOUT", "0")) + assert kill_timeout > 0, "kill timeout must be provided in KILL_TIMEOUT env" + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(kill_timeout) diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 4fc112c6d9f..71e2198f533 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -14,10 +14,11 @@ from build_download_helper import download_builds_filter from compress_files import compress_fast from docker_images_helper import DockerImage, pull_image, get_docker_image -from env_helper import REPORT_PATH, TEMP_PATH as TEMP +from env_helper import CI, REPORT_PATH, TEMP_PATH as TEMP from report import JobReport, TestResults, TestResult, FAILURE, FAIL, OK, SUCCESS from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_utils import set_job_timeout RPM_IMAGE = "clickhouse/install-rpm-test" @@ -255,6 +256,9 @@ def main(): args = parse_args() + if CI: + set_job_timeout() + TEMP_PATH.mkdir(parents=True, exist_ok=True) LOGS_PATH.mkdir(parents=True, exist_ok=True) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 
84f2db4002d..ddf59c49e1f 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -215,6 +215,7 @@ class PRInfo: .replace("{base}", base_sha) .replace("{head}", self.sha) ) + self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" elif "commits" in github_event: self.event_type = EventType.PUSH diff --git a/tests/integration/test_broken_projections/__init__.py b/tests/integration/test_broken_projections/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_broken_projections/config.d/backups.xml b/tests/integration/test_broken_projections/config.d/backups.xml new file mode 100644 index 00000000000..4da8edffd67 --- /dev/null +++ b/tests/integration/test_broken_projections/config.d/backups.xml @@ -0,0 +1,13 @@ + + + + + local + /var/lib/clickhouse/disks/backups/ + + + + + backups + + diff --git a/tests/integration/test_broken_projections/test.py b/tests/integration/test_broken_projections/test.py new file mode 100644 index 00000000000..4a4690a5d0a --- /dev/null +++ b/tests/integration/test_broken_projections/test.py @@ -0,0 +1,576 @@ +import time +import pytest +import logging +import string +import random +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["config.d/backups.xml"], + stay_alive=True, + with_zookeeper=True, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def create_table(node, table, replica, data_prefix="", aggressive_merge=True): + if data_prefix == "": + data_prefix = table + + if aggressive_merge: + vertical_merge_algorithm_min_rows_to_activate = 1 + vertical_merge_algorithm_min_columns_to_activate = 1 + max_parts_to_merge_at_once = 3 + else: + vertical_merge_algorithm_min_rows_to_activate = 100000 + vertical_merge_algorithm_min_columns_to_activate = 100 + max_parts_to_merge_at_once = 3 + + node.query( + f""" + DROP TABLE IF EXISTS {table} SYNC; + CREATE TABLE {table} + ( + a String, + b String, + c Int64, + d Int64, + e Int64, + PROJECTION proj1 + ( + SELECT c ORDER BY d + ), + PROJECTION proj2 + ( + SELECT d ORDER BY c + ) + ) + ENGINE = ReplicatedMergeTree('/test_broken_projection_{data_prefix}/data/', '{replica}') ORDER BY a + SETTINGS min_bytes_for_wide_part = 0, + max_parts_to_merge_at_once={max_parts_to_merge_at_once}, + enable_vertical_merge_algorithm=0, + vertical_merge_algorithm_min_rows_to_activate = {vertical_merge_algorithm_min_rows_to_activate}, + vertical_merge_algorithm_min_columns_to_activate = {vertical_merge_algorithm_min_columns_to_activate}, + compress_primary_key=0; + """ + ) + + +def insert(node, table, offset, size): + node.query( + f""" + INSERT INTO {table} + SELECT number, number, number, number, number%2 FROM numbers({offset}, {size}) + SETTINGS insert_keeper_fault_injection_probability=0.0; + """ + ) + + +def get_parts(node, table): + return ( + node.query( + f""" + SELECT name + FROM system.parts + WHERE table='{table}' AND database=currentDatabase() AND active = 1 + ORDER BY name;" + """ + ) + .strip() + .split("\n") + ) + + +def bash(node, command): + node.exec_in_container(["bash", "-c", command], privileged=True, user="root") + + +def break_projection(node, table, part, parent_part, break_type): + part_path = node.query( + f""" + SELECT path + FROM system.projection_parts + WHERE table='{table}' + AND 
database=currentDatabase() + AND active=1 + AND part_name='{part}' + AND parent_name='{parent_part}' + ORDER BY modification_time DESC + LIMIT 1; + """ + ).strip() + + node.query( + f"select throwIf(substring('{part_path}', 1, 1) != '/', 'Path is relative: {part_path}')" + ) + + if break_type == "data": + bash(node, f"rm '{part_path}/d.bin'") + bash(node, f"rm '{part_path}/c.bin'") + elif break_type == "metadata": + bash(node, f"rm '{part_path}/columns.txt'") + elif break_type == "part": + bash(node, f"rm -r '{part_path}'") + + +def break_part(node, table, part): + part_path = node.query( + f""" + SELECT path + FROM system.parts + WHERE table='{table}' + AND database=currentDatabase() + AND active=1 + AND part_name='{part}' + ORDER BY modification_time DESC + LIMIT 1; + """ + ).strip() + + node.query( + f"select throwIf(substring('{part_path}', 1, 1) != '/', 'Path is relative: {part_path}')" + ) + bash(node, f"rm '{part_path}/columns.txt'") + + +def get_broken_projections_info(node, table): + return node.query( + f""" + SELECT parent_name, name, errors.name FROM + ( + SELECT parent_name, name, exception_code + FROM system.projection_parts + WHERE table='{table}' + AND database=currentDatabase() + AND is_broken = 1 + ) AS parts_info + INNER JOIN system.errors AS errors + ON parts_info.exception_code = errors.code + ORDER BY parent_name, name + """ + ).strip() + + +def get_projections_info(node, table): + return node.query( + f""" + SELECT parent_name, name, is_broken + FROM system.projection_parts + WHERE table='{table}' + AND active = 1 + AND database=currentDatabase() + ORDER BY parent_name, name + """ + ).strip() + + +def optimize(node, table, final, no_wait): + query = f"OPTIMIZE TABLE {table}" + if final: + query += " FINAL" + if no_wait: + query += " SETTINGS alter_sync=0" + node.query(query) + + +def reattach(node, table): + node.query( + f""" + DETACH TABLE {table}; + ATTACH TABLE {table}; + """ + ) + + +def materialize_projection(node, table, proj): + node.query( + f"ALTER TABLE {table} MATERIALIZE PROJECTION {proj} SETTINGS mutations_sync=2" + ) + + +def check_table_full(node, table): + return node.query( + f"CHECK TABLE {table} SETTINGS check_query_single_value_result = 0;" + ).strip() + + +def random_str(length=6): + alphabet = string.ascii_lowercase + string.digits + return "".join(random.SystemRandom().choice(alphabet) for _ in range(length)) + + +def check(node, table, check_result, expect_broken_part="", expected_error=""): + if expect_broken_part == "proj1": + assert expected_error in node.query_and_get_error( + f"SELECT c FROM '{table}' WHERE d == 12 ORDER BY c" + ) + else: + query_id = node.query( + f"SELECT queryID() FROM (SELECT c FROM '{table}' WHERE d == 12 ORDER BY c)" + ).strip() + node.query("SYSTEM FLUSH LOGS") + res = node.query( + f""" + SELECT query, splitByChar('.', arrayJoin(projections))[-1] + FROM system.query_log + WHERE query_id='{query_id}' AND type='QueryFinish' + """ + ) + if res == "": + res = node.query( + """ + SELECT query_id, query, splitByChar('.', arrayJoin(projections))[-1] + FROM system.query_log ORDER BY query_start_time_microseconds DESC + """ + ) + print(f"LOG: {res}") + assert False + assert "proj1" in res + + if expect_broken_part == "proj2": + assert expected_error in node.query_and_get_error( + f"SELECT d FROM '{table}' WHERE c == 12 ORDER BY d" + ) + else: + query_id = node.query( + f"SELECT queryID() FROM (SELECT d FROM '{table}' WHERE c == 12 ORDER BY d)" + ).strip() + node.query("SYSTEM FLUSH LOGS") + res = node.query( + f""" + 
SELECT query, splitByChar('.', arrayJoin(projections))[-1] + FROM system.query_log + WHERE query_id='{query_id}' AND type='QueryFinish' + """ + ) + if res == "": + res = node.query( + """ + SELECT query_id, query, splitByChar('.', arrayJoin(projections))[-1] + FROM system.query_log ORDER BY query_start_time_microseconds DESC + """ + ) + print(f"LOG: {res}") + assert False + assert "proj2" in res + + assert check_result == int(node.query(f"CHECK TABLE {table}")) + + +def test_broken_ignored(cluster): + node = cluster.instances["node"] + + table_name = "test1" + create_table(node, table_name, 1) + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + # Break metadata (columns.txt) file of projection 'proj1' + break_projection(node, table_name, "proj1", "all_2_2_0", "metadata") + + # Run a SELECT query and, after it, a "check table" query. + # Select works because it does not read columns.txt. + # But expect check table result as 0. + check(node, table_name, 0) + + # Projection 'proj1' from part all_2_2_0 will now appear in broken parts info + # because it was marked broken during "check table" query. + assert "all_2_2_0\tproj1\tFILE_DOESNT_EXIST" in get_broken_projections_info( + node, table_name + ) + + # Check table query will also show a list of parts which have broken projections. + assert "all_2_2_0" in check_table_full(node, table_name) + + # Break data file of projection 'proj2' for part all_2_2_0 + break_projection(node, table_name, "proj2", "all_2_2_0", "data") + + # It will not yet appear in broken projections info. + assert "proj2" not in get_broken_projections_info(node, table_name) + + # Select now fails with error "File doesn't exist" + check(node, table_name, 0, "proj2", "FILE_DOESNT_EXIST") + + # Projection 'proj2' from part all_2_2_0 will now appear in broken parts info. + assert "all_2_2_0\tproj2\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name + ) + + # Second select works, because projection is now marked as broken. + check(node, table_name, 0) + + # Break data file of projection 'proj2' for part all_3_3_0 + break_projection(node, table_name, "proj2", "all_3_3_0", "data") + + # It will not yet appear in broken projections info. + assert "all_3_3_0" not in get_broken_projections_info(node, table_name) + + insert(node, table_name, 20, 5) + insert(node, table_name, 25, 5) + + # Part all_3_3_0 has 'proj1' and 'proj2' projections, but 'proj2' is broken and server does NOT know it yet. + # Parts all_4_4_0 and all_5_5_0 both have non-broken projections. + # So a merge will be created for future part all_3_5_1. + # During merge it will fail to read from 'proj2' of part all_3_3_0 and proj2 will be marked broken. + # Merge will be retried and on second attempt it will succeed. + # The result part all_3_5_1 will have only 1 projection - 'proj1', because + # it will skip 'proj2' as it will see that one part does not have it anymore in the set of valid projections. 
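+    # Trigger the merge without waiting for it (alter_sync=0) and give the background task time to run, including the retry after the first failed attempt.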
+ optimize(node, table_name, 0, 1) + time.sleep(5) + + # table_uuid=node.query(f"SELECT uuid FROM system.tables WHERE table='{table_name}' and database=currentDatabase()").strip() + # assert 0 < int( + # node.query( + # f""" + # SYSTEM FLUSH LOGS; + # SELECT count() FROM system.text_log + # WHERE level='Error' + # AND logger_name='MergeTreeBackgroundExecutor' + # AND message like 'Exception while executing background task %{table_uuid}:all_3_5_1%%Cannot open file%proj2.proj/c.bin%' + # """) + # ) + + assert "all_3_3_0" in get_broken_projections_info(node, table_name) + check(node, table_name, 0) + + +def test_materialize_broken_projection(cluster): + node = cluster.instances["node"] + + table_name = "test2" + create_table(node, table_name, 1) + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + break_projection(node, table_name, "proj1", "all_1_1_0", "metadata") + reattach(node, table_name) + + assert "all_1_1_0\tproj1\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name + ) + assert "Part all_1_1_0 has a broken projection proj1" in check_table_full( + node, table_name + ) + + break_projection(node, table_name, "proj2", "all_1_1_0", "data") + reattach(node, table_name) + + assert "all_1_1_0\tproj2\tFILE_DOESNT_EXIST" in get_broken_projections_info( + node, table_name + ) + assert "Part all_1_1_0 has a broken projection proj2" in check_table_full( + node, table_name + ) + + materialize_projection(node, table_name, "proj1") + + assert "has a broken projection" not in check_table_full(node, table_name) + + +def test_broken_ignored_replicated(cluster): + node = cluster.instances["node"] + + table_name = "test3" + table_name2 = "test3_replica" + create_table(node, table_name, 1) + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + check(node, table_name, 1) + + create_table(node, table_name2, 2, table_name) + check(node, table_name2, 1) + + break_projection(node, table_name, "proj1", "all_0_0_0", "data") + assert "Part all_0_0_0 has a broken projection proj1" in check_table_full( + node, table_name + ) + + break_part(node, table_name, "all_0_0_0") + node.query(f"SYSTEM SYNC REPLICA {table_name}") + assert "has a broken projection" not in check_table_full(node, table_name) + + +def get_random_string(string_length=8): + alphabet = string.ascii_letters + string.digits + return "".join((random.choice(alphabet) for _ in range(string_length))) + + +def test_broken_projections_in_backups_1(cluster): + node = cluster.instances["node"] + + table_name = "test4" + create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name) + + node.query("SYSTEM STOP MERGES") + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + check(node, table_name, 1) + + break_projection(node, table_name, "proj1", "all_2_2_0", "data") + check(node, table_name, 0, "proj1", "FILE_DOESNT_EXIST") + + assert "all_2_2_0\tproj1\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name + ) + + backup_name = f"b1-{get_random_string()}" + assert "BACKUP_CREATED" in node.query( + f""" + set 
backup_restore_keeper_fault_injection_probability=0.0; + backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false; + """ + ) + + assert "RESTORED" in node.query( + f""" + drop table {table_name} sync; + set backup_restore_keeper_fault_injection_probability=0.0; + restore table {table_name} from Disk('backups', '{backup_name}'); + """ + ) + + node.query("SYSTEM STOP MERGES") + + check(node, table_name, 1) + assert "" == get_broken_projections_info(node, table_name) + + +def test_broken_projections_in_backups_2(cluster): + node = cluster.instances["node"] + + table_name = "test5" + create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name) + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + check(node, table_name, 1) + break_projection(node, table_name, "proj2", "all_2_2_0", "part") + check(node, table_name, 0, "proj2", "ErrnoException") + + assert "all_2_2_0\tproj2\tFILE_DOESNT_EXIST" == get_broken_projections_info( + node, table_name + ) + + assert "FILE_DOESNT_EXIST" in node.query_and_get_error( + f""" + set backup_restore_keeper_fault_injection_probability=0.0; + backup table {table_name} to Disk('backups', 'b2') + """ + ) + + materialize_projection(node, table_name, "proj2") + check(node, table_name, 1) + + backup_name = f"b3-{get_random_string()}" + assert "BACKUP_CREATED" in node.query( + f""" + set backup_restore_keeper_fault_injection_probability=0.0; + backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false; + """ + ) + + assert "RESTORED" in node.query( + f""" + drop table {table_name} sync; + set backup_restore_keeper_fault_injection_probability=0.0; + restore table {table_name} from Disk('backups', '{backup_name}'); + """ + ) + check(node, table_name, 1) + + +def test_broken_projections_in_backups_3(cluster): + node = cluster.instances["node"] + + table_name = "test6" + create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name) + + node.query("SYSTEM STOP MERGES") + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + check(node, table_name, 1) + + break_projection(node, table_name, "proj1", "all_1_1_0", "part") + assert "Part all_1_1_0 has a broken projection proj1" in check_table_full( + node, table_name + ) + assert "all_1_1_0\tproj1\tFILE_DOESNT_EXIST" == get_broken_projections_info( + node, table_name + ) + + backup_name = f"b4-{get_random_string()}" + assert "BACKUP_CREATED" in node.query( + f""" + set backup_restore_keeper_fault_injection_probability=0.0; + backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false, allow_backup_broken_projections=true; + """ + ) + + assert "RESTORED" in node.query( + f""" + drop table {table_name} sync; + set backup_restore_keeper_fault_injection_probability=0.0; + restore table {table_name} from Disk('backups', '{backup_name}'); + """ + ) + + check(node, table_name, 0) + assert "all_1_1_0\tproj1\tNO_FILE_IN_DATA_PART" == get_broken_projections_info( + node, table_name + ) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 
385523d1117..fc115e5a21f 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -5,7 +5,7 @@ import time import pytz import uuid import grpc -from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.cluster import ClickHouseCluster, is_arm, run_and_check from threading import Thread import gzip import lz4.frame @@ -20,6 +20,10 @@ import clickhouse_grpc_pb2, clickhouse_grpc_pb2_grpc # Execute pb2/generate.py GRPC_PORT = 9100 DEFAULT_ENCODING = "utf-8" +# GRPC is disabled on ARM build - skip tests +if is_arm(): + pytestmark = pytest.mark.skip + # Utilities diff --git a/tests/integration/test_mysql57_database_engine/test.py b/tests/integration/test_mysql57_database_engine/test.py index e07cf443fd8..410d277d667 100644 --- a/tests/integration/test_mysql57_database_engine/test.py +++ b/tests/integration/test_mysql57_database_engine/test.py @@ -5,9 +5,13 @@ from string import Template import pymysql.cursors import pytest from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, is_arm from helpers.network import PartitionManager + +if is_arm(): + pytestmark = pytest.mark.skip + cluster = ClickHouseCluster(__file__) clickhouse_node = cluster.add_instance( "node1", diff --git a/tests/integration/test_transactions/test.py b/tests/integration/test_transactions/test.py index 46660581223..d63b7b6f545 100644 --- a/tests/integration/test_transactions/test.py +++ b/tests/integration/test_transactions/test.py @@ -67,8 +67,8 @@ def test_rollback_unfinished_on_restart1(start_cluster): tx(1, "insert into mt values (5, 50)") tx(1, "alter table mt update m = m+n in partition id '1' where 1") - # check that uncommitted insert will be rolled back on restart - tx(3, "begin transaction") + # check that uncommitted insert will be rolled back on restart (using `START TRANSACTION` syntax) + tx(3, "start transaction") tid5 = tx(3, "select transactionID()").strip() tx(3, "insert into mt values (6, 70)") diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 43b15ded93d..7b725111755 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -69,6 +69,7 @@ hello (3333.6,'test') (3333.6333333333,'test') (3333.6333333333,'test') +\N 123456.1234 Decimal(20, 4) 123456.1234 Decimal(20, 4) 123456789012345.12 Decimal(30, 4) diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index e19dd17670e..61fcb21fcbd 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -81,6 +81,7 @@ SELECT JSONExtract('{"a":3333.6333333333333333333333, "b":"test"}', 'Tuple(a Dec SELECT JSONExtract('{"a":"3333.6333333333333333333333", "b":"test"}', 'Tuple(a Decimal(10,1), b LowCardinality(String))'); SELECT JSONExtract('{"a":3333.6333333333333333333333, "b":"test"}', 'Tuple(a Decimal(20,10), b LowCardinality(String))'); SELECT JSONExtract('{"a":"3333.6333333333333333333333", "b":"test"}', 'Tuple(a Decimal(20,10), b LowCardinality(String))'); +SELECT JSONExtract(materialize('{"string_value":null}'), materialize('string_value'), 'LowCardinality(Nullable(String))'); SELECT JSONExtract('{"a":123456.123456}', 'a', 'Decimal(20, 4)') as a, toTypeName(a); SELECT JSONExtract('{"a":"123456.123456"}', 'a', 'Decimal(20, 
4)') as a, toTypeName(a); SELECT JSONExtract('{"a":"123456789012345.12"}', 'a', 'Decimal(30, 4)') as a, toTypeName(a); @@ -326,3 +327,4 @@ SELECT JSONExtract('[]', JSONExtract('0', 'UInt256'), 'UInt256'); -- { serverErr SELECT '--show error: key of map type should be String'; SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(Int64, Array(Float64))'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":null}')), materialize('string_value'), 'LowCardinality(Nullable(String))'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/01043_geo_distance.reference b/tests/queries/0_stateless/01043_geo_distance.reference index cd8a8a6dfe9..33e1285872e 100644 --- a/tests/queries/0_stateless/01043_geo_distance.reference +++ b/tests/queries/0_stateless/01043_geo_distance.reference @@ -6,3 +6,11 @@ 10007555 10007554 10001780 +111195.05197522942 +111195.05197522942 +110567.32686882635 +111699.2516454354 +10007554.677770648 +10007554.677770648 +10007554.677770648 +10001780.1 diff --git a/tests/queries/0_stateless/01043_geo_distance.sql b/tests/queries/0_stateless/01043_geo_distance.sql index c1fb29b9eb7..cf877d05b9f 100644 --- a/tests/queries/0_stateless/01043_geo_distance.sql +++ b/tests/queries/0_stateless/01043_geo_distance.sql @@ -1,3 +1,19 @@ +SET geo_distance_returns_float64_on_float64_arguments = 0; + +SELECT greatCircleDistance(0., 0., 0., 1.); +SELECT greatCircleDistance(0., 89., 0, 90.); + +SELECT geoDistance(0., 0., 0., 1.); +SELECT geoDistance(0., 89., 0., 90.); + +SELECT greatCircleDistance(0., 0., 90., 0.); +SELECT greatCircleDistance(0., 0., 0., 90.); + +SELECT geoDistance(0., 0., 90., 0.); +SELECT geoDistance(0., 0., 0., 90.); + +SET geo_distance_returns_float64_on_float64_arguments = 1; + SELECT greatCircleDistance(0., 0., 0., 1.); SELECT greatCircleDistance(0., 89., 0, 90.); diff --git a/tests/queries/0_stateless/01678_great_circle_angle.reference b/tests/queries/0_stateless/01678_great_circle_angle.reference index f3382476d4a..a409e1d84b8 100644 --- a/tests/queries/0_stateless/01678_great_circle_angle.reference +++ b/tests/queries/0_stateless/01678_great_circle_angle.reference @@ -3,3 +3,8 @@ 0.7135 10007555 10007554 +0.1224 +0.7071 +0.7135 +10007555 +10007554 diff --git a/tests/queries/0_stateless/01678_great_circle_angle.sql b/tests/queries/0_stateless/01678_great_circle_angle.sql index 124c7bfadf2..595622822f1 100644 --- a/tests/queries/0_stateless/01678_great_circle_angle.sql +++ b/tests/queries/0_stateless/01678_great_circle_angle.sql @@ -1,3 +1,14 @@ +SET geo_distance_returns_float64_on_float64_arguments = 0; + +SELECT round(greatCircleAngle(0, 45, 0.1, 45.1), 4); +SELECT round(greatCircleAngle(0, 45, 1, 45), 4); +SELECT round(greatCircleAngle(0, 45, 1, 45.1), 4); + +SELECT round(greatCircleDistance(0, 0, 0, 90), 4); +SELECT round(greatCircleDistance(0, 0, 90, 0), 4); + +SET geo_distance_returns_float64_on_float64_arguments = 1; + SELECT round(greatCircleAngle(0, 45, 0.1, 45.1), 4); SELECT round(greatCircleAngle(0, 45, 1, 45), 4); SELECT round(greatCircleAngle(0, 45, 1, 45.1), 4); diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 5081527ceef..c9638e62655 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -688,6 +688,9 @@ CREATE TABLE system.projection_parts 
`rows_where_ttl_info.expression` Array(String), `rows_where_ttl_info.min` Array(DateTime), `rows_where_ttl_info.max` Array(DateTime), + `is_broken` UInt8, + `exception_code` Int32, + `exception` String, `bytes` UInt64 ALIAS bytes_on_disk, `marks_size` UInt64 ALIAS marks_bytes, `part_name` String ALIAS name diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 8b85ac48c16..3ddf165dec0 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -57,6 +57,7 @@ URLPathHierarchy UUIDNumToString UUIDStringToNum _CAST +__actionName __bitBoolMaskAnd __bitBoolMaskOr __bitSwapLastTwo diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference index 783d12fcf1a..21ddf5d3512 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference @@ -8,3 +8,10 @@ \0\0\0\0\0 131231 131231 +1234 +1234 +{"b":131231} +\0\0\0\0 +1234567890 +18446744073709551615 +-9223372036854775807 diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql index cfc47e00cba..bbb9f55062b 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql @@ -6,3 +6,10 @@ SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(5)'); SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(6)'); SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(5))') FROM numbers(2); SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(6))') FROM numbers(2); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": "1234"}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": {"b": 131231} }'), 'a', 'LowCardinality(FixedString(12))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(10))'); +SELECT JSONExtract(materialize('{"a": 18446744073709551615}'), 'a', 'LowCardinality(FixedString(20))'); +SELECT JSONExtract(materialize('{"a": -9223372036854775807}'), 'a', 'LowCardinality(FixedString(20))'); diff --git a/tests/queries/0_stateless/02722_matcher_join_use_nulls.reference b/tests/queries/0_stateless/02722_matcher_join_use_nulls.reference index 746d02dc381..5617854bfd1 100644 --- a/tests/queries/0_stateless/02722_matcher_join_use_nulls.reference +++ b/tests/queries/0_stateless/02722_matcher_join_use_nulls.reference @@ -72,7 +72,7 @@ FROM ( LEFT JOIN (SELECT 2 :: UInt32 as a) t2 USING (a) ) ORDER BY 1; -1 Int32 +1 Int64 SELECT *, * APPLY toTypeName FROM ( SELECT t2.* @@ -80,7 +80,7 @@ FROM ( LEFT JOIN (SELECT 2 :: UInt32 as a) t2 USING (a) ) ORDER BY 1; -\N Nullable(UInt32) +\N Nullable(Int64) SELECT *, * APPLY toTypeName FROM ( SELECT * @@ -209,7 +209,7 @@ FROM ( RIGHT JOIN (SELECT 2 :: UInt32 as a) t2 USING (a) ) ORDER BY 1; -2 Nullable(Int32) +2 Nullable(Int64) SELECT *, * APPLY toTypeName FROM ( SELECT t2.* @@ 
-217,7 +217,7 @@ FROM ( RIGHT JOIN (SELECT 2 :: UInt32 as a) t2 USING (a) ) ORDER BY 1; -2 UInt32 +2 Int64 SELECT *, * APPLY toTypeName FROM ( SELECT * @@ -354,8 +354,8 @@ FROM ( FULL JOIN (SELECT 2 :: UInt32 as a) t2 USING (a) ) ORDER BY 1; -1 Nullable(Int32) -2 Nullable(Int32) +1 Nullable(Int64) +2 Nullable(Int64) SELECT *, * APPLY toTypeName FROM ( SELECT t2.* @@ -363,8 +363,8 @@ FROM ( FULL JOIN (SELECT 2 :: UInt32 as a) t2 USING (a) ) ORDER BY 1; -2 Nullable(UInt32) -\N Nullable(UInt32) +2 Nullable(Int64) +\N Nullable(Int64) SELECT *, * APPLY toTypeName FROM ( SELECT * diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 79728fadc04..6d9d1f07ec2 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -12,8 +12,10 @@ OK 2 2 OK +1 ===== MaterializedView ===== OK +1 0 0 OK diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index a4ab3ed0024..bead7db8450 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -1,18 +1,17 @@ #!/usr/bin/env bash +# Tags: no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user1="user02884_1_$RANDOM$RANDOM" -user2="user02884_2_$RANDOM$RANDOM" -user3="user02884_3_$RANDOM$RANDOM" -db="db02884_$RANDOM$RANDOM" +user1="user02884_1_${CLICKHOUSE_DATABASE}_$RANDOM" +user2="user02884_2_${CLICKHOUSE_DATABASE}_$RANDOM" +user3="user02884_3_${CLICKHOUSE_DATABASE}_$RANDOM" +db=${CLICKHOUSE_DATABASE} ${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE $db.test_view_10" | grep -c "SQL SECURITY INVOKER" echo "===== MaterializedView =====" @@ -136,6 +136,7 @@ ${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_5 TO $user2" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $db.test_mv_5 MODIFY SQL SECURITY NONE" ${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_5" +${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE $db.test_mv_5" | grep -c "SQL SECURITY NONE" ${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_1 TO $user2" ${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_3 TO $user2" @@ -221,6 +222,4 @@ EOF ${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_row_2" - -${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS $db;" ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS $user1, $user2, $user3"; diff --git a/tests/queries/0_stateless/02916_move_partition_inactive_replica.reference b/tests/queries/0_stateless/02916_move_partition_inactive_replica.reference new file mode 100644 index 00000000000..361a0d1e9bb --- /dev/null +++ b/tests/queries/0_stateless/02916_move_partition_inactive_replica.reference @@ -0,0 +1,4 @@ +all_0_1_1 1 +all_2_3_1 1 +0 +40 1580 diff --git a/tests/queries/0_stateless/02916_move_partition_inactive_replica.sql b/tests/queries/0_stateless/02916_move_partition_inactive_replica.sql new file mode 100644 index 00000000000..ca153eea221 --- /dev/null +++ b/tests/queries/0_stateless/02916_move_partition_inactive_replica.sql @@ -0,0 +1,47 @@ +-- Tags: 
no-parallel + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists shard_0.from_1; +drop table if exists shard_1.from_1; +drop table if exists shard_0.to; +drop table if exists shard_1.to; + +create table shard_0.from_1 (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/from_1_' || currentDatabase(), '0') order by x settings old_parts_lifetime=1, max_cleanup_delay_period=1, cleanup_delay_period=1; +create table shard_1.from_1 (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/from_1_' || currentDatabase(), '1') order by x settings old_parts_lifetime=1, max_cleanup_delay_period=1, cleanup_delay_period=1; + +system stop merges shard_0.from_1; +system stop merges shard_1.from_1; +insert into shard_0.from_1 select number + 20 from numbers(10); +insert into shard_0.from_1 select number + 30 from numbers(10); + +insert into shard_0.from_1 select number + 40 from numbers(10); +insert into shard_0.from_1 select number + 50 from numbers(10); + +system sync replica shard_1.from_1; + +create table shard_0.to (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/to_' || currentDatabase(), '0') order by x settings old_parts_lifetime=1, max_cleanup_delay_period=1, cleanup_delay_period=1, max_parts_to_merge_at_once=2, shared_merge_tree_disable_merges_and_mutations_assignment=1; + +create table shard_1.to (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/to_' || currentDatabase(), '1') order by x settings old_parts_lifetime=1, max_cleanup_delay_period=1, cleanup_delay_period=1, max_parts_to_merge_at_once=2; + +detach table shard_1.to; + +alter table shard_0.from_1 on cluster test_cluster_two_shards_different_databases move partition tuple() to table shard_0.to format Null settings distributed_ddl_output_mode='never_throw', distributed_ddl_task_timeout = 1; + +drop table if exists shard_0.from_1; +drop table if exists shard_1.from_1; +OPTIMIZE TABLE shard_0.to; +OPTIMIZE TABLE shard_0.to; +select name, active from system.parts where database='shard_0' and table='to' and active order by name; + +system restart replica shard_0.to; + +select sleep(3); + +attach table shard_1.to; +system sync replica shard_1.to; +select count(), sum(x) from shard_1.to; + +drop table if exists shard_0.to; +drop table if exists shard_1.to; diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference deleted file mode 100644 index 6ed281c757a..00000000000 --- a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 -1 diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference.j2 b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference.j2 new file mode 100644 index 00000000000..e0c6a439112 --- /dev/null +++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference.j2 @@ -0,0 +1,92 @@ +{% for column_expression_type in ['ALIAS', 'MATERIALIZED'] -%} +{{ column_expression_type }} +1 +1 +369 124 123 b +369 124 123 b +124 +3693 1231 a 1231 +3693 1232 1231 1231 a +a +-- { echoOn } +-- USING alias column contains default in old analyzer (but both queries below should have the same result) +SELECT y * 2, s || 'a' FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1; +738 ba +7386 aa +13332 a +SELECT y * 2, s || 'a' FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; +738 ba +7386 aa 
+13332 a +SELECT (1, *) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1; +(1,369,123,'b',124) +(1,3693,1231,'a',0) +(1,6666,0,'',48) +SELECT (1, *) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1; +(1,369,'b') +(1,3693,'a') +(1,6666,'') +SELECT (1, t1.*) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL; +(1,0,'') +(1,123,'b') +(1,1231,'a') +SELECT (1, t1.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; +(1,'',6666) +(1,'a',3693) +(1,'b',369) +SELECT (1, t1.*, t2.*) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL; +(1,0,'',6666,48) +(1,123,'b',369,124) +(1,1231,'a',0,0) +SELECT (1, t1.*, t2.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; +(1,'',0,6666) +(1,'a',3693,0) +(1,'b',369,369) +SELECT t1.z, t2.z, t3.z FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1; +0 0 43 +0 48 0 +124 124 0 +1232 0 1232 +SELECT * FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1; +126 0 0 42 +369 123 b 124 0 +3693 1231 a 0 1231 +6666 0 48 0 +SELECT t1.*, t2.*, t3.* FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1; +0 126 0 42 +0 6666 48 0 +123 b 369 124 0 +1231 a 3693 0 1231 +SELECT (1, t1.*, t2.*, t3.*) FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1 SETTINGS allow_experimental_analyzer = 1; +(1,0,'',126,0,42) +(1,0,'',6666,48,0) +(1,123,'b',369,124,0) +(1,1231,'a',3693,0,1231) +SELECT y FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1; +369 +3693 +6666 +SELECT y FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; +369 +3693 +6666 +SELECT s FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL; + +a +b +SELECT s FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; + +a +b +SELECT y FROM t1 FULL JOIN t2 USING (y) PREWHERE y * 2 > 2 ORDER BY ALL SETTINGS allow_experimental_analyzer = 1, join_use_nulls = 0; +369 +3693 +6666 +SELECT y FROM t1 FULL JOIN t2 USING (y) PREWHERE y * 2 > 2 ORDER BY ALL SETTINGS allow_experimental_analyzer = 1, join_use_nulls = 1; +369 +3693 +6666 +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; +{% endfor -%} diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql deleted file mode 100644 index 7983b43d7e5..00000000000 --- a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql +++ /dev/null @@ -1,12 +0,0 @@ -CREATE TABLE t1 (x Int16, y ALIAS x + x * 2) ENGINE=MergeTree() ORDER BY x; -CREATE TABLE t2 (y Int16, z Int16) ENGINE=MergeTree() ORDER BY y; - -INSERT INTO t1 VALUES (1231), (123); -INSERT INTO t2 VALUES (6666, 48); -INSERT INTO t2 VALUES (369, 50); - -SELECT count() FROM t1 INNER JOIN t2 USING (y); -SELECT count() FROM t2 INNER JOIN t1 USING (y); - -DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql.j2 b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql.j2 new file mode 100644 index 00000000000..f5b81231afe --- /dev/null +++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql.j2 @@ -0,0 +1,67 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF 
EXISTS t3; + +{% for column_expression_type in ['ALIAS', 'MATERIALIZED'] %} + +-- { echoOff } + +SELECT '{{ column_expression_type }}'; + +CREATE TABLE t1 (x Int16, y Int64 {{ column_expression_type }} x + x * 2, z {{ column_expression_type }} x + 1, s String) ENGINE=MergeTree() ORDER BY x; +CREATE TABLE t2 (y Int128, z Int16) ENGINE=MergeTree() ORDER BY y; + +CREATE TABLE t3 (x Int16, y Int64 {{ column_expression_type }} x + x * 2, z {{ column_expression_type }} x + 1) ENGINE=MergeTree() ORDER BY x; + +INSERT INTO t1 VALUES (1231, 'a'), (123, 'b'); + +INSERT INTO t2 VALUES (6666, 48); +INSERT INTO t2 VALUES (369, 124); + +INSERT INTO t3 VALUES (1231), (42); + +SELECT count() FROM t1 INNER JOIN t2 USING (y); +SELECT count() FROM t2 INNER JOIN t1 USING (y); + +-- `SELECT *` works differently for ALIAS columns with analyzer +SELECT * FROM t1 INNER JOIN t2 USING (y, z) SETTINGS allow_experimental_analyzer = 1; +SELECT * FROM t2 INNER JOIN t1 USING (y, z) SETTINGS allow_experimental_analyzer = 1; +SELECT t2.z FROM t1 INNER JOIN t2 USING (y); + +SELECT * FROM t1 INNER JOIN t3 USING (y) SETTINGS allow_experimental_analyzer = 1; +SELECT * FROM t3 INNER JOIN t1 USING (y, z) SETTINGS allow_experimental_analyzer = 1; +SELECT s FROM t1 INNER JOIN t3 USING (y); + +-- { echoOn } +-- USING alias column contains default in old analyzer (but both queries below should have the same result) +SELECT y * 2, s || 'a' FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1; +SELECT y * 2, s || 'a' FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; + +SELECT (1, *) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1; +SELECT (1, *) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1; + +SELECT (1, t1.*) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL; +SELECT (1, t1.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; + +SELECT (1, t1.*, t2.*) FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL; +SELECT (1, t1.*, t2.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; + +SELECT t1.z, t2.z, t3.z FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1; +SELECT * FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1; +SELECT t1.*, t2.*, t3.* FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1; +SELECT (1, t1.*, t2.*, t3.*) FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1 SETTINGS allow_experimental_analyzer = 1; + +SELECT y FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1; +SELECT y FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; + +SELECT s FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL; +SELECT s FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL; + +SELECT y FROM t1 FULL JOIN t2 USING (y) PREWHERE y * 2 > 2 ORDER BY ALL SETTINGS allow_experimental_analyzer = 1, join_use_nulls = 0; +SELECT y FROM t1 FULL JOIN t2 USING (y) PREWHERE y * 2 > 2 ORDER BY ALL SETTINGS allow_experimental_analyzer = 1, join_use_nulls = 1; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +{% endfor %} diff --git a/tests/queries/0_stateless/02973_analyzer_join_use_nulls_column_not_found.reference 
b/tests/queries/0_stateless/02973_analyzer_join_use_nulls_column_not_found.reference new file mode 100644 index 00000000000..d53d7235814 --- /dev/null +++ b/tests/queries/0_stateless/02973_analyzer_join_use_nulls_column_not_found.reference @@ -0,0 +1,14 @@ +-- +3 +-- +3 +-- +0 +-- +\N \N +-- + a +a a +-- +a a +\N \N diff --git a/tests/queries/0_stateless/02973_analyzer_join_use_nulls_column_not_found.sql b/tests/queries/0_stateless/02973_analyzer_join_use_nulls_column_not_found.sql new file mode 100644 index 00000000000..5d1afa2a274 --- /dev/null +++ b/tests/queries/0_stateless/02973_analyzer_join_use_nulls_column_not_found.sql @@ -0,0 +1,81 @@ +SET join_use_nulls = 1; + +SELECT '--'; + +select c FROM ( + select + d2.c + from ( select 1 as a, 2 as b ) d1 + FULL join ( select 1 as a, 3 as c ) d2 + on (d1.a = d2.a) +) +; + +SELECT '--'; + +with d1 as ( + select + 1 as a, + 2 as b +), +d2 as ( + select + 1 as a, + 3 as c +), +joined as ( + select + d1.*, + d2.c + from d1 + inner join d2 + on (d1.a = d2.a) +) +select c +from joined; + +SELECT '--'; + +WITH + a AS ( SELECT 0 AS key, 'a' AS acol ), + b AS ( SELECT 2 AS key ) +SELECT a.key +FROM b +LEFT JOIN a ON 1 +LEFT JOIN a AS a1 ON 1 +; + +SELECT '--'; + +WITH + a AS ( SELECT 0 AS key, 'a' AS acol ), + b AS ( SELECT 2 AS key ) +SELECT a.acol, a1.acol +FROM b +LEFT JOIN a ON a.key = b.key +LEFT JOIN a AS a1 ON a1.key = a.key +; +SELECT '--'; + +WITH + a AS ( SELECT 0 AS key, 'a' AS acol ), + b AS ( SELECT 2 AS key ) +SELECT a.acol, a1.acol +FROM b +FULL JOIN a ON a.key = b.key +FULL JOIN a AS a1 ON a1.key = a.key +ORDER BY 1 +SETTINGS join_use_nulls = 0 +; + +SELECT '--'; + +WITH + a AS ( SELECT 0 AS key, 'a' AS acol ), + b AS ( SELECT 2 AS key ) +SELECT a.acol, a1.acol +FROM b +FULL JOIN a ON a.key = b.key +FULL JOIN a AS a1 ON a1.key = a.key +ORDER BY 1 +; diff --git a/tests/queries/0_stateless/03013_test_part_level_is_reset_attach_from_disk_mt.reference b/tests/queries/0_stateless/03013_test_part_level_is_reset_attach_from_disk_mt.reference new file mode 100644 index 00000000000..a3cb6a511e5 --- /dev/null +++ b/tests/queries/0_stateless/03013_test_part_level_is_reset_attach_from_disk_mt.reference @@ -0,0 +1,22 @@ +-- {echoOn} +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO test VALUES (1), (2), (3); +OPTIMIZE TABLE test FINAL; +SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase(); +all_1_1_1 +ALTER TABLE test DETACH PART 'all_1_1_1'; +ALTER TABLE test ATTACH PART 'all_1_1_1'; +SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase(); +all_2_2_0 +-- Same as above, but with attach partition (different code path, should be tested as well) +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO test VALUES (1), (2), (3); +OPTIMIZE TABLE test FINAL; +SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase(); +all_1_1_1 +ALTER TABLE test DETACH PART 'all_1_1_1'; +ALTER TABLE test ATTACH PARTITION tuple(); +SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase(); +all_2_2_0 diff --git a/tests/queries/0_stateless/03013_test_part_level_is_reset_attach_from_disk_mt.sql b/tests/queries/0_stateless/03013_test_part_level_is_reset_attach_from_disk_mt.sql new file mode 100644 index 00000000000..eb05dfea802 --- /dev/null +++ 
b/tests/queries/0_stateless/03013_test_part_level_is_reset_attach_from_disk_mt.sql @@ -0,0 +1,21 @@ +-- Tags: no-shared-merge-tree +SET alter_sync = 2; +-- {echoOn} +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO test VALUES (1), (2), (3); +OPTIMIZE TABLE test FINAL; +SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase(); +ALTER TABLE test DETACH PART 'all_1_1_1'; +ALTER TABLE test ATTACH PART 'all_1_1_1'; +SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase(); + +-- Same as above, but with attach partition (different code path, should be tested as well) +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO test VALUES (1), (2), (3); +OPTIMIZE TABLE test FINAL; +SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase(); +ALTER TABLE test DETACH PART 'all_1_1_1'; +ALTER TABLE test ATTACH PARTITION tuple(); +SELECT part_name FROM system.parts where table='test' and active and database = currentDatabase(); diff --git a/tests/queries/0_stateless/03031_low_cardinality_logical_error.reference b/tests/queries/0_stateless/03031_low_cardinality_logical_error.reference new file mode 100644 index 00000000000..931811041f2 --- /dev/null +++ b/tests/queries/0_stateless/03031_low_cardinality_logical_error.reference @@ -0,0 +1,6 @@ +[] 0 ['2'] +['0'] 2 ['0'] +['0'] 2 ['0'] +['1'] 1 [] + +[] 3 [] diff --git a/tests/queries/0_stateless/03031_low_cardinality_logical_error.sql b/tests/queries/0_stateless/03031_low_cardinality_logical_error.sql new file mode 100644 index 00000000000..02ef0585b00 --- /dev/null +++ b/tests/queries/0_stateless/03031_low_cardinality_logical_error.sql @@ -0,0 +1,14 @@ +SELECT * +FROM ( + SELECT + ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, + count() + FROM numbers(3) + GROUP BY item_id WITH TOTALS +) AS l FULL JOIN ( + SELECT + ([toString((number % 2) * 2)] :: Array(String)) AS item_id + FROM numbers(3) +) AS r +ON l.item_id = r.item_id +ORDER BY 1,2,3; diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 32fbfee8274..e4482998fd6 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,5 +1,6 @@ v24.2.2.71-stable 2024-03-15 v24.2.1.2248-stable 2024-02-29 +v24.1.8.22-stable 2024-03-26 v24.1.7.18-stable 2024-03-15 v24.1.6.52-stable 2024-03-07 v24.1.5.6-stable 2024-02-14 @@ -7,6 +8,7 @@ v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09 v24.1.2.5-stable 2024-02-02 v24.1.1.2048-stable 2024-01-30 +v23.12.6.19-stable 2024-03-26 v23.12.5.81-stable 2024-03-15 v23.12.4.15-stable 2024-02-09 v23.12.3.40-stable 2024-02-02 @@ -29,6 +31,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.12.13-lts 2024-03-26 v23.8.11.28-lts 2024-03-15 v23.8.10.43-lts 2024-03-05 v23.8.9.54-lts 2024-01-05 @@ -60,6 +63,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.22.3-lts 2024-03-26 v23.3.21.26-lts 2024-03-15 v23.3.20.27-lts 2024-03-05 v23.3.19.32-lts 2024-01-05