diff --git a/.clang-tidy b/.clang-tidy index e9451272681..4dd8b9859c9 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -209,3 +209,5 @@ CheckOptions: # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097 - key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp value: expr-type + - key: cppcoreguidelines-avoid-do-while.IgnoreMacros + value: true diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index d69020d810e..a0d0e49b95b 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -13,9 +13,11 @@ on: # yamllint disable-line rule:truthy branches: - master paths: + - 'CHANGELOG.md' + - 'README.md' + - 'SECURITY.md' - 'docker/docs/**' - 'docs/**' - - 'website/**' - 'utils/check-style/aspell-ignore/**' jobs: CheckLabels: diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 27c4f5811da..f6d6d192f48 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -154,7 +154,7 @@ jobs: - name: Set Up Build Tools run: | sudo apt-get update - sudo apt-get install -yq git cmake ccache python3 ninja-build + sudo apt-get install -yq git cmake ccache ninja-build python3 yasm sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" - name: Run build-wrapper run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index bdf9c0615a8..7d410f833c5 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -13,9 +13,11 @@ on: # yamllint disable-line rule:truthy branches: - master paths-ignore: + - 'CHANGELOG.md' + - 'README.md' + - 'SECURITY.md' - 'docker/docs/**' - 'docs/**' - - 'website/**' - 'utils/check-style/aspell-ignore/**' ########################################################################################## ##################################### SMALL CHECKS ####################################### @@ -3103,10 +3105,10 @@ jobs: - name: Set envs run: | cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/stress_thread + TEMP_PATH=${{runner.temp}}/stress_asan REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=Stress test (asan) - REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse + REPO_COPY=${{runner.temp}}/stress_asan/ClickHouse EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -3265,6 +3267,142 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + ############################################################################################## + ######################################### UPGRADE CHECK ###################################### + ############################################################################################## + UpgradeCheckAsan: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/upgrade_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Upgrade check (asan) + REPO_COPY=${{runner.temp}}/upgrade_asan/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Upgrade check + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 upgrade_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + UpgradeCheckTsan: + needs: [BuilderDebTsan] + # same as for stress test with tsan + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/upgrade_thread + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Upgrade check (tsan) + REPO_COPY=${{runner.temp}}/upgrade_thread/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Upgrade check + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 upgrade_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + UpgradeCheckMsan: + needs: [BuilderDebMsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/upgrade_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Upgrade check (msan) + REPO_COPY=${{runner.temp}}/upgrade_memory/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Upgrade check + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 upgrade_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + UpgradeCheckDebug: + needs: [BuilderDebDebug] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/upgrade_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Upgrade check (debug) + REPO_COPY=${{runner.temp}}/upgrade_debug/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Upgrade check + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 upgrade_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ##################################### AST FUZZERS ############################################ ############################################################################################## diff --git a/CHANGELOG.md b/CHANGELOG.md index a89619aa7ca..e22377e2332 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,192 @@ ### Table of Contents +**[ClickHouse release v23.2, 2023-02-23](#232)**
**[ClickHouse release v23.1, 2023-01-25](#231)**
**[Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022/)**
# 2023 Changelog +### ClickHouse release 23.2, 2023-02-23 + +#### Backward Incompatible Change +* Extend function "toDayOfWeek()" (alias: "DAYOFWEEK") with a mode argument that encodes whether the week starts on Monday or Sunday and whether counting starts at 0 or 1. For consistency with other date time functions, the mode argument was inserted between the time and the time zone arguments. This breaks existing usage of the (previously undocumented) 2-argument syntax "toDayOfWeek(time, time_zone)". A fix is to rewrite the function into "toDayOfWeek(time, 0, time_zone)". [#45233](https://github.com/ClickHouse/ClickHouse/pull/45233) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename setting `max_query_cache_size` to `filesystem_cache_max_download_size`. [#45614](https://github.com/ClickHouse/ClickHouse/pull/45614) ([Kseniia Sumarokova](https://github.com/kssenii)). +* The `default` user will not have permissions for access type `SHOW NAMED COLLECTION` by default (e.g. `default` user will no longer be able to grant ALL to other users as it was before, therefore this PR is backward incompatible). [#46010](https://github.com/ClickHouse/ClickHouse/pull/46010) ([Kseniia Sumarokova](https://github.com/kssenii)). +* If the SETTINGS clause is specified before the FORMAT clause, the settings will be applied to formatting as well. [#46003](https://github.com/ClickHouse/ClickHouse/pull/46003) ([Azat Khuzhin](https://github.com/azat)). +* Remove support for setting `materialized_postgresql_allow_automatic_update` (which was by default turned off). [#46106](https://github.com/ClickHouse/ClickHouse/pull/46106) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Slightly improve performance of `countDigits` on realistic datasets. This closed [#44518](https://github.com/ClickHouse/ClickHouse/issues/44518). In previous versions, `countDigits(0)` returned `0`; now it returns `1`, which is more correct, and follows the existing documentation. [#46187](https://github.com/ClickHouse/ClickHouse/pull/46187) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disallow creation of new columns compressed by a combination of codecs "Delta" or "DoubleDelta" followed by codecs "Gorilla" or "FPC". This can be bypassed using setting "allow_suspicious_codecs = true". [#45652](https://github.com/ClickHouse/ClickHouse/pull/45652) ([Robert Schulze](https://github.com/rschu1ze)). + +#### New Feature +* Add `StorageIceberg` and table function `iceberg` to access iceberg table store on S3. [#45384](https://github.com/ClickHouse/ClickHouse/pull/45384) ([flynn](https://github.com/ucasfl)). +* Allow configuring storage as `SETTINGS disk = ''` (instead of `storage_policy`) and with explicit disk creation `SETTINGS disk = disk(type=s3, ...)`. [#41976](https://github.com/ClickHouse/ClickHouse/pull/41976) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Expose `ProfileEvents` counters in `system.part_log`. [#38614](https://github.com/ClickHouse/ClickHouse/pull/38614) ([Bharat Nallan](https://github.com/bharatnc)). +* Enrichment of the existing `ReplacingMergeTree` engine to allow duplicate the insertion. It leverages the power of both `ReplacingMergeTree` and `CollapsingMergeTree` in one MergeTree engine. Deleted data are not returned when queried, but not removed from disk neither. [#41005](https://github.com/ClickHouse/ClickHouse/pull/41005) ([youennL-cs](https://github.com/youennL-cs)). +* Add `generateULID` function. Closes [#36536](https://github.com/ClickHouse/ClickHouse/issues/36536). [#44662](https://github.com/ClickHouse/ClickHouse/pull/44662) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add `corrMatrix` aggregate function, calculating each two columns. In addition, since Aggregatefunctions `covarSamp` and `covarPop` are similar to `corr`, I add `covarSampMatrix`, `covarPopMatrix` by the way. @alexey-milovidov closes [#44587](https://github.com/ClickHouse/ClickHouse/issues/44587). [#44680](https://github.com/ClickHouse/ClickHouse/pull/44680) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Introduce arrayShuffle function for random array permutations. [#45271](https://github.com/ClickHouse/ClickHouse/pull/45271) ([Joanna Hulboj](https://github.com/jh0x)). +* Support types `FIXED_SIZE_BINARY` type in Arrow, `FIXED_LENGTH_BYTE_ARRAY` in `Parquet` and match them to `FixedString`. Add settings `output_format_parquet_fixed_string_as_fixed_byte_array/output_format_arrow_fixed_string_as_fixed_byte_array` to control default output type for FixedString. Closes [#45326](https://github.com/ClickHouse/ClickHouse/issues/45326). [#45340](https://github.com/ClickHouse/ClickHouse/pull/45340) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a new column `last_exception_time` to system.replication_queue. [#45457](https://github.com/ClickHouse/ClickHouse/pull/45457) ([Frank Chen](https://github.com/FrankChen021)). +* Add two new functions which allow for user-defined keys/seeds with SipHash{64,128}. [#45513](https://github.com/ClickHouse/ClickHouse/pull/45513) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Allow a three-argument version for table function `format`. close [#45808](https://github.com/ClickHouse/ClickHouse/issues/45808). [#45873](https://github.com/ClickHouse/ClickHouse/pull/45873) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Add `JodaTime` format support for 'x','w','S'. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#46073](https://github.com/ClickHouse/ClickHouse/pull/46073) ([zk_kiger](https://github.com/zk-kiger)). +* Support window function `ntile`. ([lgbo](https://github.com/lgbo-ustc)). +* Add setting `final` to implicitly apply the `FINAL` modifier to every table. [#40945](https://github.com/ClickHouse/ClickHouse/pull/40945) ([Arthur Passos](https://github.com/arthurpassos)). +* Added `arrayPartialSort` and `arrayPartialReverseSort` functions. [#46296](https://github.com/ClickHouse/ClickHouse/pull/46296) ([Joanna Hulboj](https://github.com/jh0x)). +* The new http parameter `client_protocol_version` allows setting a client protocol version for HTTP responses using the Native format. [#40397](https://github.com/ClickHouse/ClickHouse/issues/40397). [#46360](https://github.com/ClickHouse/ClickHouse/pull/46360) ([Geoff Genz](https://github.com/genzgd)). +* Add new function `regexpExtract`, like spark function `REGEXP_EXTRACT` for compatibility. It is similar to the existing function `extract`. [#46469](https://github.com/ClickHouse/ClickHouse/pull/46469) ([李扬](https://github.com/taiyang-li)). +* Add new function `JSONArrayLength`, which returns the number of elements in the outermost JSON array. The function returns NULL if the input JSON string is invalid. [#46631](https://github.com/ClickHouse/ClickHouse/pull/46631) ([李扬](https://github.com/taiyang-li)). + +#### Performance Improvement +* The introduced logic works if PREWHERE condition is a conjunction of multiple conditions (cond1 AND cond2 AND ... ). It groups those conditions that require reading the same columns into steps. After each step the corresponding part of the full condition is computed and the result rows might be filtered. This allows to read fewer rows in the next steps thus saving IO bandwidth and doing less computation. This logic is disabled by default for now. It will be enabled by default in one of the future releases once it is known to not have any regressions, so it is highly encouraged to be used for testing. It can be controlled by 2 settings: "enable_multiple_prewhere_read_steps" and "move_all_conditions_to_prewhere". [#46140](https://github.com/ClickHouse/ClickHouse/pull/46140) ([Alexander Gololobov](https://github.com/davenger)). +* An option added to aggregate partitions independently if table partition key and group by key are compatible. Controlled by the setting `allow_aggregate_partitions_independently`. Disabled by default because of limited applicability (please refer to the docs). [#45364](https://github.com/ClickHouse/ClickHouse/pull/45364) ([Nikita Taranov](https://github.com/nickitat)). +* Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#45681](https://github.com/ClickHouse/ClickHouse/pull/45681) [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)). +* Optimize `Parquet` reader by using batch reader. [#45878](https://github.com/ClickHouse/ClickHouse/pull/45878) ([LiuNeng](https://github.com/liuneng1994)). +* Add new `local_filesystem_read_method` method `io_uring` based on the asynchronous Linux [io_uring](https://kernel.dk/io_uring.pdf) subsystem, improving read performance almost universally compared to the default `pread` method. [#38456](https://github.com/ClickHouse/ClickHouse/pull/38456) ([Saulius Valatka](https://github.com/sauliusvl)). +* Rewrite aggregate functions with `if` expression as argument when logically equivalent. For example, `avg(if(cond, col, null))` can be rewritten to avgIf(cond, col). It is helpful in performance. [#44730](https://github.com/ClickHouse/ClickHouse/pull/44730) ([李扬](https://github.com/taiyang-li)). +* Improve lower/upper function performance with avx512 instructions. [#37894](https://github.com/ClickHouse/ClickHouse/pull/37894) ([yaqi-zhao](https://github.com/yaqi-zhao)). +* Remove the limitation that on systems with >=32 cores and SMT disabled ClickHouse uses only half of the cores (the case when you disable Hyper Threading in BIOS). [#44973](https://github.com/ClickHouse/ClickHouse/pull/44973) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve performance of function `multiIf` by columnar executing, speed up by 2.3x. [#45296](https://github.com/ClickHouse/ClickHouse/pull/45296) ([李扬](https://github.com/taiyang-li)). +* Add fast path for function `position` when the needle is empty. [#45382](https://github.com/ClickHouse/ClickHouse/pull/45382) ([李扬](https://github.com/taiyang-li)). +* Enable `query_plan_remove_redundant_sorting` optimization by default. Optimization implemented in [#45420](https://github.com/ClickHouse/ClickHouse/issues/45420). [#45567](https://github.com/ClickHouse/ClickHouse/pull/45567) ([Igor Nikonov](https://github.com/devcrafter)). +* Increased HTTP Transfer Encoding chunk size to improve performance of large queries using the HTTP interface. [#45593](https://github.com/ClickHouse/ClickHouse/pull/45593) ([Geoff Genz](https://github.com/genzgd)). +* Fixed performance of short `SELECT` queries that read from tables with large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). +* Improve performance of filtering for big integers and decimal types. [#45949](https://github.com/ClickHouse/ClickHouse/pull/45949) ([李扬](https://github.com/taiyang-li)). +* This change could effectively reduce the overhead of obtaining the filter from ColumnNullable(UInt8) and improve the overall query performance. To evaluate the impact of this change, we adopted TPC-H benchmark but revised the column types from non-nullable to nullable, and we measured the QPS of its queries as the performance indicator. [#45962](https://github.com/ClickHouse/ClickHouse/pull/45962) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Make the `_part` and `_partition_id` virtual column be `LowCardinality(String)` type. Closes [#45964](https://github.com/ClickHouse/ClickHouse/issues/45964). [#45975](https://github.com/ClickHouse/ClickHouse/pull/45975) ([flynn](https://github.com/ucasfl)). +* Improve the performance of Decimal conversion when the scale does not change. [#46095](https://github.com/ClickHouse/ClickHouse/pull/46095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to increase prefetching for read data. [#46168](https://github.com/ClickHouse/ClickHouse/pull/46168) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Rewrite `arrayExists(x -> x = 1, arr)` -> `has(arr, 1)`, which improve performance by 1.34x. [#46188](https://github.com/ClickHouse/ClickHouse/pull/46188) ([李扬](https://github.com/taiyang-li)). +* Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update zstd to v1.5.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge/mutation is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. [#46280](https://github.com/ClickHouse/ClickHouse/pull/46280) ([Raúl Marín](https://github.com/Algunenano)). +* Fix performance degradation caused by [#39737](https://github.com/ClickHouse/ClickHouse/issues/39737). [#46309](https://github.com/ClickHouse/ClickHouse/pull/46309) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `replicas_status` handle will answer quickly even in case of a large replication queue. [#46310](https://github.com/ClickHouse/ClickHouse/pull/46310) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add avx512 support for aggregate function `sum`, function unary arithmetic, function comparison. [#37870](https://github.com/ClickHouse/ClickHouse/pull/37870) ([zhao zhou](https://github.com/zzachimed)). +* Rewrote the code around marks distribution and the overall coordination of the reading in order to achieve the maximum performance improvement. This closes [#34527](https://github.com/ClickHouse/ClickHouse/issues/34527). [#43772](https://github.com/ClickHouse/ClickHouse/pull/43772) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove redundant DISTINCT clauses in query (subqueries). Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding DISTINCT clauses. Can be enabled via `query_plan_remove_redundant_distinct` setting. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#44176](https://github.com/ClickHouse/ClickHouse/pull/44176) ([Igor Nikonov](https://github.com/devcrafter)). +* A few query rewrite optimizations: `sumIf(123, cond) -> 123 * countIf(1, cond)`, `sum(if(cond, 123, 0)) -> 123 * countIf(cond)`, `sum(if(cond, 0, 123)) -> 123 * countIf(not(cond))` [#44728](https://github.com/ClickHouse/ClickHouse/pull/44728) ([李扬](https://github.com/taiyang-li)). +* Improved how memory bound merging and aggregation in order on top query plan interact. Previously we fell back to explicit sorting for AIO in some cases when it wasn't actually needed. [#45892](https://github.com/ClickHouse/ClickHouse/pull/45892) ([Nikita Taranov](https://github.com/nickitat)). +* Concurrent merges are scheduled using round-robin by default to ensure fair and starvation-free operation. Previously in heavily overloaded shards, big merges could possibly be starved by smaller merges due to the use of strict priority scheduling. Added `background_merges_mutations_scheduling_policy` server config option to select scheduling algorithm (`round_robin` or `shortest_task_first`). [#46247](https://github.com/ClickHouse/ClickHouse/pull/46247) ([Sergei Trifonov](https://github.com/serxa)). + +#### Improvement +* Enable retries for INSERT by default in case of ZooKeeper session loss. We already use it in production. [#46308](https://github.com/ClickHouse/ClickHouse/pull/46308) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to ignore unknown keys in JSON object for named tuples (`input_format_json_ignore_unknown_keys_in_named_tuple`). [#45678](https://github.com/ClickHouse/ClickHouse/pull/45678) ([Azat Khuzhin](https://github.com/azat)). +* Support optimizing the `where` clause with sorting key expression move to `prewhere` for query with `final`. [#38893](https://github.com/ClickHouse/ClickHouse/issues/38893). [#38950](https://github.com/ClickHouse/ClickHouse/pull/38950) ([hexiaoting](https://github.com/hexiaoting)). +* Add new metrics for backups: num_processed_files and processed_files_size described actual number of processed files. [#42244](https://github.com/ClickHouse/ClickHouse/pull/42244) ([Aleksandr](https://github.com/AVMusorin)). +* Added retries on interserver DNS errors. [#43179](https://github.com/ClickHouse/ClickHouse/pull/43179) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Keeper improvement: try preallocating space on the disk to avoid undefined out-of-space issues. Introduce setting `max_log_file_size` for the maximum size of Keeper's Raft log files. [#44370](https://github.com/ClickHouse/ClickHouse/pull/44370) ([Antonio Andelic](https://github.com/antonio2368)). +* Optimize behavior for a replica delay api logic in case the replica is read-only. [#45148](https://github.com/ClickHouse/ClickHouse/pull/45148) ([mateng915](https://github.com/mateng0915)). +* Ask for the password in clickhouse-client interactively in a case when the empty password is wrong. Closes [#46702](https://github.com/ClickHouse/ClickHouse/issues/46702). [#46730](https://github.com/ClickHouse/ClickHouse/pull/46730) ([Nikolay Degterinsky](https://github.com/evillique)). +* Mark `Gorilla` compression on columns of non-Float* type as suspicious. [#45376](https://github.com/ClickHouse/ClickHouse/pull/45376) ([Robert Schulze](https://github.com/rschu1ze)). +* Show replica name that is executing a merge in the `postpone_reason` column. [#45458](https://github.com/ClickHouse/ClickHouse/pull/45458) ([Frank Chen](https://github.com/FrankChen021)). +* Save exception stack trace in part_log. [#45459](https://github.com/ClickHouse/ClickHouse/pull/45459) ([Frank Chen](https://github.com/FrankChen021)). +* The `regexp_tree` dictionary is polished and now it is compatible with https://github.com/ua-parser/uap-core. [#45631](https://github.com/ClickHouse/ClickHouse/pull/45631) ([Han Fei](https://github.com/hanfei1991)). +* Updated checking of `SYSTEM SYNC REPLICA`, resolves [#45508](https://github.com/ClickHouse/ClickHouse/issues/45508) [#45648](https://github.com/ClickHouse/ClickHouse/pull/45648) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Rename setting `replication_alter_partitions_sync` to `alter_sync`. [#45659](https://github.com/ClickHouse/ClickHouse/pull/45659) ([Antonio Andelic](https://github.com/antonio2368)). +* The `generateRandom` table function and the engine now support `LowCardinality` data types. This is useful for testing, for example you can write `INSERT INTO table SELECT * FROM generateRandom() LIMIT 1000`. This is needed to debug [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590). [#45661](https://github.com/ClickHouse/ClickHouse/pull/45661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The experimental query result cache now provides more modular configuration settings. [#45679](https://github.com/ClickHouse/ClickHouse/pull/45679) ([Robert Schulze](https://github.com/rschu1ze)). +* Renamed "query result cache" to "query cache". [#45682](https://github.com/ClickHouse/ClickHouse/pull/45682) ([Robert Schulze](https://github.com/rschu1ze)). +* add `SYSTEM SYNC FILE CACHE` command. It will do the `sync` syscall. [#8921](https://github.com/ClickHouse/ClickHouse/issues/8921). [#45685](https://github.com/ClickHouse/ClickHouse/pull/45685) ([DR](https://github.com/freedomDR)). +* Add a new S3 setting `allow_head_object_request`. This PR makes usage of `GetObjectAttributes` request instead of `HeadObject` introduced in https://github.com/ClickHouse/ClickHouse/pull/45288 optional (and disabled by default). [#45701](https://github.com/ClickHouse/ClickHouse/pull/45701) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add ability to override connection settings based on connection names (that said that now you can forget about storing password for each connection, you can simply put everything into `~/.clickhouse-client/config.xml` and even use different history files for them, which can be also useful). [#45715](https://github.com/ClickHouse/ClickHouse/pull/45715) ([Azat Khuzhin](https://github.com/azat)). +* Arrow format: support the duration type. Closes [#45669](https://github.com/ClickHouse/ClickHouse/issues/45669). [#45750](https://github.com/ClickHouse/ClickHouse/pull/45750) ([flynn](https://github.com/ucasfl)). +* Extend the logging in the Query Cache to improve investigations of the caching behavior. [#45751](https://github.com/ClickHouse/ClickHouse/pull/45751) ([Robert Schulze](https://github.com/rschu1ze)). +* The query cache's server-level settings are now reconfigurable at runtime. [#45758](https://github.com/ClickHouse/ClickHouse/pull/45758) ([Robert Schulze](https://github.com/rschu1ze)). +* Hide password in logs when a table function's arguments are specified with a named collection. [#45774](https://github.com/ClickHouse/ClickHouse/pull/45774) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve internal S3 client to correctly deduce regions and redirections for different types of URLs. [#45783](https://github.com/ClickHouse/ClickHouse/pull/45783) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for Map, IPv4 and IPv6 types in generateRandom. Mostly useful for testing. [#45785](https://github.com/ClickHouse/ClickHouse/pull/45785) ([Raúl Marín](https://github.com/Algunenano)). +* Support empty/notEmpty for IP types. [#45799](https://github.com/ClickHouse/ClickHouse/pull/45799) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* The column `num_processed_files` was split into two columns: `num_files` (for BACKUP) and `files_read` (for RESTORE). The column `processed_files_size` was split into two columns: `total_size` (for BACKUP) and `bytes_read` (for RESTORE). [#45800](https://github.com/ClickHouse/ClickHouse/pull/45800) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add support for `SHOW ENGINES` query for MySQL compatibility. [#45859](https://github.com/ClickHouse/ClickHouse/pull/45859) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Improved how the obfuscator deals with queries. [#45867](https://github.com/ClickHouse/ClickHouse/pull/45867) ([Raúl Marín](https://github.com/Algunenano)). +* Improve behaviour of conversion into Date for boundary value 65535 (2149-06-06). [#46042](https://github.com/ClickHouse/ClickHouse/pull/46042) [#45914](https://github.com/ClickHouse/ClickHouse/pull/45914) ([Joanna Hulboj](https://github.com/jh0x)). +* Add setting `check_referential_table_dependencies` to check referential dependencies on `DROP TABLE`. This PR solves [#38326](https://github.com/ClickHouse/ClickHouse/issues/38326). [#45936](https://github.com/ClickHouse/ClickHouse/pull/45936) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix `tupleElement` to return `Null` when having `Null` argument. Closes [#45894](https://github.com/ClickHouse/ClickHouse/issues/45894). [#45952](https://github.com/ClickHouse/ClickHouse/pull/45952) ([flynn](https://github.com/ucasfl)). +* Throw an error on no files satisfying the S3 wildcard. Closes [#45587](https://github.com/ClickHouse/ClickHouse/issues/45587). [#45957](https://github.com/ClickHouse/ClickHouse/pull/45957) ([chen](https://github.com/xiedeyantu)). +* Use cluster state data to check concurrent backup/restore. [#45982](https://github.com/ClickHouse/ClickHouse/pull/45982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* ClickHouse Client: Use "exact" matching for fuzzy search, which has correct case ignorance and more appropriate algorithm for matching SQL queries. [#46000](https://github.com/ClickHouse/ClickHouse/pull/46000) ([Azat Khuzhin](https://github.com/azat)). +* Forbid wrong create View syntax `CREATE View X TO Y AS SELECT`. Closes [#4331](https://github.com/ClickHouse/ClickHouse/issues/4331). [#46043](https://github.com/ClickHouse/ClickHouse/pull/46043) ([flynn](https://github.com/ucasfl)). +* Storage `Log` family support setting the `storage_policy`. Closes [#43421](https://github.com/ClickHouse/ClickHouse/issues/43421). [#46044](https://github.com/ClickHouse/ClickHouse/pull/46044) ([flynn](https://github.com/ucasfl)). +* Improve `JSONColumns` format when the result is empty. Closes [#46024](https://github.com/ClickHouse/ClickHouse/issues/46024). [#46053](https://github.com/ClickHouse/ClickHouse/pull/46053) ([flynn](https://github.com/ucasfl)). +* Add reference implementation for SipHash128. [#46065](https://github.com/ClickHouse/ClickHouse/pull/46065) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Add a new metric to record allocations times and bytes using mmap. [#46068](https://github.com/ClickHouse/ClickHouse/pull/46068) ([李扬](https://github.com/taiyang-li)). +* Currently for functions like `leftPad`, `rightPad`, `leftPadUTF8`, `rightPadUTF8`, the second argument `length` must be UInt8|16|32|64|128|256. Which is too strict for clickhouse users, besides, it is not consistent with other similar functions like `arrayResize`, `substring` and so on. [#46103](https://github.com/ClickHouse/ClickHouse/pull/46103) ([李扬](https://github.com/taiyang-li)). +* Fix assertion in the `welchTTest` function in debug build when the resulting statistics is NaN. Unified the behavior with other similar functions. Change the behavior of `studentTTest` to return NaN instead of throwing an exception because the previous behavior was inconvenient. This closes [#41176](https://github.com/ClickHouse/ClickHouse/issues/41176) This closes [#42162](https://github.com/ClickHouse/ClickHouse/issues/42162). [#46141](https://github.com/ClickHouse/ClickHouse/pull/46141) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* More convenient usage of big integers and ORDER BY WITH FILL. Allow using plain integers for start and end points in WITH FILL when ORDER BY big (128-bit and 256-bit) integers. Fix the wrong result for big integers with negative start or end points. This closes [#16733](https://github.com/ClickHouse/ClickHouse/issues/16733). [#46152](https://github.com/ClickHouse/ClickHouse/pull/46152) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `parts`, `active_parts` and `total_marks` columns to `system.tables` on [issue](https://github.com/ClickHouse/ClickHouse/issues/44336). [#46161](https://github.com/ClickHouse/ClickHouse/pull/46161) ([attack204](https://github.com/attack204)). +* Functions "multi[Fuzzy]Match(Any|AnyIndex|AllIndices}" now reject regexes which will likely evaluate very slowly in vectorscan. [#46167](https://github.com/ClickHouse/ClickHouse/pull/46167) ([Robert Schulze](https://github.com/rschu1ze)). +* When `insert_null_as_default` is enabled and column doesn't have defined default value, the default of column type will be used. Also this PR fixes using default values on nulls in case of LowCardinality columns. [#46171](https://github.com/ClickHouse/ClickHouse/pull/46171) ([Kruglov Pavel](https://github.com/Avogar)). +* Prefer explicitly defined access keys for S3 clients. If `use_environment_credentials` is set to `true`, and the user has provided the access key through query or config, they will be used instead of the ones from the environment variable. [#46191](https://github.com/ClickHouse/ClickHouse/pull/46191) ([Antonio Andelic](https://github.com/antonio2368)). +* Add an alias "DATE_FORMAT()" for function "formatDateTime()" to improve compatibility with MySQL's SQL dialect, extend function `formatDateTime` with substitutions "a", "b", "c", "h", "i", "k", "l" "r", "s", "W". ### Documentation entry for user-facing changes User-readable short description: `DATE_FORMAT` is an alias of `formatDateTime`. Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. (Provide link to [formatDateTime](https://clickhouse.com/docs/en/sql-reference/functions/date-time-functions/#formatdatetime)). [#46302](https://github.com/ClickHouse/ClickHouse/pull/46302) ([Jake Bamrah](https://github.com/JakeBamrah)). +* Add `ProfileEvents` and `CurrentMetrics` about the callback tasks for parallel replicas (`s3Cluster` and `MergeTree` tables). [#46313](https://github.com/ClickHouse/ClickHouse/pull/46313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for `DELETE` and `UPDATE` for tables using `KeeperMap` storage engine. [#46330](https://github.com/ClickHouse/ClickHouse/pull/46330) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow writing RENAME queries with query parameters. Resolves [#45778](https://github.com/ClickHouse/ClickHouse/issues/45778). [#46407](https://github.com/ClickHouse/ClickHouse/pull/46407) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix parameterized SELECT queries with REPLACE transformer. Resolves [#33002](https://github.com/ClickHouse/ClickHouse/issues/33002). [#46420](https://github.com/ClickHouse/ClickHouse/pull/46420) ([Nikolay Degterinsky](https://github.com/evillique)). +* Exclude the internal database used for temporary/external tables from the calculation of asynchronous metric "NumberOfDatabases". This makes the behavior consistent with system table "system.databases". [#46435](https://github.com/ClickHouse/ClickHouse/pull/46435) ([Robert Schulze](https://github.com/rschu1ze)). +* Added `last_exception_time` column into distribution_queue table. [#46564](https://github.com/ClickHouse/ClickHouse/pull/46564) ([Aleksandr](https://github.com/AVMusorin)). +* Support for IN clause with parameter in parameterized views. [#46583](https://github.com/ClickHouse/ClickHouse/pull/46583) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Do not load named collections on server startup (load them on first access instead). [#46607](https://github.com/ClickHouse/ClickHouse/pull/46607) ([Kseniia Sumarokova](https://github.com/kssenii)). + + +#### Build/Testing/Packaging Improvement +* Introduce GWP-ASan implemented by the LLVM runtime. This closes [#27039](https://github.com/ClickHouse/ClickHouse/issues/27039). [#45226](https://github.com/ClickHouse/ClickHouse/pull/45226) ([Han Fei](https://github.com/hanfei1991)). +* We want to make our tests less stable and more flaky: add randomization for merge tree settings in tests. [#38983](https://github.com/ClickHouse/ClickHouse/pull/38983) ([Anton Popov](https://github.com/CurtizJ)). +* Enable the HDFS support in PowerPC and which helps to fixes the following functional tests 02113_hdfs_assert.sh, 02244_hdfs_cluster.sql and 02368_cancel_write_into_hdfs.sh. [#44949](https://github.com/ClickHouse/ClickHouse/pull/44949) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). +* Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* ClickHouse's fork of poco was moved from "contrib/" to "base/poco/". [#46075](https://github.com/ClickHouse/ClickHouse/pull/46075) ([Robert Schulze](https://github.com/rschu1ze)). +* Add an option for `clickhouse-watchdog` to restart the child process. This does not make a lot of use. [#46312](https://github.com/ClickHouse/ClickHouse/pull/46312) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* If the environment variable `CLICKHOUSE_DOCKER_RESTART_ON_EXIT` is set to 1, the Docker container will run `clickhouse-server` as a child instead of the first process, and restart it when it exited. [#46391](https://github.com/ClickHouse/ClickHouse/pull/46391) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Systemd service file. [#46461](https://github.com/ClickHouse/ClickHouse/pull/46461) ([SuperDJY](https://github.com/cmsxbc)). +* Raised the minimum Clang version needed to build ClickHouse from 12 to 15. [#46710](https://github.com/ClickHouse/ClickHouse/pull/46710) ([Robert Schulze](https://github.com/rschu1ze)). +* Upgrade Intel QPL from v0.3.0 to v1.0.0 2. Build libaccel-config and link it statically to QPL library instead of dynamically. [#45809](https://github.com/ClickHouse/ClickHouse/pull/45809) ([jasperzhu](https://github.com/jinjunzh)). + + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Flush data exactly by `rabbitmq_flush_interval_ms` or by `rabbitmq_max_block_size` in `StorageRabbitMQ`. Closes [#42389](https://github.com/ClickHouse/ClickHouse/issues/42389). Closes [#45160](https://github.com/ClickHouse/ClickHouse/issues/45160). [#44404](https://github.com/ClickHouse/ClickHouse/pull/44404) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Use PODArray to render in sparkBar function, so we can control the memory usage. Close [#44467](https://github.com/ClickHouse/ClickHouse/issues/44467). [#44489](https://github.com/ClickHouse/ClickHouse/pull/44489) ([Duc Canh Le](https://github.com/canhld94)). +* Fix functions (quantilesExactExclusive, quantilesExactInclusive) return unsorted array element. [#45379](https://github.com/ClickHouse/ClickHouse/pull/45379) ([wujunfu](https://github.com/wujunfu)). +* Fix uncaught exception in HTTPHandler when open telemetry is enabled. [#45456](https://github.com/ClickHouse/ClickHouse/pull/45456) ([Frank Chen](https://github.com/FrankChen021)). +* Don't infer Dates from 8 digit numbers. It could lead to wrong data to be read. [#45581](https://github.com/ClickHouse/ClickHouse/pull/45581) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixes to correctly use `odbc_bridge_use_connection_pooling` setting. [#45591](https://github.com/ClickHouse/ClickHouse/pull/45591) ([Bharat Nallan](https://github.com/bharatnc)). +* When the callback in the cache is called, it is possible that this cache is destructed. To keep it safe, we capture members by value. It's also safe for task schedule because it will be deactivated before storage is destroyed. Resolve [#45548](https://github.com/ClickHouse/ClickHouse/issues/45548). [#45601](https://github.com/ClickHouse/ClickHouse/pull/45601) ([Han Fei](https://github.com/hanfei1991)). +* Fix data corruption when codecs Delta or DoubleDelta are combined with codec Gorilla. [#45615](https://github.com/ClickHouse/ClickHouse/pull/45615) ([Robert Schulze](https://github.com/rschu1ze)). +* Correctly check types when using N-gram bloom filter index to avoid invalid reads. [#45617](https://github.com/ClickHouse/ClickHouse/pull/45617) ([Antonio Andelic](https://github.com/antonio2368)). +* A couple of segfaults have been reported around `c-ares`. They were introduced in my previous pull requests. I have fixed them with the help of Alexander Tokmakov. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). +* Set compression method and level for backup Closes [#45690](https://github.com/ClickHouse/ClickHouse/issues/45690). [#45737](https://github.com/ClickHouse/ClickHouse/pull/45737) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Should use `select_query_typed.limitByOffset()` instead of `select_query_typed.limitOffset()`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)). +* When use experimental analyzer, queries like `SELECT number FROM numbers(100) LIMIT 10 OFFSET 10;` get wrong results (empty result for this sql). That is caused by an unnecessary offset step added by planner. [#45822](https://github.com/ClickHouse/ClickHouse/pull/45822) ([刘陶峰](https://github.com/taofengliu)). +* Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Bugfix IPv6 parser for mixed ip4 address with missed first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add the `query_kind` column to the `system.processes` table and the `SHOW PROCESSLIST` query. Remove duplicate code. It fixes a bug: the global configuration parameter `max_concurrent_select_queries` was not respected to queries with `INTERSECT` or `EXCEPT` chains. [#45872](https://github.com/ClickHouse/ClickHouse/pull/45872) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in a function `stochasticLinearRegression`. Found by WingFuzz. [#45985](https://github.com/ClickHouse/ClickHouse/pull/45985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash in `SELECT` queries with `INTERSECT` and `EXCEPT` modifiers that read data from tables with enabled sparse columns (controlled by setting `ratio_of_defaults_for_sparse_serialization`). [#45987](https://github.com/ClickHouse/ClickHouse/pull/45987) ([Anton Popov](https://github.com/CurtizJ)). +* Fix read in order optimization for DESC sorting with FINAL, close [#45815](https://github.com/ClickHouse/ClickHouse/issues/45815). [#46009](https://github.com/ClickHouse/ClickHouse/pull/46009) ([Vladimir C](https://github.com/vdimir)). +* Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Fix elapsed column in system.processes (10x error). [#46047](https://github.com/ClickHouse/ClickHouse/pull/46047) ([Azat Khuzhin](https://github.com/azat)). +* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix environment variable substitution in the configuration when a parameter already has a value. This closes [#46131](https://github.com/ClickHouse/ClickHouse/issues/46131). This closes [#9547](https://github.com/ClickHouse/ClickHouse/issues/9547). [#46144](https://github.com/ClickHouse/ClickHouse/pull/46144) ([pufit](https://github.com/pufit)). +* Fix incorrect predicate push down with grouping sets. Closes [#45947](https://github.com/ClickHouse/ClickHouse/issues/45947). [#46151](https://github.com/ClickHouse/ClickHouse/pull/46151) ([flynn](https://github.com/ucasfl)). +* Fix possible pipeline stuck error on `fulls_sorting_join` with constant keys. [#46175](https://github.com/ClickHouse/ClickHouse/pull/46175) ([Vladimir C](https://github.com/vdimir)). +* Never rewrite tuple functions as literals during formatting to avoid incorrect results. [#46232](https://github.com/ClickHouse/ClickHouse/pull/46232) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix possible out of bounds error while reading LowCardinality(Nullable) in Arrow format. [#46270](https://github.com/ClickHouse/ClickHouse/pull/46270) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix possible crash which can be caused by an integer overflow while deserializing aggregating state of a function that stores HashTable. [#46349](https://github.com/ClickHouse/ClickHouse/pull/46349) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed a LOGICAL_ERROR on an attempt to execute `ALTER ... MOVE PART ... TO TABLE`. This type of query was never actually supported. [#46359](https://github.com/ClickHouse/ClickHouse/pull/46359) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix s3Cluster schema inference in parallel distributed insert select when `parallel_distributed_insert_select` is enabled. [#46381](https://github.com/ClickHouse/ClickHouse/pull/46381) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix queries like `ALTER TABLE ... UPDATE nested.arr1 = nested.arr2 ...`, where `arr1` and `arr2` are fields of the same `Nested` column. [#46387](https://github.com/ClickHouse/ClickHouse/pull/46387) ([Anton Popov](https://github.com/CurtizJ)). +* Scheduler may fail to schedule a task. If it happens, the whole MulityPartUpload should be aborted and `UploadHelper` must wait for already scheduled tasks. [#46451](https://github.com/ClickHouse/ClickHouse/pull/46451) ([Dmitry Novik](https://github.com/novikd)). +* Fix PREWHERE for Merge with different default types (fixes some `NOT_FOUND_COLUMN_IN_BLOCK` when the default type for the column differs, also allow `PREWHERE` when the type of column is the same across tables, and prohibit it, only if it differs). [#46454](https://github.com/ClickHouse/ClickHouse/pull/46454) ([Azat Khuzhin](https://github.com/azat)). +* Fix a crash that could happen when constant values are used in `ORDER BY`. Fixes [#46466](https://github.com/ClickHouse/ClickHouse/issues/46466). [#46493](https://github.com/ClickHouse/ClickHouse/pull/46493) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not throw exception if `disk` setting was specified on query level, but `storage_policy` was specified in config merge tree settings section. `disk` will override setting from config. [#46533](https://github.com/ClickHouse/ClickHouse/pull/46533) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fixes [#46557](https://github.com/ClickHouse/ClickHouse/issues/46557). [#46611](https://github.com/ClickHouse/ClickHouse/pull/46611) ([Alexander Gololobov](https://github.com/davenger)). +* Fix endless restarts of clickhouse-server systemd unit if server cannot start within 1m30sec (Disable timeout logic for starting clickhouse-server from systemd service). [#46613](https://github.com/ClickHouse/ClickHouse/pull/46613) ([Azat Khuzhin](https://github.com/azat)). +* Allocated during asynchronous inserts memory buffers were deallocated in the global context and MemoryTracker counters for corresponding user and query were not updated correctly. That led to false positive OOM exceptions. [#46622](https://github.com/ClickHouse/ClickHouse/pull/46622) ([Dmitry Novik](https://github.com/novikd)). +* Updated to not clear on_expression from table_join as its used by future analyze runs resolves [#45185](https://github.com/ClickHouse/ClickHouse/issues/45185). [#46487](https://github.com/ClickHouse/ClickHouse/pull/46487) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). + + ### ClickHouse release 23.1, 2023-01-26 ### ClickHouse release 23.1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b1ffbd54bb..cbb666b81c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -391,10 +391,12 @@ if (COMPILER_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers") - # Set new experimental pass manager, it's a performance, build time and binary size win. - # Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang. - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) + # Set new experimental pass manager, it's a performance, build time and binary size win. + # Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager") + endif () # We cannot afford to use LTO when compiling unit tests, and it's not enough # to only supply -fno-lto at the final linking stage. So we disable it diff --git a/README.md b/README.md index d2809c1b141..fcbe65e8223 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,13 @@ curl https://clickhouse.com/ | sh * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster. * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. +* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming Events -* [**ClickHouse Workshop**](https://clickhouse.com/company/events/2023-02-15-clickhouse-workshop?utm_source=github&utm_medium=social&utm_campaign=workshop) - Feb 15 & 16 - In this 2-day (3 hrs per day) free training, topics range from introductory content to a deep dive on interacting with and understanding your data. There will be both live training and hands-on labs. * [**v23.2 Release Webinar**](https://clickhouse.com/company/events/v23-2-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-02) - Feb 23 - 23.2 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. * [**ClickHouse Meetup in Amsterdam**](https://www.meetup.com/clickhouse-netherlands-user-group/events/291485868/) - Mar 9 - The first ClickHouse Amsterdam Meetup of 2023 is here! 🎉 Join us for short lightning talks and long discussions. Food, drinks & good times on us. * [**ClickHouse Meetup in SF Bay Area**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/291490121/) - Mar 14 - A night to meet with ClickHouse team in the San Francisco area! Food and drink are a given...but networking is the primary focus. diff --git a/SECURITY.md b/SECURITY.md index 0fd72971d30..7c6648c70eb 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.2 | ✔️ | | 23.1 | ✔️ | | 22.12 | ✔️ | -| 22.11 | ✔️ | +| 22.11 | ❌ | | 22.10 | ❌ | | 22.9 | ❌ | | 22.8 | ✔️ | diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index bae03ad590a..7e8ea5051d7 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -195,7 +195,6 @@ long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, un #include #include -#if !defined(__aarch64__) struct statx { uint32_t stx_mask; uint32_t stx_blksize; @@ -226,7 +225,6 @@ int statx(int fd, const char *restrict path, int flag, { return syscall(SYS_statx, fd, path, flag, mask, statxbuf); } -#endif #include diff --git a/base/glibc-compatibility/musl/fallocate.c b/base/glibc-compatibility/musl/fallocate.c index 31e63822c94..2369f157436 100644 --- a/base/glibc-compatibility/musl/fallocate.c +++ b/base/glibc-compatibility/musl/fallocate.c @@ -8,3 +8,8 @@ int fallocate(int fd, int mode, off_t base, off_t len) { return syscall(SYS_fallocate, fd, mode, base, len); } + +int fallocate64(int fd, int mode, off_t base, off_t len) +{ + return fallocate(fd, mode, base, len); +} diff --git a/base/glibc-compatibility/musl/pwritev.c b/base/glibc-compatibility/musl/pwritev.c index 4cd31fcaea9..bdfeb572423 100644 --- a/base/glibc-compatibility/musl/pwritev.c +++ b/base/glibc-compatibility/musl/pwritev.c @@ -9,3 +9,8 @@ ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs) /// There was cancellable syscall (syscall_cp), but I don't care. return syscall(SYS_pwritev, fd, iov, count, (long)(ofs), (long)(ofs>>32)); } + +ssize_t pwritev64(int fd, const struct iovec *iov, int count, off_t ofs) +{ + return pwritev(fd, iov, count, ofs); +} diff --git a/base/poco/Foundation/include/Poco/Alignment.h b/base/poco/Foundation/include/Poco/Alignment.h index b1d48ffd62d..300c55ee8ef 100644 --- a/base/poco/Foundation/include/Poco/Alignment.h +++ b/base/poco/Foundation/include/Poco/Alignment.h @@ -136,7 +136,6 @@ struct AlignedCharArrayImpl; // MSVC requires special handling here. -# ifdef POCO_COMPILER_CLANG # if __has_feature(cxx_alignas) # define POCO_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ @@ -148,17 +147,6 @@ struct AlignedCharArrayImpl; # define POCO_HAVE_ALIGNMENT # endif -# elif defined(__GNUC__) || defined(__IBM_ATTRIBUTES) - -# define POCO_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ - template <> \ - struct AlignedCharArrayImpl \ - { \ - char aligned __attribute__((aligned(x))); \ - } -# define POCO_HAVE_ALIGNMENT - -# endif # ifdef POCO_HAVE_ALIGNMENT POCO_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1); diff --git a/base/poco/Foundation/include/Poco/BinaryReader.h b/base/poco/Foundation/include/Poco/BinaryReader.h index 280724a8a47..4042b507a2f 100644 --- a/base/poco/Foundation/include/Poco/BinaryReader.h +++ b/base/poco/Foundation/include/Poco/BinaryReader.h @@ -76,7 +76,7 @@ public: BinaryReader & operator>>(float & value); BinaryReader & operator>>(double & value); -#if defined(POCO_HAVE_INT64) && !defined(POCO_LONG_IS_64_BIT) +#if !defined(POCO_LONG_IS_64_BIT) BinaryReader & operator>>(Int64 & value); BinaryReader & operator>>(UInt64 & value); #endif @@ -106,12 +106,10 @@ public: /// See BinaryWriter::write7BitEncoded() for a description /// of the compression algorithm. -#if defined(POCO_HAVE_INT64) void read7BitEncoded(UInt64 & value); /// Reads a 64-bit unsigned integer in compressed format. /// See BinaryWriter::write7BitEncoded() for a description /// of the compression algorithm. -#endif void readRaw(std::streamsize length, std::string & value); /// Reads length bytes of raw data into value. diff --git a/base/poco/Foundation/include/Poco/BinaryWriter.h b/base/poco/Foundation/include/Poco/BinaryWriter.h index 30a353a8ff7..aa280d4ccab 100644 --- a/base/poco/Foundation/include/Poco/BinaryWriter.h +++ b/base/poco/Foundation/include/Poco/BinaryWriter.h @@ -81,7 +81,7 @@ public: BinaryWriter & operator<<(float value); BinaryWriter & operator<<(double value); -#if defined(POCO_HAVE_INT64) && !defined(POCO_LONG_IS_64_BIT) +#if !defined(POCO_LONG_IS_64_BIT) BinaryWriter & operator<<(Int64 value); BinaryWriter & operator<<(UInt64 value); #endif @@ -114,7 +114,6 @@ public: /// written out. value is then shifted by seven bits and the next byte is written. /// This process is repeated until the entire integer has been written. -#if defined(POCO_HAVE_INT64) void write7BitEncoded(UInt64 value); /// Writes a 64-bit unsigned integer in a compressed format. /// The value written out seven bits at a time, starting @@ -125,7 +124,6 @@ public: /// If value will not fit in seven bits, the high bit is set on the first byte and /// written out. value is then shifted by seven bits and the next byte is written. /// This process is repeated until the entire integer has been written. -#endif void writeRaw(const std::string & rawData); /// Writes the string as-is to the stream. diff --git a/base/poco/Foundation/include/Poco/ByteOrder.h b/base/poco/Foundation/include/Poco/ByteOrder.h index 4f2644ddf4e..a8abf09f93b 100644 --- a/base/poco/Foundation/include/Poco/ByteOrder.h +++ b/base/poco/Foundation/include/Poco/ByteOrder.h @@ -34,73 +34,55 @@ public: static UInt16 flipBytes(UInt16 value); static Int32 flipBytes(Int32 value); static UInt32 flipBytes(UInt32 value); -#if defined(POCO_HAVE_INT64) static Int64 flipBytes(Int64 value); static UInt64 flipBytes(UInt64 value); -#endif static Int16 toBigEndian(Int16 value); static UInt16 toBigEndian(UInt16 value); static Int32 toBigEndian(Int32 value); static UInt32 toBigEndian(UInt32 value); -#if defined(POCO_HAVE_INT64) static Int64 toBigEndian(Int64 value); static UInt64 toBigEndian(UInt64 value); -#endif static Int16 fromBigEndian(Int16 value); static UInt16 fromBigEndian(UInt16 value); static Int32 fromBigEndian(Int32 value); static UInt32 fromBigEndian(UInt32 value); -#if defined(POCO_HAVE_INT64) static Int64 fromBigEndian(Int64 value); static UInt64 fromBigEndian(UInt64 value); -#endif static Int16 toLittleEndian(Int16 value); static UInt16 toLittleEndian(UInt16 value); static Int32 toLittleEndian(Int32 value); static UInt32 toLittleEndian(UInt32 value); -#if defined(POCO_HAVE_INT64) static Int64 toLittleEndian(Int64 value); static UInt64 toLittleEndian(UInt64 value); -#endif static Int16 fromLittleEndian(Int16 value); static UInt16 fromLittleEndian(UInt16 value); static Int32 fromLittleEndian(Int32 value); static UInt32 fromLittleEndian(UInt32 value); -#if defined(POCO_HAVE_INT64) static Int64 fromLittleEndian(Int64 value); static UInt64 fromLittleEndian(UInt64 value); -#endif static Int16 toNetwork(Int16 value); static UInt16 toNetwork(UInt16 value); static Int32 toNetwork(Int32 value); static UInt32 toNetwork(UInt32 value); -#if defined(POCO_HAVE_INT64) static Int64 toNetwork(Int64 value); static UInt64 toNetwork(UInt64 value); -#endif static Int16 fromNetwork(Int16 value); static UInt16 fromNetwork(UInt16 value); static Int32 fromNetwork(Int32 value); static UInt32 fromNetwork(UInt32 value); -#if defined(POCO_HAVE_INT64) static Int64 fromNetwork(Int64 value); static UInt64 fromNetwork(UInt64 value); -#endif }; #if !defined(POCO_NO_BYTESWAP_BUILTINS) -# if defined(__clang__) -# if __has_builtin(__builtin_bswap32) -# define POCO_HAVE_GCC_BYTESWAP 1 -# endif -# elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# if __has_builtin(__builtin_bswap32) # define POCO_HAVE_GCC_BYTESWAP 1 # endif #endif @@ -143,7 +125,6 @@ inline Int32 ByteOrder::flipBytes(Int32 value) } -#if defined(POCO_HAVE_INT64) inline UInt64 ByteOrder::flipBytes(UInt64 value) { # if defined(POCO_HAVE_MSC_BYTESWAP) @@ -162,7 +143,6 @@ inline Int64 ByteOrder::flipBytes(Int64 value) { return Int64(flipBytes(UInt64(value))); } -#endif // POCO_HAVE_INT64 // @@ -180,7 +160,6 @@ inline Int64 ByteOrder::flipBytes(Int64 value) } -#if defined(POCO_HAVE_INT64) # define POCO_IMPLEMENT_BYTEORDER_NOOP(op) \ POCO_IMPLEMENT_BYTEORDER_NOOP_(op, Int16) \ POCO_IMPLEMENT_BYTEORDER_NOOP_(op, UInt16) \ @@ -195,18 +174,6 @@ inline Int64 ByteOrder::flipBytes(Int64 value) POCO_IMPLEMENT_BYTEORDER_FLIP_(op, UInt32) \ POCO_IMPLEMENT_BYTEORDER_FLIP_(op, Int64) \ POCO_IMPLEMENT_BYTEORDER_FLIP_(op, UInt64) -#else -# define POCO_IMPLEMENT_BYTEORDER_NOOP(op) \ - POCO_IMPLEMENT_BYTEORDER_NOOP_(op, Int16) \ - POCO_IMPLEMENT_BYTEORDER_NOOP_(op, UInt16) \ - POCO_IMPLEMENT_BYTEORDER_NOOP_(op, Int32) \ - POCO_IMPLEMENT_BYTEORDER_NOOP_(op, UInt32) -# define POCO_IMPLEMENT_BYTEORDER_FLIP(op) \ - POCO_IMPLEMENT_BYTEORDER_FLIP_(op, Int16) \ - POCO_IMPLEMENT_BYTEORDER_FLIP_(op, UInt16) \ - POCO_IMPLEMENT_BYTEORDER_FLIP_(op, Int32) \ - POCO_IMPLEMENT_BYTEORDER_FLIP_(op, UInt32) -#endif #if defined(POCO_ARCH_BIG_ENDIAN) diff --git a/base/poco/Foundation/include/Poco/Foundation.h b/base/poco/Foundation/include/Poco/Foundation.h index 34493041720..bee9d9d90a9 100644 --- a/base/poco/Foundation/include/Poco/Foundation.h +++ b/base/poco/Foundation/include/Poco/Foundation.h @@ -98,10 +98,8 @@ # define POCO_DEPRECATED #elif defined(_GNUC_) # define POCO_DEPRECATED __attribute__((deprecated)) -#elif defined(__clang__) -# define POCO_DEPRECATED __attribute__((deprecated)) #else -# define POCO_DEPRECATED +# define POCO_DEPRECATED __attribute__((deprecated)) #endif diff --git a/base/poco/Foundation/include/Poco/NumberFormatter.h b/base/poco/Foundation/include/Poco/NumberFormatter.h index e246ca16ec3..a320b576083 100644 --- a/base/poco/Foundation/include/Poco/NumberFormatter.h +++ b/base/poco/Foundation/include/Poco/NumberFormatter.h @@ -151,7 +151,6 @@ public: /// If prefix is true, "0x" prefix is prepended to the /// resulting string. -#ifdef POCO_HAVE_INT64 # ifdef POCO_LONG_IS_64_BIT @@ -255,7 +254,6 @@ public: # endif // ifdef POCO_LONG_IS_64_BIT -#endif // ifdef POCO_HAVE_INT64 static std::string format(float value); /// Formats a float value in decimal floating-point notation, @@ -380,7 +378,6 @@ public: /// right justified and zero-padded in a field having at least the /// specified width. -#ifdef POCO_HAVE_INT64 # ifdef POCO_LONG_IS_64_BIT @@ -472,7 +469,6 @@ public: # endif // ifdef POCO_LONG_IS_64_BIT -#endif // ifdef POCO_HAVE_INT64 static void append(std::string & str, float value); /// Formats a float value in decimal floating-point notation, @@ -673,7 +669,6 @@ inline std::string NumberFormatter::formatHex(unsigned long value, int width, bo } -#ifdef POCO_HAVE_INT64 # ifdef POCO_LONG_IS_64_BIT @@ -843,7 +838,6 @@ inline std::string NumberFormatter::formatHex(UInt64 value, int width, bool pref # endif // ifdef POCO_LONG_IS_64_BIT -#endif // ifdef POCO_HAVE_INT64 inline std::string NumberFormatter::format(float value) diff --git a/base/poco/Foundation/include/Poco/NumberParser.h b/base/poco/Foundation/include/Poco/NumberParser.h index de813e37dae..32f8c0dc989 100644 --- a/base/poco/Foundation/include/Poco/NumberParser.h +++ b/base/poco/Foundation/include/Poco/NumberParser.h @@ -80,7 +80,6 @@ public: /// Returns true if a valid integer has been found, false otherwise. /// If parsing was not successful, value is undefined. -#if defined(POCO_HAVE_INT64) static Int64 parse64(const std::string & s, char thousandSeparator = ','); /// Parses a 64-bit integer value in decimal notation from the given string. @@ -118,7 +117,6 @@ public: /// Returns true if a valid integer has been found, false otherwise. /// If parsing was not successful, value is undefined. -#endif // defined(POCO_HAVE_INT64) static double parseFloat(const std::string & s, char decimalSeparator = '.', char thousandSeparator = ','); /// Parses a double value in decimal floating point notation diff --git a/base/poco/Foundation/include/Poco/Platform.h b/base/poco/Foundation/include/Poco/Platform.h index eb8f80a0d25..fe45833aea6 100644 --- a/base/poco/Foundation/include/Poco/Platform.h +++ b/base/poco/Foundation/include/Poco/Platform.h @@ -212,25 +212,6 @@ #endif -#if defined(__clang__) -# define POCO_COMPILER_CLANG -#elif defined(__GNUC__) -# define POCO_COMPILER_GCC -#elif defined(__MINGW32__) || defined(__MINGW64__) -# define POCO_COMPILER_MINGW -#elif defined(__INTEL_COMPILER) || defined(__ICC) || defined(__ECC) || defined(__ICL) -# define POCO_COMPILER_INTEL -#elif defined(__MWERKS__) || defined(__CWCC__) -# define POCO_COMPILER_CODEWARRIOR -#elif defined(__sgi) || defined(sgi) -# define POCO_COMPILER_SGI -#elif defined(__BORLANDC__) || defined(__CODEGEARC__) -# define POCO_COMPILER_CBUILDER -#elif defined(__DMC__) -# define POCO_COMPILER_DMARS -#endif - - #ifdef __GNUC__ # define POCO_UNUSED __attribute__((unused)) #else diff --git a/base/poco/Foundation/include/Poco/Platform_POSIX.h b/base/poco/Foundation/include/Poco/Platform_POSIX.h index 96f0c32cb9e..b23c6d68b90 100644 --- a/base/poco/Foundation/include/Poco/Platform_POSIX.h +++ b/base/poco/Foundation/include/Poco/Platform_POSIX.h @@ -32,10 +32,8 @@ // // Thread-safety of local static initialization // -#if __cplusplus >= 201103L || __GNUC__ >= 4 || defined(__clang__) -# ifndef POCO_LOCAL_STATIC_INIT_IS_THREADSAFE -# define POCO_LOCAL_STATIC_INIT_IS_THREADSAFE 1 -# endif +#ifndef POCO_LOCAL_STATIC_INIT_IS_THREADSAFE +# define POCO_LOCAL_STATIC_INIT_IS_THREADSAFE 1 #endif diff --git a/base/poco/Foundation/include/Poco/StreamCopier.h b/base/poco/Foundation/include/Poco/StreamCopier.h index 72b19306388..c24e73d88dd 100644 --- a/base/poco/Foundation/include/Poco/StreamCopier.h +++ b/base/poco/Foundation/include/Poco/StreamCopier.h @@ -38,7 +38,6 @@ public: /// /// Returns the number of bytes copied. -#if defined(POCO_HAVE_INT64) static Poco::UInt64 copyStream64(std::istream & istr, std::ostream & ostr, std::size_t bufferSize = 8192); /// Writes all bytes readable from istr to ostr, using an internal buffer. /// @@ -46,14 +45,12 @@ public: /// /// Note: the only difference to copyStream() is that a 64-bit unsigned /// integer is used to count the number of bytes copied. -#endif static std::streamsize copyStreamUnbuffered(std::istream & istr, std::ostream & ostr); /// Writes all bytes readable from istr to ostr. /// /// Returns the number of bytes copied. -#if defined(POCO_HAVE_INT64) static Poco::UInt64 copyStreamUnbuffered64(std::istream & istr, std::ostream & ostr); /// Writes all bytes readable from istr to ostr. /// @@ -61,14 +58,12 @@ public: /// /// Note: the only difference to copyStreamUnbuffered() is that a 64-bit unsigned /// integer is used to count the number of bytes copied. -#endif static std::streamsize copyToString(std::istream & istr, std::string & str, std::size_t bufferSize = 8192); /// Appends all bytes readable from istr to the given string, using an internal buffer. /// /// Returns the number of bytes copied. -#if defined(POCO_HAVE_INT64) static Poco::UInt64 copyToString64(std::istream & istr, std::string & str, std::size_t bufferSize = 8192); /// Appends all bytes readable from istr to the given string, using an internal buffer. /// @@ -76,7 +71,6 @@ public: /// /// Note: the only difference to copyToString() is that a 64-bit unsigned /// integer is used to count the number of bytes copied. -#endif }; diff --git a/base/poco/Foundation/include/Poco/Timespan.h b/base/poco/Foundation/include/Poco/Timespan.h index f0657ac0c36..ec7430eeddc 100644 --- a/base/poco/Foundation/include/Poco/Timespan.h +++ b/base/poco/Foundation/include/Poco/Timespan.h @@ -67,19 +67,7 @@ public: void swap(Timespan & timespan); /// Swaps the Timespan with another one. - bool operator==(const Timespan & ts) const; - bool operator!=(const Timespan & ts) const; - bool operator>(const Timespan & ts) const; - bool operator>=(const Timespan & ts) const; - bool operator<(const Timespan & ts) const; - bool operator<=(const Timespan & ts) const; - - bool operator==(TimeDiff microSeconds) const; - bool operator!=(TimeDiff microSeconds) const; - bool operator>(TimeDiff microSeconds) const; - bool operator>=(TimeDiff microSeconds) const; - bool operator<(TimeDiff microSeconds) const; - bool operator<=(TimeDiff microSeconds) const; + auto operator<=>(const Timespan & ts) const = default; Timespan operator+(const Timespan & d) const; Timespan operator-(const Timespan & d) const; @@ -215,78 +203,6 @@ inline Timespan::TimeDiff Timespan::totalMicroseconds() const } -inline bool Timespan::operator==(const Timespan & ts) const -{ - return _span == ts._span; -} - - -inline bool Timespan::operator!=(const Timespan & ts) const -{ - return _span != ts._span; -} - - -inline bool Timespan::operator>(const Timespan & ts) const -{ - return _span > ts._span; -} - - -inline bool Timespan::operator>=(const Timespan & ts) const -{ - return _span >= ts._span; -} - - -inline bool Timespan::operator<(const Timespan & ts) const -{ - return _span < ts._span; -} - - -inline bool Timespan::operator<=(const Timespan & ts) const -{ - return _span <= ts._span; -} - - -inline bool Timespan::operator==(TimeDiff microSeconds) const -{ - return _span == microSeconds; -} - - -inline bool Timespan::operator!=(TimeDiff microSeconds) const -{ - return _span != microSeconds; -} - - -inline bool Timespan::operator>(TimeDiff microSeconds) const -{ - return _span > microSeconds; -} - - -inline bool Timespan::operator>=(TimeDiff microSeconds) const -{ - return _span >= microSeconds; -} - - -inline bool Timespan::operator<(TimeDiff microSeconds) const -{ - return _span < microSeconds; -} - - -inline bool Timespan::operator<=(TimeDiff microSeconds) const -{ - return _span <= microSeconds; -} - - inline void swap(Timespan & s1, Timespan & s2) { s1.swap(s2); diff --git a/base/poco/Foundation/include/Poco/Token.h b/base/poco/Foundation/include/Poco/Token.h index 2d62ed87de6..1aec9e620fe 100644 --- a/base/poco/Foundation/include/Poco/Token.h +++ b/base/poco/Foundation/include/Poco/Token.h @@ -84,13 +84,11 @@ public: virtual std::string asString() const; /// Returns a string representation of the token. -#if defined(POCO_HAVE_INT64) virtual Int64 asInteger64() const; /// Returns a 64-bit integer representation of the token. virtual UInt64 asUnsignedInteger64() const; /// Returns an unsigned 64-bit integer representation of the token. -#endif virtual int asInteger() const; /// Returns an integer representation of the token. diff --git a/base/poco/Foundation/include/Poco/Types.h b/base/poco/Foundation/include/Poco/Types.h index 156b3584d15..4f4924a2542 100644 --- a/base/poco/Foundation/include/Poco/Types.h +++ b/base/poco/Foundation/include/Poco/Types.h @@ -25,7 +25,6 @@ namespace Poco { -#if defined(__GNUC__) || defined(__clang__) // // Unix/GCC/Clang // @@ -46,8 +45,6 @@ typedef unsigned long UInt64; typedef signed long long Int64; typedef unsigned long long UInt64; # endif -# define POCO_HAVE_INT64 1 -#endif } // namespace Poco diff --git a/base/poco/Foundation/src/BinaryReader.cpp b/base/poco/Foundation/src/BinaryReader.cpp index fb57371fbc3..f2961e03966 100644 --- a/base/poco/Foundation/src/BinaryReader.cpp +++ b/base/poco/Foundation/src/BinaryReader.cpp @@ -170,7 +170,7 @@ BinaryReader& BinaryReader::operator >> (double& value) } -#if defined(POCO_HAVE_INT64) && !defined(POCO_LONG_IS_64_BIT) +#if !defined(POCO_LONG_IS_64_BIT) BinaryReader& BinaryReader::operator >> (Int64& value) @@ -233,7 +233,6 @@ void BinaryReader::read7BitEncoded(UInt32& value) } -#if defined(POCO_HAVE_INT64) void BinaryReader::read7BitEncoded(UInt64& value) @@ -254,7 +253,6 @@ void BinaryReader::read7BitEncoded(UInt64& value) } -#endif void BinaryReader::readRaw(std::streamsize length, std::string& value) diff --git a/base/poco/Foundation/src/BinaryWriter.cpp b/base/poco/Foundation/src/BinaryWriter.cpp index 62e1adfe373..6db5ab7cb90 100644 --- a/base/poco/Foundation/src/BinaryWriter.cpp +++ b/base/poco/Foundation/src/BinaryWriter.cpp @@ -212,7 +212,7 @@ BinaryWriter& BinaryWriter::operator << (double value) } -#if defined(POCO_HAVE_INT64) && !defined(POCO_LONG_IS_64_BIT) +#if !defined(POCO_LONG_IS_64_BIT) BinaryWriter& BinaryWriter::operator << (Int64 value) @@ -303,7 +303,6 @@ void BinaryWriter::write7BitEncoded(UInt32 value) } -#if defined(POCO_HAVE_INT64) void BinaryWriter::write7BitEncoded(UInt64 value) @@ -319,7 +318,6 @@ void BinaryWriter::write7BitEncoded(UInt64 value) } -#endif void BinaryWriter::writeRaw(const std::string& rawData) diff --git a/base/poco/Foundation/src/NumberFormatter.cpp b/base/poco/Foundation/src/NumberFormatter.cpp index 5c8126e9b0a..0a9334059a9 100644 --- a/base/poco/Foundation/src/NumberFormatter.cpp +++ b/base/poco/Foundation/src/NumberFormatter.cpp @@ -234,7 +234,6 @@ void NumberFormatter::appendHex(std::string& str, unsigned long value, int width } -#ifdef POCO_HAVE_INT64 #ifdef POCO_LONG_IS_64_BIT @@ -424,7 +423,6 @@ void NumberFormatter::appendHex(std::string& str, UInt64 value, int width) #endif // ifdef POCO_LONG_IS_64_BIT -#endif // ifdef POCO_HAVE_INT64 void NumberFormatter::append(std::string& str, float value) diff --git a/base/poco/Foundation/src/NumberParser.cpp b/base/poco/Foundation/src/NumberParser.cpp index 56eeb167595..8d32e1a722c 100644 --- a/base/poco/Foundation/src/NumberParser.cpp +++ b/base/poco/Foundation/src/NumberParser.cpp @@ -44,7 +44,7 @@ int NumberParser::parse(const std::string& s, char thSep) if (tryParse(s, result, thSep)) return result; else - throw SyntaxException("Not a valid integer", s); + throw SyntaxException("Not a valid integer", "'" + s + "'"); } @@ -60,7 +60,7 @@ unsigned NumberParser::parseUnsigned(const std::string& s, char thSep) if (tryParseUnsigned(s, result, thSep)) return result; else - throw SyntaxException("Not a valid unsigned integer", s); + throw SyntaxException("Not a valid unsigned integer", "'" + s + "'"); } @@ -76,7 +76,7 @@ unsigned NumberParser::parseHex(const std::string& s) if (tryParseHex(s, result)) return result; else - throw SyntaxException("Not a valid hexadecimal integer", s); + throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'"); } @@ -94,7 +94,7 @@ unsigned NumberParser::parseOct(const std::string& s) if (tryParseOct(s, result)) return result; else - throw SyntaxException("Not a valid hexadecimal integer", s); + throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'"); } @@ -104,7 +104,6 @@ bool NumberParser::tryParseOct(const std::string& s, unsigned& value) } -#if defined(POCO_HAVE_INT64) Int64 NumberParser::parse64(const std::string& s, char thSep) @@ -113,7 +112,7 @@ Int64 NumberParser::parse64(const std::string& s, char thSep) if (tryParse64(s, result, thSep)) return result; else - throw SyntaxException("Not a valid integer", s); + throw SyntaxException("Not a valid integer", "'" + s + "'"); } @@ -129,7 +128,7 @@ UInt64 NumberParser::parseUnsigned64(const std::string& s, char thSep) if (tryParseUnsigned64(s, result, thSep)) return result; else - throw SyntaxException("Not a valid unsigned integer", s); + throw SyntaxException("Not a valid unsigned integer", "'" + s + "'"); } @@ -145,7 +144,7 @@ UInt64 NumberParser::parseHex64(const std::string& s) if (tryParseHex64(s, result)) return result; else - throw SyntaxException("Not a valid hexadecimal integer", s); + throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'"); } @@ -163,7 +162,7 @@ UInt64 NumberParser::parseOct64(const std::string& s) if (tryParseOct64(s, result)) return result; else - throw SyntaxException("Not a valid hexadecimal integer", s); + throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'"); } @@ -173,7 +172,6 @@ bool NumberParser::tryParseOct64(const std::string& s, UInt64& value) } -#endif // defined(POCO_HAVE_INT64) double NumberParser::parseFloat(const std::string& s, char decSep, char thSep) @@ -182,7 +180,7 @@ double NumberParser::parseFloat(const std::string& s, char decSep, char thSep) if (tryParseFloat(s, result, decSep, thSep)) return result; else - throw SyntaxException("Not a valid floating-point number", s); + throw SyntaxException("Not a valid floating-point number", "'" + s + "'"); } @@ -198,7 +196,7 @@ bool NumberParser::parseBool(const std::string& s) if (tryParseBool(s, result)) return result; else - throw SyntaxException("Not a valid bool number", s); + throw SyntaxException("Not a valid bool number", "'" + s + "'"); } diff --git a/base/poco/Foundation/src/StreamCopier.cpp b/base/poco/Foundation/src/StreamCopier.cpp index 6f34cc233a2..508d1e7b2ae 100644 --- a/base/poco/Foundation/src/StreamCopier.cpp +++ b/base/poco/Foundation/src/StreamCopier.cpp @@ -42,7 +42,6 @@ std::streamsize StreamCopier::copyStream(std::istream& istr, std::ostream& ostr, } -#if defined(POCO_HAVE_INT64) Poco::UInt64 StreamCopier::copyStream64(std::istream& istr, std::ostream& ostr, std::size_t bufferSize) { poco_assert (bufferSize > 0); @@ -64,7 +63,6 @@ Poco::UInt64 StreamCopier::copyStream64(std::istream& istr, std::ostream& ostr, } return len; } -#endif std::streamsize StreamCopier::copyToString(std::istream& istr, std::string& str, std::size_t bufferSize) @@ -90,7 +88,6 @@ std::streamsize StreamCopier::copyToString(std::istream& istr, std::string& str, } -#if defined(POCO_HAVE_INT64) Poco::UInt64 StreamCopier::copyToString64(std::istream& istr, std::string& str, std::size_t bufferSize) { poco_assert (bufferSize > 0); @@ -112,7 +109,6 @@ Poco::UInt64 StreamCopier::copyToString64(std::istream& istr, std::string& str, } return len; } -#endif std::streamsize StreamCopier::copyStreamUnbuffered(std::istream& istr, std::ostream& ostr) @@ -130,7 +126,6 @@ std::streamsize StreamCopier::copyStreamUnbuffered(std::istream& istr, std::ostr } -#if defined(POCO_HAVE_INT64) Poco::UInt64 StreamCopier::copyStreamUnbuffered64(std::istream& istr, std::ostream& ostr) { char c = 0; @@ -144,7 +139,6 @@ Poco::UInt64 StreamCopier::copyStreamUnbuffered64(std::istream& istr, std::ostre } return len; } -#endif } // namespace Poco diff --git a/base/poco/Foundation/src/Token.cpp b/base/poco/Foundation/src/Token.cpp index 98e8bb25e93..4e81c6ef885 100644 --- a/base/poco/Foundation/src/Token.cpp +++ b/base/poco/Foundation/src/Token.cpp @@ -54,7 +54,6 @@ std::string Token::asString() const } -#if defined(POCO_HAVE_INT64) Int64 Token::asInteger64() const { return NumberParser::parse64(_value); @@ -65,7 +64,6 @@ UInt64 Token::asUnsignedInteger64() const { return NumberParser::parseUnsigned64(_value); } -#endif int Token::asInteger() const diff --git a/base/poco/JSON/include/Poco/JSON/Handler.h b/base/poco/JSON/include/Poco/JSON/Handler.h index f9114a59221..c412a05003f 100644 --- a/base/poco/JSON/include/Poco/JSON/Handler.h +++ b/base/poco/JSON/include/Poco/JSON/Handler.h @@ -74,14 +74,12 @@ namespace JSON /// An unsigned value is read. This will only be triggered if the /// value cannot fit into a signed int. -#if defined(POCO_HAVE_INT64) virtual void value(Int64 v) = 0; /// A 64-bit integer value is read. virtual void value(UInt64 v) = 0; /// An unsigned 64-bit integer value is read. This will only be /// triggered if the value cannot fit into a signed 64-bit integer. -#endif virtual void value(const std::string & value) = 0; /// A string value is read. diff --git a/base/poco/JSON/include/Poco/JSON/ParseHandler.h b/base/poco/JSON/include/Poco/JSON/ParseHandler.h index 4669dc8638f..1b8ac3066d2 100644 --- a/base/poco/JSON/include/Poco/JSON/ParseHandler.h +++ b/base/poco/JSON/include/Poco/JSON/ParseHandler.h @@ -73,14 +73,12 @@ namespace JSON /// An unsigned value is read. This will only be triggered if the /// value cannot fit into a signed int. -#if defined(POCO_HAVE_INT64) virtual void value(Int64 v); /// A 64-bit integer value is read virtual void value(UInt64 v); /// An unsigned 64-bit integer value is read. This will only be /// triggered if the value cannot fit into a signed 64-bit integer. -#endif virtual void value(const std::string & s); /// A string value is read. @@ -126,7 +124,6 @@ namespace JSON } -#if defined(POCO_HAVE_INT64) inline void ParseHandler::value(Int64 v) { setValue(v); @@ -137,7 +134,6 @@ namespace JSON { setValue(v); } -#endif inline void ParseHandler::value(const std::string & s) diff --git a/base/poco/JSON/include/Poco/JSON/PrintHandler.h b/base/poco/JSON/include/Poco/JSON/PrintHandler.h index 34a991653ba..390f4d8bba9 100644 --- a/base/poco/JSON/include/Poco/JSON/PrintHandler.h +++ b/base/poco/JSON/include/Poco/JSON/PrintHandler.h @@ -81,13 +81,11 @@ namespace JSON /// An unsigned value is read. This will only be triggered if the /// value cannot fit into a signed int. -#if defined(POCO_HAVE_INT64) void value(Int64 v); /// A 64-bit integer value is read; it will be written to the output. void value(UInt64 v); /// An unsigned 64-bit integer value is read; it will be written to the output. -#endif void value(const std::string & value); /// A string value is read; it will be formatted and written to the output. diff --git a/base/poco/JSON/src/PrintHandler.cpp b/base/poco/JSON/src/PrintHandler.cpp index bf735d0869c..ea81cbdd1c0 100644 --- a/base/poco/JSON/src/PrintHandler.cpp +++ b/base/poco/JSON/src/PrintHandler.cpp @@ -154,7 +154,6 @@ void PrintHandler::value(unsigned v) } -#if defined(POCO_HAVE_INT64) void PrintHandler::value(Int64 v) { arrayValue(); @@ -169,7 +168,6 @@ void PrintHandler::value(UInt64 v) _out << v; _objStart = false; } -#endif void PrintHandler::value(const std::string& value) diff --git a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h index dcdd1cfcaf8..4de211fdb92 100644 --- a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h @@ -43,11 +43,7 @@ namespace Net public: typedef HTTPBasicStreamBuf::openmode openmode; -#if defined(POCO_HAVE_INT64) typedef Poco::Int64 ContentLength; -#else - typedef std::streamsize ContentLength; -#endif HTTPFixedLengthStreamBuf(HTTPSession & session, ContentLength length, openmode mode); ~HTTPFixedLengthStreamBuf(); diff --git a/base/poco/Net/include/Poco/Net/HTTPMessage.h b/base/poco/Net/include/Poco/Net/HTTPMessage.h index 5c54bf7306b..0bef50803a8 100644 --- a/base/poco/Net/include/Poco/Net/HTTPMessage.h +++ b/base/poco/Net/include/Poco/Net/HTTPMessage.h @@ -56,7 +56,6 @@ namespace Net /// which may be UNKNOWN_CONTENT_LENGTH if /// no Content-Length header is present. -#if defined(POCO_HAVE_INT64) void setContentLength64(Poco::Int64 length); /// Sets the Content-Length header. /// @@ -73,7 +72,6 @@ namespace Net /// /// In contrast to getContentLength(), this method /// always returns a 64-bit integer for content length. -#endif // defined(POCO_HAVE_INT64) bool hasContentLength() const; /// Returns true iff a Content-Length header is present. diff --git a/base/poco/Net/include/Poco/Net/HTTPRequest.h b/base/poco/Net/include/Poco/Net/HTTPRequest.h index 7f17342b22d..269167feb83 100644 --- a/base/poco/Net/include/Poco/Net/HTTPRequest.h +++ b/base/poco/Net/include/Poco/Net/HTTPRequest.h @@ -132,14 +132,10 @@ namespace Net /// Writes the HTTP request to the given /// output stream. -#if __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Woverloaded-virtual" -#endif void read(std::istream & istr); -#if __clang__ # pragma clang diagnostic pop -#endif /// Reads the HTTP request from the /// given input stream. diff --git a/base/poco/Net/include/Poco/Net/HTTPResponse.h b/base/poco/Net/include/Poco/Net/HTTPResponse.h index b889f0b30fb..3c444c3d38c 100644 --- a/base/poco/Net/include/Poco/Net/HTTPResponse.h +++ b/base/poco/Net/include/Poco/Net/HTTPResponse.h @@ -188,14 +188,10 @@ namespace Net /// Writes the HTTP response to the given /// output stream, but do not finish with \r\n delimiter. -#if __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Woverloaded-virtual" -#endif void read(std::istream & istr); -#if __clang__ # pragma clang diagnostic pop -#endif /// Reads the HTTP response from the /// given input stream. /// diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index 323e9526df5..c5697b556d1 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -264,11 +264,7 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) { Poco::CountingOutputStream cs; request.write(cs); -#if POCO_HAVE_INT64 _pRequestStream = new HTTPFixedLengthOutputStream(*this, request.getContentLength64() + cs.chars()); -#else - _pRequestStream = new HTTPFixedLengthOutputStream(*this, request.getContentLength() + cs.chars()); -#endif request.write(*_pRequestStream); } else if ((method != HTTPRequest::HTTP_PUT && method != HTTPRequest::HTTP_POST && method != HTTPRequest::HTTP_PATCH) || request.has(HTTPRequest::UPGRADE)) @@ -334,11 +330,7 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response) else if (response.getChunkedTransferEncoding()) _pResponseStream = new HTTPChunkedInputStream(*this); else if (response.hasContentLength()) -#if defined(POCO_HAVE_INT64) _pResponseStream = new HTTPFixedLengthInputStream(*this, response.getContentLength64()); -#else - _pResponseStream = new HTTPFixedLengthInputStream(*this, response.getContentLength()); -#endif else _pResponseStream = new HTTPInputStream(*this); diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index debda04c3b3..0cd234ee9cb 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ b/base/poco/Net/src/HTTPMessage.cpp @@ -89,7 +89,6 @@ std::streamsize HTTPMessage::getContentLength() const } -#if defined(POCO_HAVE_INT64) void HTTPMessage::setContentLength64(Poco::Int64 length) { if (length != UNKNOWN_CONTENT_LENGTH) @@ -108,7 +107,6 @@ Poco::Int64 HTTPMessage::getContentLength64() const } else return UNKNOWN_CONTENT_LENGTH; } -#endif // defined(POCO_HAVE_INT64) void HTTPMessage::setTransferEncoding(const std::string& transferEncoding) diff --git a/base/poco/Net/src/HTTPServerRequestImpl.cpp b/base/poco/Net/src/HTTPServerRequestImpl.cpp index d8ea7398c9b..d893e49aafb 100644 --- a/base/poco/Net/src/HTTPServerRequestImpl.cpp +++ b/base/poco/Net/src/HTTPServerRequestImpl.cpp @@ -49,11 +49,7 @@ HTTPServerRequestImpl::HTTPServerRequestImpl(HTTPServerResponseImpl& response, H if (getChunkedTransferEncoding()) _pStream = new HTTPChunkedInputStream(session); else if (hasContentLength()) -#if defined(POCO_HAVE_INT64) _pStream = new HTTPFixedLengthInputStream(session, getContentLength64()); -#else - _pStream = new HTTPFixedLengthInputStream(session, getContentLength()); -#endif else if (getMethod() == HTTPRequest::HTTP_GET || getMethod() == HTTPRequest::HTTP_HEAD || getMethod() == HTTPRequest::HTTP_DELETE) _pStream = new HTTPFixedLengthInputStream(session, 0); else diff --git a/base/poco/Net/src/HTTPServerResponseImpl.cpp b/base/poco/Net/src/HTTPServerResponseImpl.cpp index fb6783c633e..55de22c876c 100644 --- a/base/poco/Net/src/HTTPServerResponseImpl.cpp +++ b/base/poco/Net/src/HTTPServerResponseImpl.cpp @@ -92,11 +92,7 @@ std::ostream& HTTPServerResponseImpl::send() { Poco::CountingOutputStream cs; write(cs); -#if defined(POCO_HAVE_INT64) _pStream = new HTTPFixedLengthOutputStream(_session, getContentLength64() + cs.chars()); -#else - _pStream = new HTTPFixedLengthOutputStream(_session, getContentLength() + cs.chars()); -#endif write(*_pStream); } else @@ -153,11 +149,7 @@ void HTTPServerResponseImpl::sendFile(const std::string& path, const std::string Timestamp dateTime = f.getLastModified(); File::FileSize length = f.getSize(); set("Last-Modified", DateTimeFormatter::format(dateTime, DateTimeFormat::HTTP_FORMAT)); -#if defined(POCO_HAVE_INT64) setContentLength64(length); -#else - setContentLength(static_cast(length)); -#endif setContentType(mediaType); setChunkedTransferEncoding(false); diff --git a/base/poco/Util/include/Poco/Util/AbstractConfiguration.h b/base/poco/Util/include/Poco/Util/AbstractConfiguration.h index a0e5e2c50dd..926ac3ba8a9 100644 --- a/base/poco/Util/include/Poco/Util/AbstractConfiguration.h +++ b/base/poco/Util/include/Poco/Util/AbstractConfiguration.h @@ -167,7 +167,6 @@ namespace Util /// If the value contains references to other properties (${}), these /// are expanded. -#if defined(POCO_HAVE_INT64) Int64 getInt64(const std::string & key) const; /// Returns the Int64 value of the property with the given name. @@ -205,7 +204,6 @@ namespace Util /// If the value contains references to other properties (${}), these /// are expanded. -#endif // defined(POCO_HAVE_INT64) double getDouble(const std::string & key) const; /// Returns the double value of the property with the given name. @@ -255,7 +253,6 @@ namespace Util /// Sets the property with the given key to the given value. /// An already existing value for the key is overwritten. -#if defined(POCO_HAVE_INT64) virtual void setInt64(const std::string & key, Int64 value); /// Sets the property with the given key to the given value. @@ -265,7 +262,6 @@ namespace Util /// Sets the property with the given key to the given value. /// An already existing value for the key is overwritten. -#endif // defined(POCO_HAVE_INT64) virtual void setDouble(const std::string & key, double value); /// Sets the property with the given key to the given value. @@ -335,12 +331,10 @@ namespace Util static int parseInt(const std::string & value); static unsigned parseUInt(const std::string & value); -#if defined(POCO_HAVE_INT64) static Int64 parseInt64(const std::string & value); static UInt64 parseUInt64(const std::string & value); -#endif // defined(POCO_HAVE_INT64) static bool parseBool(const std::string & value); void setRawWithEvent(const std::string & key, std::string value); diff --git a/base/poco/Util/include/Poco/Util/WinRegistryKey.h b/base/poco/Util/include/Poco/Util/WinRegistryKey.h index b28f6aefb37..9aa5e35ed8a 100644 --- a/base/poco/Util/include/Poco/Util/WinRegistryKey.h +++ b/base/poco/Util/include/Poco/Util/WinRegistryKey.h @@ -123,7 +123,6 @@ namespace Util /// /// Throws a NotFoundException if the value does not exist. -#if defined(POCO_HAVE_INT64) void setInt64(const std::string & name, Poco::Int64 value); /// Sets the numeric (REG_QWORD) value with the given name. @@ -135,7 +134,6 @@ namespace Util /// /// Throws a NotFoundException if the value does not exist. -#endif // POCO_HAVE_INT64 void deleteValue(const std::string & name); /// Deletes the value with the given name. diff --git a/base/poco/Util/src/AbstractConfiguration.cpp b/base/poco/Util/src/AbstractConfiguration.cpp index 95e8da68a57..2c892decd9a 100644 --- a/base/poco/Util/src/AbstractConfiguration.cpp +++ b/base/poco/Util/src/AbstractConfiguration.cpp @@ -163,7 +163,6 @@ unsigned AbstractConfiguration::getUInt(const std::string& key, unsigned default } -#if defined(POCO_HAVE_INT64) Int64 AbstractConfiguration::getInt64(const std::string& key) const @@ -214,7 +213,6 @@ UInt64 AbstractConfiguration::getUInt64(const std::string& key, UInt64 defaultVa } -#endif // defined(POCO_HAVE_INT64) double AbstractConfiguration::getDouble(const std::string& key) const @@ -283,7 +281,6 @@ void AbstractConfiguration::setUInt(const std::string& key, unsigned int value) } -#if defined(POCO_HAVE_INT64) void AbstractConfiguration::setInt64(const std::string& key, Int64 value) @@ -302,7 +299,6 @@ void AbstractConfiguration::setUInt64(const std::string& key, UInt64 value) } -#endif // defined(POCO_HAVE_INT64) void AbstractConfiguration::setDouble(const std::string& key, double value) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 812a0d9e64b..b52b2eda992 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54471) +SET(VERSION_REVISION 54472) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 2) +SET(VERSION_MINOR 3) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH dcaac47702510cc87ddf266bc524f6b7ce0a8e6e) -SET(VERSION_DESCRIBE v23.2.1.1-testing) -SET(VERSION_STRING 23.2.1.1) +SET(VERSION_GITHASH 52bf836e03a6ba7cf2d654eaaf73231701abc3a2) +SET(VERSION_DESCRIBE v23.3.1.2537-testing) +SET(VERSION_STRING 23.3.1.2537) # end of autochange diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index b0ff2349957..9fc3960c166 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -30,7 +30,7 @@ elseif (ARCH_AARCH64) # support it. set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc") else () - # ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it + # ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1, 10]. In particular, it # includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to compat flag # "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on # old OSs. @@ -45,21 +45,39 @@ elseif (ARCH_AARCH64) # dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits # but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was # introduced as optional, either in v8.2 [7] or in v8.4 [8]. - # ldapr: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code. - # Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton 2+, Azure and GCP instances. Generated from clang 15. + # rcpc: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code. + # Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton >=2, Azure and GCP instances. # - # [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md - # [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10 - # [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/ - # [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci - # [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support - # [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en - # [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html - # [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions- - # [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument") + # [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md + # [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10 + # [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/ + # [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci + # [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support + # [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en + # [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html + # [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions- + # [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en + # [10] https://github.com/aws/aws-graviton-getting-started/blob/main/README.md + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs+rcpc") endif () + # Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM + # and the build machine is too old, i.e. doesn't satisfy above modern profile, then these intermediate binaries will not run (dump + # SIGILL). Even if they could run, the build machine wouldn't be able to run the ClickHouse binary. In that case, suggest to run the + # build with the compat profile. + if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" AND NOT NO_ARMV81_OR_HIGHER) + # CPU features in /proc/cpuinfo and compiler flags don't align :( ... pick some obvious flags contained in the modern but not in the + # legacy profile (full Graviton 3 /proc/cpuinfo is "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm + # jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs paca pacg dcpodp svei8mm svebf16 i8mm + # bf16 dgh rng") + execute_process( + COMMAND grep -P "^(?=.*atomic)(?=.*ssbs)" /proc/cpuinfo + OUTPUT_VARIABLE FLAGS) + if (NOT FLAGS) + MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_ARMV81_OR_HIGHER=1") + endif() + endif() + elseif (ARCH_PPC64LE) # By Default, build for power8 and up, allow building for power9 and up # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC @@ -102,6 +120,22 @@ elseif (ARCH_AMD64) SET(ENABLE_AVX512_FOR_SPEC_OP 0) endif() + # Same best-effort check for x86 as above for ARM. + if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64" AND NOT NO_SSE3_OR_HIGHER) + # Test for flags in standard profile but not in NO_SSE3_OR_HIGHER profile. + # /proc/cpuid for Intel Xeon 8124: "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse + # sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc cpuid aperfmperf + # tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c + # rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx + # avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke"" + execute_process( + COMMAND grep -P "^(?=.*ssse3)(?=.*sse4_1)(?=.*sse4_2)" /proc/cpuinfo + OUTPUT_VARIABLE FLAGS) + if (NOT FLAGS) + MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_SSE3_OR_HIGHER=1") + endif() + endif() + # ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/ # AVX. We only check that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary. # Therefore, use check_cxx_source_compiles (= does the code compile+link?) instead of check_cxx_source_runs (= does the code diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 8364b0c2c08..5d116b199cf 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -45,6 +45,7 @@ if (COMPILER_CLANG) no_warning(weak-vtables) no_warning(thread-safety-negative) # experimental flag, too many false positives no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16 + no_warning(unsafe-buffer-usage) # too aggressive # TODO Enable conversion, sign-conversion, double-promotion warnings. elseif (COMPILER_GCC) # Add compiler options only to c++ compiler diff --git a/contrib/capnproto b/contrib/capnproto index e19cd661e49..dc8b50b9997 160000 --- a/contrib/capnproto +++ b/contrib/capnproto @@ -1 +1 @@ -Subproject commit e19cd661e49dd9022d3f920b69d843333b896451 +Subproject commit dc8b50b999777bcb23c89bb5907c785c3f654441 diff --git a/contrib/libunwind b/contrib/libunwind index 5022f30f3e0..e48aa13f67d 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 5022f30f3e092a54a7c101c335ce5e08769db366 +Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5 diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 8759c16ac3e..fe6cffd33e2 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -98,6 +98,16 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "") set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm") set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") + +# Since we always use toolchain files to generate hermatic builds, cmake will +# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM +# targets with all tests enabled, which will slow down cmake configuration and +# compilation (You'll see Building native llvm-tblgen...). Let's disable the +# cross compiling indicator for now. +# +# TODO We should let cmake know whether it's indeed a cross compilation in the +# first place. +set (CMAKE_CROSSCOMPILING 0) add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") set_directory_properties (PROPERTIES diff --git a/contrib/xz-cmake/CMakeLists.txt b/contrib/xz-cmake/CMakeLists.txt index 9d08adc9c7a..c3a8203c83e 100644 --- a/contrib/xz-cmake/CMakeLists.txt +++ b/contrib/xz-cmake/CMakeLists.txt @@ -94,6 +94,10 @@ if (ARCH_AMD64 OR ARCH_AARCH64) add_compile_definitions(TUKLIB_FAST_UNALIGNED_ACCESS=1) endif () +if (ARCH_S390X) + add_compile_definitions(WORDS_BIGENDIAN) +endif () + find_package(Threads REQUIRED) diff --git a/contrib/zstd-cmake/CMakeLists.txt b/contrib/zstd-cmake/CMakeLists.txt index f44d5db12c4..a6262178dc7 100644 --- a/contrib/zstd-cmake/CMakeLists.txt +++ b/contrib/zstd-cmake/CMakeLists.txt @@ -30,25 +30,10 @@ # - zstd homepage : http://www.zstd.net/ # ################################################################ -# Get library version based on information from input content (use regular exp) -function(GetLibraryVersion _content _outputVar1 _outputVar2 _outputVar3) - string(REGEX MATCHALL ".*define ZSTD_VERSION_MAJOR+.* ([0-9]+).*define ZSTD_VERSION_MINOR+.* ([0-9]+).*define ZSTD_VERSION_RELEASE+.* ([0-9]+)" VERSION_REGEX "${_content}") - SET(${_outputVar1} ${CMAKE_MATCH_1} PARENT_SCOPE) - SET(${_outputVar2} ${CMAKE_MATCH_2} PARENT_SCOPE) - SET(${_outputVar3} ${CMAKE_MATCH_3} PARENT_SCOPE) -endfunction() - # Define library directory, where sources and header files are located SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") INCLUDE_DIRECTORIES(BEFORE ${LIBRARY_DIR} "${LIBRARY_DIR}/common") -# Read file content -FILE(READ "${LIBRARY_DIR}/zstd.h" HEADER_CONTENT) - -# Parse version -GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE) -MESSAGE(STATUS "ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}") - # cd contrib/zstd/lib # find . -name '*.c' -or -name '*.S' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./ "${LIBRARY_DIR}/"' SET(Sources diff --git a/docker/images.json b/docker/images.json index 78e7729671e..508138d79af 100644 --- a/docker/images.json +++ b/docker/images.json @@ -43,7 +43,8 @@ "docker/test/stateful": { "name": "clickhouse/stateful-test", "dependent": [ - "docker/test/stress" + "docker/test/stress", + "docker/test/upgrade" ] }, "docker/test/unit": { @@ -54,6 +55,10 @@ "name": "clickhouse/stress-test", "dependent": [] }, + "docker/test/upgrade": { + "name": "clickhouse/upgrade-check", + "dependent": [] + }, "docker/test/codebrowser": { "name": "clickhouse/codebrowser", "dependent": [] diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index eda8274edac..09395befdad 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.1.3.5" +ARG VERSION="23.2.1.2537" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 8a73d72b3a5..472f25eed2d 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.1.3.5" +ARG VERSION="23.2.1.2537" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index ba2d7430e06..5dbb244c298 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 +FROM ubuntu:22.04 # see https://github.com/moby/moby/issues/4032#issuecomment-192327844 ARG DEBIAN_FRONTEND=noninteractive @@ -9,19 +9,20 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list && groupadd -r clickhouse --gid=101 \ && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ && apt-get update \ + && apt-get upgrade -yq \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ ca-certificates \ dirmngr \ - gnupg \ - locales \ + gnupg2 \ wget \ + locales \ tzdata \ && apt-get clean ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.1.3.5" +ARG VERSION="23.2.1.2537" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image @@ -80,15 +81,8 @@ RUN arch=${TARGETARCH:-amd64} \ && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client -# Remove as much of Ubuntu as possible. -# ClickHouse does not need Ubuntu. It can run on top of Linux kernel without any OS distribution. -# ClickHouse does not need Docker at all. ClickHouse is above all that. -# It does not care about Ubuntu, Docker, or other cruft and you should neither. -# The fact that this Docker image is based on Ubuntu is just a misconception. -# Some vulnerability scanners are arguing about Ubuntu, which is not relevant to ClickHouse at all. -# ClickHouse does not care when you report false vulnerabilities by running some Docker scanners. - -RUN apt-get remove --purge -y libksba8 && apt-get autoremove -y +RUN apt-get autoremove --purge -yq libksba8 && \ + apt-get autoremove -yq # we need to allow "others" access to clickhouse folder, because docker container # can be started with arbitrary uid (openshift usecase) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 66d535bc94a..ab9f1f8a2e3 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,5 +1,10 @@ # docker build -t clickhouse/performance-comparison . -FROM ubuntu:20.04 + +# Using ubuntu:22.04 over 20.04 as all other images, since: +# a) ubuntu 20.04 has too old parallel, and does not support --memsuspend +# b) anyway for perf tests it should not be important (backward compatiblity +# with older ubuntu had been checked lots of times in various tests) +FROM ubuntu:22.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 725dcbd7157..293ad9ac411 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -537,9 +537,20 @@ unset IFS # all nodes. numactl --show numactl --cpunodebind=all --membind=all numactl --show -# Use less jobs to avoid OOM. Some queries can consume 8+ GB of memory. -jobs_count=$(($(grep -c ^processor /proc/cpuinfo) / 4)) -numactl --cpunodebind=all --membind=all parallel --jobs $jobs_count --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log + +# Notes for parallel: +# +# Some queries can consume 8+ GB of memory, so it worth to limit amount of jobs +# that can be run in parallel. +# +# --memfree: +# +# will kill jobs, which is not good (and retried until --retries exceeded) +# +# --memsuspend: +# +# If the available memory falls below 2 * size, GNU parallel will suspend some of the running jobs. +numactl --cpunodebind=all --membind=all parallel -v --joblog analyze/parallel-log.txt --memsuspend 15G --null < analyze/commands.txt 2>> analyze/errors.log clickhouse-local --query " -- Join the metric names back to the metric statistics we've calculated, and make diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index b67a638188c..71a2e92e3a8 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -1,4 +1,4 @@ -# rebuild in #33610 +# rebuild in #47031 # docker build -t clickhouse/stateful-test . ARG FROM_TAG=latest FROM clickhouse/stateless-test:$FROM_TAG diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 393508fd551..e9712f430fd 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -21,10 +21,9 @@ RUN apt-get update -y \ openssl \ netcat-openbsd \ telnet \ - llvm-9 \ - brotli + brotli \ + && apt-get clean -COPY ./stress /stress COPY run.sh / ENV DATASETS="hits visits" diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 95704336468..15f58d6c3a3 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -8,229 +8,13 @@ dmesg --clear set -x -# core.COMM.PID-TID -sysctl kernel.core_pattern='core.%e.%p-%P' +# we mount tests folder from repo to /usr/share +ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress +ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test -OK="\tOK\t\\N\t" -FAIL="\tFAIL\t\\N\t" - -FAILURE_CONTEXT_LINES=50 -FAILURE_CONTEXT_MAX_LINE_WIDTH=400 - -function escaped() -{ - # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. - # Also limit lines width just in case (too long lines are not really useful usually) - clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH) - from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'" -} -function head_escaped() -{ - head -n $FAILURE_CONTEXT_LINES $1 | escaped -} -function unts() -{ - grep -Po "[0-9][0-9]:[0-9][0-9] \K.*" -} -function trim_server_logs() -{ - head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped -} - -function install_packages() -{ - dpkg -i $1/clickhouse-common-static_*.deb - dpkg -i $1/clickhouse-common-static-dbg_*.deb - dpkg -i $1/clickhouse-server_*.deb - dpkg -i $1/clickhouse-client_*.deb -} - -function configure() -{ - # install test configs - export USE_DATABASE_ORDINARY=1 - export EXPORT_S3_STORAGE_POLICIES=1 - /usr/share/clickhouse-test/config/install.sh - - # we mount tests folder from repo to /usr/share - ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test - ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages - ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag - - # avoid too slow startup - sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ - | sed "s|100000|10000|" \ - > /etc/clickhouse-server/config.d/keeper_port.xml.tmp - sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml - sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml - sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml - - # for clickhouse-server (via service) - echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment - # for clickhouse-client - export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' - - # since we run clickhouse from root - sudo chown root: /var/lib/clickhouse - - # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM). - echo "1" \ - > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml - - local total_mem - total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB - total_mem=$(( total_mem*1024 )) # bytes - - # Set maximum memory usage as half of total memory (less chance of OOM). - # - # But not via max_server_memory_usage but via max_memory_usage_for_user, - # so that we can override this setting and execute service queries, like: - # - hung check - # - show/drop database - # - ... - # - # So max_memory_usage_for_user will be a soft limit, and - # max_server_memory_usage will be hard limit, and queries that should be - # executed regardless memory limits will use max_memory_usage_for_user=0, - # instead of relying on max_untracked_memory - - max_server_memory_usage_to_ram_ratio=0.5 - echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}" - cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml < - ${max_server_memory_usage_to_ram_ratio} - -EOL - - local max_users_mem - max_users_mem=$((total_mem*30/100)) # 30% - echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G" - cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < - - - 10G - ${max_users_mem} - - - -EOL - - cat > /etc/clickhouse-server/config.d/core.xml < - - - 107374182400 - - - $PWD - -EOL - - # Let OOM killer terminate other processes before clickhouse-server: - cat > /etc/clickhouse-server/config.d/oom_score.xml < - -1000 - -EOL - - # Analyzer is not yet ready for testing - cat > /etc/clickhouse-server/users.d/no_analyzer.xml < - - - - - - - - - - -EOL - -} - -function stop() -{ - local max_tries="${1:-90}" - local pid - # Preserve the pid, since the server can hung after the PID will be deleted. - pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" - - clickhouse stop --max-tries "$max_tries" --do-not-kill && return - - # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. - echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv - kill -TERM "$(pidof gdb)" ||: - sleep 5 - echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log - timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log - clickhouse stop --force -} - -function start() -{ - counter=0 - until clickhouse-client --query "SELECT 1" - do - if [ "$counter" -gt ${1:-120} ] - then - echo "Cannot start clickhouse-server" - rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: - echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv - cat /var/log/clickhouse-server/stdout.log - tail -n100 /var/log/clickhouse-server/stderr.log - tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n100 - break - fi - # use root to match with current uid - clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log - sleep 0.5 - counter=$((counter + 1)) - done - - # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog - # and clickhouse-server can do fork-exec, for example, to run some bridge. - # Do not set nostop noprint for all signals, because some it may cause gdb to hang, - # explicitly ignore non-fatal signals that are used by server. - # Number of SIGRTMIN can be determined only in runtime. - RTMIN=$(kill -l SIGRTMIN) - echo " -set follow-fork-mode parent -handle SIGHUP nostop noprint pass -handle SIGINT nostop noprint pass -handle SIGQUIT nostop noprint pass -handle SIGPIPE nostop noprint pass -handle SIGTERM nostop noprint pass -handle SIGUSR1 nostop noprint pass -handle SIGUSR2 nostop noprint pass -handle SIG$RTMIN nostop noprint pass -info signals -continue -backtrace full -thread apply all backtrace full -info registers -disassemble /s -up -disassemble /s -up -disassemble /s -p \"done\" -detach -quit -" > script.gdb - - # FIXME Hung check may work incorrectly because of attached gdb - # 1. False positives are possible - # 2. We cannot attach another gdb to get stacktraces if some queries hung - gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log & - sleep 5 - # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) - time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: -} +# Stress tests and upgrade check uses similar code that was placed +# in a separate bash library. See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/stress_tests.lib install_packages package_folder @@ -396,7 +180,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau start -./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ +stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv @@ -413,316 +197,27 @@ unset "${!THREAD_@}" start -clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ - || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \ - && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \ - >> /test_output/test_results.tsv) +check_server_start stop [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.final.log + # Grep logs for sanitizer asserts, crashes and other critical errors +check_logs_for_critical_errors -# Sanitizer asserts -rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp -rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp -rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ - || echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv -rm -f /test_output/tmp +tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: -# OOM -rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ - && echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \ - || echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - -# Logical errors -rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \ - && echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \ - || echo -e "No logical errors$OK" >> /test_output/test_results.tsv - -# Remove file logical_errors.txt if it's empty -[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt - -# No such key errors -rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \ - && echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \ - || echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv - -# Remove file no_such_key_errors.txt if it's empty -[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt - -# Crash -rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ - && echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ - || echo -e "Not crashed$OK" >> /test_output/test_results.tsv - -# It also checks for crash without stacktrace (printed by watchdog) -rg -Fa " " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \ - && echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \ - || echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - -# Remove file fatal_messages.txt if it's empty -[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt - -rg -Fa "########################################" /test_output/* > /dev/null \ - && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv - -function get_gdb_log_context() -{ - rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped -} - -rg -Fa " received signal " /test_output/gdb.log > /dev/null \ - && echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv - -if [ "$DISABLE_BC_CHECK" -ne "1" ]; then - echo -e "Backward compatibility check\n" - - echo "Get previous release tag" - previous_release_tag=$(clickhouse-client --version | rg -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag) - echo $previous_release_tag - - echo "Clone previous release repository" - git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository - - echo "Download clickhouse-server from the previous release" - mkdir previous_release_package_folder - - echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \ - || echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv - - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log - for table in query_log trace_log - do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: - done - - tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: - - # Check if we cloned previous release repository successfully - if ! [ "$(ls -A previous_release_repository/tests/queries)" ] - then - echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv - elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] - then - echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv - else - echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv - echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv - - # Uninstall current packages - dpkg --remove clickhouse-client - dpkg --remove clickhouse-server - dpkg --remove clickhouse-common-static-dbg - dpkg --remove clickhouse-common-static - - rm -rf /var/lib/clickhouse/* - - # Make BC check more funny by forcing Ordinary engine for system database - mkdir /var/lib/clickhouse/metadata - echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql - - # Install previous release packages - install_packages previous_release_package_folder - - # Start server from previous release - # Previous version may not be ready for fault injections - export ZOOKEEPER_FAULT_INJECTION=0 - configure - - # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." - rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||: - rm -f /etc/clickhouse-server/users.d/marks.xml ||: - - # Remove s3 related configs to avoid "there is no disk type `cache`" - rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: - rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: - - # Turn on after 22.12 - rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: - # it uses recently introduced settings which previous versions may not have - rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: - - # Turn on after 23.1 - rm -f /etc/clickhouse-server/users.d/prefetch_settings.xml ||: - - start - - clickhouse-client --query="SELECT 'Server version: ', version()" - - # Install new package before running stress test because we should use new - # clickhouse-client and new clickhouse-test. - # - # But we should leave old binary in /usr/bin/ and debug symbols in - # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it - # will print sane stacktraces and also to avoid possible crashes. - # - # FIXME: those files can be extracted directly from debian package, but - # actually better solution will be to use different PATH instead of playing - # games with files from packages. - mv /usr/bin/clickhouse previous_release_package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ - install_packages package_folder - mv /usr/bin/clickhouse package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ - mv previous_release_package_folder/clickhouse /usr/bin/ - mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - - mkdir tmp_stress_output - - ./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \ - --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ - && echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \ - || echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv - rm -rf tmp_stress_output - - # We experienced deadlocks in this command in very rare cases. Let's debug it: - timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" || - ( - echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log - timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log - clickhouse stop --force - ) - - # Use bigger timeout for previous version - stop 300 - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log - - # Start new server - mv package_folder/clickhouse /usr/bin/ - mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - # Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables). - export ZOOKEEPER_FAULT_INJECTION=0 - configure - start 500 - clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ - || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \ - && echo -e "Backward compatibility check: Server failed to start$FAIL$(trim_server_logs bc_check_application_errors.txt)" >> /test_output/test_results.tsv) - - clickhouse-client --query="SELECT 'Server version: ', version()" - - # Let the server run for a while before checking log. - sleep 60 - - stop - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log - - # Error messages (we should ignore some errors) - # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") - # FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server. - # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility - echo "Check for Error messages in server log:" - rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ - -e "Code: 236. DB::Exception: Cancelled mutating parts" \ - -e "REPLICA_IS_ALREADY_ACTIVE" \ - -e "REPLICA_ALREADY_EXISTS" \ - -e "ALL_REPLICAS_LOST" \ - -e "DDLWorker: Cannot parse DDL task query" \ - -e "RaftInstance: failed to accept a rpc connection due to error 125" \ - -e "UNKNOWN_DATABASE" \ - -e "NETWORK_ERROR" \ - -e "UNKNOWN_TABLE" \ - -e "ZooKeeperClient" \ - -e "KEEPER_EXCEPTION" \ - -e "DirectoryMonitor" \ - -e "TABLE_IS_READ_ONLY" \ - -e "Code: 1000, e.code() = 111, Connection refused" \ - -e "UNFINISHED" \ - -e "NETLINK_ERROR" \ - -e "Renaming unexpected part" \ - -e "PART_IS_TEMPORARILY_LOCKED" \ - -e "and a merge is impossible: we didn't find" \ - -e "found in queue and some source parts for it was lost" \ - -e "is lost forever." \ - -e "Unknown index: idx." \ - -e "Cannot parse string 'Hello' as UInt64" \ - -e "} TCPHandler: Code:" \ - -e "} executeQuery: Code:" \ - -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ - -e "[Queue = DB::DynamicRuntimeQueue]: Code: 235. DB::Exception: Part" \ - -e "The set of parts restored in place of" \ - -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ - -e "Code: 269. DB::Exception: Destination table is myself" \ - -e "Coordination::Exception: Connection loss" \ - -e "MutateFromLogEntryTask" \ - -e "No connection to ZooKeeper, cannot get shared table ID" \ - -e "Session expired" \ - -e "TOO_MANY_PARTS" \ - -e "Container already exists" \ - /var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "" > /test_output/bc_check_error_messages.txt \ - && echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(trim_server_logs bc_check_error_messages.txt)" \ - >> /test_output/test_results.tsv \ - || echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - - # Remove file bc_check_error_messages.txt if it's empty - [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt - - # Sanitizer asserts - rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ - || echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv - rm -f /test_output/tmp - - # OOM - rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \ - || echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - - # Logical errors - echo "Check for Logical errors in server log:" - rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ - && echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(trim_server_logs bc_check_logical_errors.txt)" \ - >> /test_output/test_results.tsv \ - || echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv - - # Remove file bc_check_logical_errors.txt if it's empty - [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt - - # Crash - rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ - || echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv - - # It also checks for crash without stacktrace (printed by watchdog) - echo "Check for Fatal message in server log:" - rg -Fa " " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \ - && echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(trim_server_logs bc_check_fatal_messages.txt)" \ - >> /test_output/test_results.tsv \ - || echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv - - # Remove file bc_check_fatal_messages.txt if it's empty - [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt - - tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||: - for table in query_log trace_log - do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \ - | zstd --threads=0 > /test_output/$table.backward.tsv.zst ||: - done - fi -fi - -dmesg -T > /test_output/dmesg.log - -# OOM in dmesg -- those are real -grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \ - && echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \ - || echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv +collect_query_and_trace_logs mv /var/log/clickhouse-server/stderr.log /test_output/ # Write check result into check_status.tsv # Try to choose most specific error for the whole check status clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by -(test like 'Backward compatibility check%'), -- BC check goes last (test like '%Sanitizer%') DESC, (test like '%Killed by signal%') DESC, (test like '%gdb.log%') DESC, @@ -732,14 +227,8 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d (test like '%OOM%') DESC, (test like '%Signal 9%') DESC, (test like '%Fatal message%') DESC, -(test like '%Error message%') DESC, -(test like '%previous release%') DESC, rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv -# Core dumps -find . -type f -maxdepth 1 -name 'core.*' | while read core; do - zstd --threads=0 $core - mv $core.zst /test_output/ -done +collect_core_dumps diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile new file mode 100644 index 00000000000..8e5890b81a0 --- /dev/null +++ b/docker/test/upgrade/Dockerfile @@ -0,0 +1,31 @@ +# rebuild in #33610 +# docker build -t clickhouse/upgrade-check . +ARG FROM_TAG=latest +FROM clickhouse/stateful-test:$FROM_TAG + +RUN apt-get update -y \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + bash \ + tzdata \ + fakeroot \ + debhelper \ + parallel \ + expect \ + python3 \ + python3-lxml \ + python3-termcolor \ + python3-requests \ + curl \ + sudo \ + openssl \ + netcat-openbsd \ + telnet \ + brotli \ + && apt-get clean + +COPY run.sh / + +ENV EXPORT_S3_STORAGE_POLICIES=1 + +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh new file mode 100644 index 00000000000..df32e2833e7 --- /dev/null +++ b/docker/test/upgrade/run.sh @@ -0,0 +1,201 @@ +#!/bin/bash +# shellcheck disable=SC2094 +# shellcheck disable=SC2086 +# shellcheck disable=SC2024 + +# Avoid overlaps with previous runs +dmesg --clear + +set -x + +# we mount tests folder from repo to /usr/share +ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress +ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages +ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag + +# Stress tests and upgrade check uses similar code that was placed +# in a separate bash library. See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/stress_tests.lib + +azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & +./setup_minio.sh stateless # to have a proper environment + +echo "Get previous release tag" +previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag) +echo $previous_release_tag + +echo "Clone previous release repository" +git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository + +echo "Download clickhouse-server from the previous release" +mkdir previous_release_package_folder + +echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \ + || echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv + +# Check if we cloned previous release repository successfully +if ! [ "$(ls -A previous_release_repository/tests/queries)" ] +then + echo -e 'failure\tFailed to clone previous release tests' > /test_output/check_status.tsv + exit +elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] +then + echo -e 'failure\tFailed to download previous release packages' > /test_output/check_status.tsv + exit +fi + +echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv +echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv + +# Make upgrade check more funny by forcing Ordinary engine for system database +mkdir /var/lib/clickhouse/metadata +echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql + +# Install previous release packages +install_packages previous_release_package_folder + +# Start server from previous release +# Let's enable S3 storage by default +export USE_S3_STORAGE_FOR_MERGE_TREE=1 +# Previous version may not be ready for fault injections +export ZOOKEEPER_FAULT_INJECTION=0 +configure + +# But we still need default disk because some tables loaded only into it +sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|
s3
|
s3
default|" \ + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + +start + +clickhouse-client --query="SELECT 'Server version: ', version()" + +mkdir tmp_stress_output + +stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \ + && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ + || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv + +rm -rf tmp_stress_output + +# We experienced deadlocks in this command in very rare cases. Let's debug it: +timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" || +( + echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log + timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log + clickhouse stop --force +) + +# Use bigger timeout for previous version and disable additional hang check +stop 300 false +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log + +# Install and start new server +install_packages package_folder +# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables). +export ZOOKEEPER_FAULT_INJECTION=0 +configure +start 500 +clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ + || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \ + && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \ + >> /test_output/test_results.tsv) + +# Remove file application_errors.txt if it's empty +[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt + +clickhouse-client --query="SELECT 'Server version: ', version()" + +# Let the server run for a while before checking log. +sleep 60 + +stop +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log + +# Error messages (we should ignore some errors) +# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") +# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") +# FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server. +# Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") +# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") +# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility +echo "Check for Error messages in server log:" +rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ + -e "Code: 236. DB::Exception: Cancelled mutating parts" \ + -e "REPLICA_IS_ALREADY_ACTIVE" \ + -e "REPLICA_ALREADY_EXISTS" \ + -e "ALL_REPLICAS_LOST" \ + -e "DDLWorker: Cannot parse DDL task query" \ + -e "RaftInstance: failed to accept a rpc connection due to error 125" \ + -e "UNKNOWN_DATABASE" \ + -e "NETWORK_ERROR" \ + -e "UNKNOWN_TABLE" \ + -e "ZooKeeperClient" \ + -e "KEEPER_EXCEPTION" \ + -e "DirectoryMonitor" \ + -e "TABLE_IS_READ_ONLY" \ + -e "Code: 1000, e.code() = 111, Connection refused" \ + -e "UNFINISHED" \ + -e "NETLINK_ERROR" \ + -e "Renaming unexpected part" \ + -e "PART_IS_TEMPORARILY_LOCKED" \ + -e "and a merge is impossible: we didn't find" \ + -e "found in queue and some source parts for it was lost" \ + -e "is lost forever." \ + -e "Unknown index: idx." \ + -e "Cannot parse string 'Hello' as UInt64" \ + -e "} TCPHandler: Code:" \ + -e "} executeQuery: Code:" \ + -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ + -e "The set of parts restored in place of" \ + -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ + -e "Code: 269. DB::Exception: Destination table is myself" \ + -e "Coordination::Exception: Connection loss" \ + -e "MutateFromLogEntryTask" \ + -e "No connection to ZooKeeper, cannot get shared table ID" \ + -e "Session expired" \ + -e "TOO_MANY_PARTS" \ + -e "Authentication failed" \ + -e "Cannot flush" \ + -e "Container already exists" \ + /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ + && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \ + >> /test_output/test_results.tsv \ + || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv + +# Remove file upgrade_error_messages.txt if it's empty +[ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt + +# Grep logs for sanitizer asserts, crashes and other critical errors +check_logs_for_critical_errors + +tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: + +collect_query_and_trace_logs + +check_oom_in_dmesg + +mv /var/log/clickhouse-server/stderr.log /test_output/ + +# Write check result into check_status.tsv +# Try to choose most specific error for the whole check status +clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by +(test like '%Sanitizer%') DESC, +(test like '%Killed by signal%') DESC, +(test like '%gdb.log%') DESC, +(test ilike '%possible deadlock%') DESC, +(test like '%start%') DESC, +(test like '%dmesg%') DESC, +(test like '%OOM%') DESC, +(test like '%Signal 9%') DESC, +(test like '%Fatal message%') DESC, +(test like '%Error message%') DESC, +(test like '%previous release%') DESC, +rowNumberInAllBlocks() +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv +[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv + +collect_core_dumps diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index de34897a6f6..403aab6f4e6 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -60,12 +60,21 @@ fi clickhouse_download_filename_prefix="clickhouse" clickhouse="$clickhouse_download_filename_prefix" -i=0 -while [ -f "$clickhouse" ] -do - clickhouse="${clickhouse_download_filename_prefix}.${i}" - i=$(($i+1)) -done +if [ -f "$clickhouse" ] +then + read -p "ClickHouse binary ${clickhouse} already exists. Overwrite? [y/N] " answer + if [ "$answer" = "y" -o "$answer" = "Y" ] + then + rm -f "$clickhouse" + else + i=0 + while [ -f "$clickhouse" ] + do + clickhouse="${clickhouse_download_filename_prefix}.${i}" + i=$(($i+1)) + done + fi +fi URL="https://builds.clickhouse.com/master/${DIR}/clickhouse" echo @@ -76,9 +85,9 @@ echo echo "Successfully downloaded the ClickHouse binary, you can run it as: ./${clickhouse}" -if [ "${OS}" = "Linux" ] -then - echo - echo "You can also install it: - sudo ./${clickhouse} install" -fi +#if [ "${OS}" = "Linux" ] +#then + #echo + #echo "You can also install it: + #sudo ./${clickhouse} install" +#fi diff --git a/docs/changelogs/v22.11.6.44-stable.md b/docs/changelogs/v22.11.6.44-stable.md new file mode 100644 index 00000000000..6e628b85150 --- /dev/null +++ b/docs/changelogs/v22.11.6.44-stable.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.11.6.44-stable (73ddf91298f) FIXME as compared to v22.11.5.15-stable (d763e5a9239) + +#### Performance Improvement +* Backported in [#45703](https://github.com/ClickHouse/ClickHouse/issues/45703): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46376](https://github.com/ClickHouse/ClickHouse/issues/46376): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Build/Testing/Packaging Improvement +* Backported in [#45977](https://github.com/ClickHouse/ClickHouse/issues/45977): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46114](https://github.com/ClickHouse/ClickHouse/issues/46114): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46483](https://github.com/ClickHouse/ClickHouse/issues/46483): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46507](https://github.com/ClickHouse/ClickHouse/issues/46507): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#45903](https://github.com/ClickHouse/ClickHouse/issues/45903): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46239](https://github.com/ClickHouse/ClickHouse/issues/46239): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#46216](https://github.com/ClickHouse/ClickHouse/issues/46216): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#46444](https://github.com/ClickHouse/ClickHouse/issues/46444): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46676](https://github.com/ClickHouse/ClickHouse/issues/46676): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.3.19.6-lts.md b/docs/changelogs/v22.3.19.6-lts.md new file mode 100644 index 00000000000..d5b45f4ce66 --- /dev/null +++ b/docs/changelogs/v22.3.19.6-lts.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.3.19.6-lts (467e0a7bd77) FIXME as compared to v22.3.18.37-lts (fe512717551) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46440](https://github.com/ClickHouse/ClickHouse/issues/46440): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.2.1.2537-stable.md b/docs/changelogs/v23.2.1.2537-stable.md new file mode 100644 index 00000000000..3fdcf6d6571 --- /dev/null +++ b/docs/changelogs/v23.2.1.2537-stable.md @@ -0,0 +1,473 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.1.2537-stable (52bf836e03a) FIXME as compared to v23.1.1.3077-stable (dcaac477025) + +#### Backward Incompatible Change +* Extend function "toDayOfWeek()" (alias: "DAYOFWEEK") with a mode argument that encodes whether the week starts on Monday or Sunday and whether counting starts at 0 or 1. For consistency with other date time functions, the mode argument was inserted between the time and the time zone arguments. This breaks existing usage of the (previously undocumented) 2-argument syntax "toDayOfWeek(time, time_zone)". A fix is to rewrite the function into "toDayOfWeek(time, 0, time_zone)". [#45233](https://github.com/ClickHouse/ClickHouse/pull/45233) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename setting `max_query_cache_size` to `filesystem_cache_max_download_size`. [#45614](https://github.com/ClickHouse/ClickHouse/pull/45614) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix applying settings for FORMAT on the client. [#46003](https://github.com/ClickHouse/ClickHouse/pull/46003) ([Azat Khuzhin](https://github.com/azat)). +* Default user will not have permissions for access type `SHOW NAMED COLLECTION` by default (e.g. by default, default user will not longer be able to do grant ALL to other users as it was before, therefore this PR is backward incompatible). [#46010](https://github.com/ClickHouse/ClickHouse/pull/46010) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove support for setting `materialized_postgresql_allow_automatic_update` (which was by default turned off). Fix integration tests. [#46106](https://github.com/ClickHouse/ClickHouse/pull/46106) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Slightly improve performance of `countDigits` on realistic datasets. This closed [#44518](https://github.com/ClickHouse/ClickHouse/issues/44518). In previous versions, `countDigits(0)` returned `0`; now it returns `1`, which is more correct, and follows the existing documentation. [#46187](https://github.com/ClickHouse/ClickHouse/pull/46187) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Expose ProfileEvents counters in system.part_log. [#38614](https://github.com/ClickHouse/ClickHouse/pull/38614) ([Bharat Nallan](https://github.com/bharatnc)). +* Enrichment of the existing ReplacingMergeTree engine to allow duplicates insertion. It leverages the power of both ReplacingMergeTree and CollapsingMergeTree in one mergeTree engine. Deleted data are not returned when queried, but not removed from disk neither. [#41005](https://github.com/ClickHouse/ClickHouse/pull/41005) ([youennL-cs](https://github.com/youennL-cs)). +* Add `generateULID()` function. Closes [#36536](https://github.com/ClickHouse/ClickHouse/issues/36536). [#44662](https://github.com/ClickHouse/ClickHouse/pull/44662) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add `corrMatrix` Aggregatefunction, calculating each two columns. In addition, since Aggregatefunctions `covarSamp` and `covarPop` are similar to `corr`, I add `covarSampMatrix`, `covarPopMatrix` by the way. @alexey-milovidov closes [#44587](https://github.com/ClickHouse/ClickHouse/issues/44587). [#44680](https://github.com/ClickHouse/ClickHouse/pull/44680) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Rewrite aggregate functions with if expression as argument when logically equivalent. For example, avg(if(cond, col, null)) can be rewritten to avgIf(cond, col). It is helpful in performance. [#44730](https://github.com/ClickHouse/ClickHouse/pull/44730) ([李扬](https://github.com/taiyang-li)). +* Introduce arrayShuffle function for random array permutations. [#45271](https://github.com/ClickHouse/ClickHouse/pull/45271) ([Joanna Hulboj](https://github.com/jh0x)). +* Support types FIXED_SIZE_BINARY type in Arrow, FIXED_LENGTH_BYTE_ARRAY in Parquet and match them to FixedString. Add settings `output_format_parquet_fixed_string_as_fixed_byte_array/output_format_arrow_fixed_string_as_fixed_byte_array` to control default output type for FixedString. Closes [#45326](https://github.com/ClickHouse/ClickHouse/issues/45326). [#45340](https://github.com/ClickHouse/ClickHouse/pull/45340) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `StorageIceberg` and table function `iceberg` to access iceberg table store on S3. [#45384](https://github.com/ClickHouse/ClickHouse/pull/45384) ([flynn](https://github.com/ucasfl)). +* Add a new column `last_exception_time` to system.replication_queue. [#45457](https://github.com/ClickHouse/ClickHouse/pull/45457) ([Frank Chen](https://github.com/FrankChen021)). +* Add two new functions which allow for user-defined keys/seeds with SipHash{64,128}. [#45513](https://github.com/ClickHouse/ClickHouse/pull/45513) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Allow a three-argument version for table function `format`. close [#45808](https://github.com/ClickHouse/ClickHouse/issues/45808). [#45873](https://github.com/ClickHouse/ClickHouse/pull/45873) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* add joda format support for 'x','w','S'.Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#46073](https://github.com/ClickHouse/ClickHouse/pull/46073) ([zk_kiger](https://github.com/zk-kiger)). +* ... Support window function `ntile`. ``` insert into test_data values(1,2), (1,3), (1,4), (2,5),(2,6); select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and unbounded following ) from test_data;. [#46256](https://github.com/ClickHouse/ClickHouse/pull/46256) ([lgbo](https://github.com/lgbo-ustc)). +* Added arrayPartialSort and arrayPartialReverseSort functions. [#46296](https://github.com/ClickHouse/ClickHouse/pull/46296) ([Joanna Hulboj](https://github.com/jh0x)). +* The new http parameter `client_protocol_version` allows setting a client protocol version for HTTP responses using the Native format. [#40397](https://github.com/ClickHouse/ClickHouse/issues/40397). [#46360](https://github.com/ClickHouse/ClickHouse/pull/46360) ([Geoff Genz](https://github.com/genzgd)). +* Add new function regexpExtract, like spark function REGEXP_EXTRACT. [#46469](https://github.com/ClickHouse/ClickHouse/pull/46469) ([李扬](https://github.com/taiyang-li)). +* Author: [taiyang-li](https://github.com/taiyang-li) Add new function regexpExtract, like spark function REGEXP_EXTRACT. [#46529](https://github.com/ClickHouse/ClickHouse/pull/46529) ([Alexander Gololobov](https://github.com/davenger)). +* Add new function JSONArrayLength, which returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid. [#46631](https://github.com/ClickHouse/ClickHouse/pull/46631) ([李扬](https://github.com/taiyang-li)). + +#### Performance Improvement +* Improve lower/upper function performance with avx512 instructions. [#37894](https://github.com/ClickHouse/ClickHouse/pull/37894) ([yaqi-zhao](https://github.com/yaqi-zhao)). +* Add new `local_filesystem_read_method` method `io_uring` based on the asynchronous Linux [io_uring](https://kernel.dk/io_uring.pdf) subsystem, improving read performance almost universally compared to the default `pread` method. [#38456](https://github.com/ClickHouse/ClickHouse/pull/38456) ([Saulius Valatka](https://github.com/sauliusvl)). +* Remove the limitation that on systems with >=32 cores and SMT disabled ClickHouse uses only half of the cores. [#44973](https://github.com/ClickHouse/ClickHouse/pull/44973) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve performance of function multiIf by columnar executing, speed up by 2.3x. [#45296](https://github.com/ClickHouse/ClickHouse/pull/45296) ([李扬](https://github.com/taiyang-li)). +* An option added to aggregate partitions independently if table partition key and group by key are compatible. Controlled by the setting `allow_aggregate_partitions_independently`. Disabled by default because of limited applicability (please refer to the docs). [#45364](https://github.com/ClickHouse/ClickHouse/pull/45364) ([Nikita Taranov](https://github.com/nickitat)). +* Add fastpath for function position when needle is empty. [#45382](https://github.com/ClickHouse/ClickHouse/pull/45382) ([李扬](https://github.com/taiyang-li)). +* Enable `query_plan_remove_redundant_sorting` optimization by default. Optimization implemented in [#45420](https://github.com/ClickHouse/ClickHouse/issues/45420). [#45567](https://github.com/ClickHouse/ClickHouse/pull/45567) ([Igor Nikonov](https://github.com/devcrafter)). +* Increased HTTP Transfer Encoding chunk size to improve performance of large queries using the HTTP interface. [#45593](https://github.com/ClickHouse/ClickHouse/pull/45593) ([Geoff Genz](https://github.com/genzgd)). +* Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). +* Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#45681](https://github.com/ClickHouse/ClickHouse/pull/45681) ([Anton Popov](https://github.com/CurtizJ)). +* Optimize Parquet reader by using batch reader. [#45878](https://github.com/ClickHouse/ClickHouse/pull/45878) ([LiuNeng](https://github.com/liuneng1994)). +* Improve performance of ColumnArray::filter for big int and decimal. [#45949](https://github.com/ClickHouse/ClickHouse/pull/45949) ([李扬](https://github.com/taiyang-li)). +* This change could effectively reduce the overhead of obtaining the filter from ColumnNullable(UInt8) and improve the overall query performance. To evaluate the impact of this change, we adopted TPC-H benchmark but revised the column types from non-nullable to nullable, and we measured the QPS of its queries as the performance indicator. [#45962](https://github.com/ClickHouse/ClickHouse/pull/45962) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Make the `_part` and `_partition_id` virtual column be `LowCardinality(String)` type. Closes [#45964](https://github.com/ClickHouse/ClickHouse/issues/45964). [#45975](https://github.com/ClickHouse/ClickHouse/pull/45975) ([flynn](https://github.com/ucasfl)). +* Improve the performance of Decimal conversion when the scale does not change. [#46095](https://github.com/ClickHouse/ClickHouse/pull/46095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The introduced logic works if PREWHERE condition is a conjunction of multiple conditions (cond1 AND cond2 AND ... ). It groups those conditions that require reading the same columns into steps. After each step the corresponding part of the full condition is computed and the result rows might be filtered. This allows to read fewer rows in the next steps thus saving IO bandwidth and doing less computation. This logic is disabled by default for now. It will be enabled by default in one of the future releases once it is known to not have any regressions, so it is highly encouraged to be used for testing. It can be controlled by 2 settings: "enable_multiple_prewhere_read_steps" and "move_all_conditions_to_prewhere". [#46140](https://github.com/ClickHouse/ClickHouse/pull/46140) ([Alexander Gololobov](https://github.com/davenger)). +* Allow to increase prefetching for read data. [#46168](https://github.com/ClickHouse/ClickHouse/pull/46168) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Rewrite arrayExists(x -> x = 1, arr) -> has(arr, 1), which improve performance by 1.34x. [#46188](https://github.com/ClickHouse/ClickHouse/pull/46188) ([李扬](https://github.com/taiyang-li)). +* Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update zstd to v1.5.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge/mutation is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. [#46280](https://github.com/ClickHouse/ClickHouse/pull/46280) ([Raúl Marín](https://github.com/Algunenano)). +* Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)). +* Fix performance degradation caused by [#39737](https://github.com/ClickHouse/ClickHouse/issues/39737). [#46309](https://github.com/ClickHouse/ClickHouse/pull/46309) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `replicas_status` handle will answer quickly even in case of a large replication queue. [#46310](https://github.com/ClickHouse/ClickHouse/pull/46310) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Add avx512 support for Aggregate Sum, function unary arithmetic, function comparison. [#37870](https://github.com/ClickHouse/ClickHouse/pull/37870) ([zhao zhou](https://github.com/zzachimed)). +* close issue: [#38893](https://github.com/ClickHouse/ClickHouse/issues/38893). [#38950](https://github.com/ClickHouse/ClickHouse/pull/38950) ([hexiaoting](https://github.com/hexiaoting)). +* Migration from other databases and updates/deletes are mimicked by Collapsing/Replacing. Want to use the same SELECT queries without adding FINAL to all the existing queries. [#40945](https://github.com/ClickHouse/ClickHouse/pull/40945) ([Arthur Passos](https://github.com/arthurpassos)). +* Allow configuring storage as `SETTINGS disk=''` (instead of `storage_policy`) and with explicit disk creation `SETTINGS disk=disk(type=s3, ...)`. [#41976](https://github.com/ClickHouse/ClickHouse/pull/41976) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add new metrics for backups: num_processed_files and processed_files_size described actual number of processed files. [#42244](https://github.com/ClickHouse/ClickHouse/pull/42244) ([Aleksandr](https://github.com/AVMusorin)). +* Added retries on interserver DNS errors. [#43179](https://github.com/ClickHouse/ClickHouse/pull/43179) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Rewrote the code around marks distribution and the overall coordination of the reading in order to achieve the maximum performance improvement. This closes [#34527](https://github.com/ClickHouse/ClickHouse/issues/34527). [#43772](https://github.com/ClickHouse/ClickHouse/pull/43772) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove redundant DISTINCT clauses in query (subqueries). Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding DISTINCT clauses. Can be enabled via `query_plan_remove_redundant_distinct` setting. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#44176](https://github.com/ClickHouse/ClickHouse/pull/44176) ([Igor Nikonov](https://github.com/devcrafter)). +* Keeper improvement: try preallocating space on the disk to avoid undefined out-of-space issues. Introduce setting `max_log_file_size` for the maximum size of Keeper's Raft log files. [#44370](https://github.com/ClickHouse/ClickHouse/pull/44370) ([Antonio Andelic](https://github.com/antonio2368)). +* ``` sumIf(123, cond) -> 123 * countIf(1, cond) sum(if(cond, 123, 0)) -> 123 * countIf(cond) sum(if(cond, 0, 123)) -> 123 * countIf(not(cond)) ```. [#44728](https://github.com/ClickHouse/ClickHouse/pull/44728) ([李扬](https://github.com/taiyang-li)). +* Optimize behavior for a replica delay api logic in case the replica is read-only. [#45148](https://github.com/ClickHouse/ClickHouse/pull/45148) ([mateng915](https://github.com/mateng0915)). +* Introduce gwp-asan implemented by llvm runtime. This closes [#27039](https://github.com/ClickHouse/ClickHouse/issues/27039). [#45226](https://github.com/ClickHouse/ClickHouse/pull/45226) ([Han Fei](https://github.com/hanfei1991)). +* ... in the case key casted from uint64 to uint32, small impact for little endian platform but key value becomes zero in big endian case. ### Documentation entry for user-facing changes. [#45375](https://github.com/ClickHouse/ClickHouse/pull/45375) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Mark Gorilla compression on columns of non-Float* type as suspicious. [#45376](https://github.com/ClickHouse/ClickHouse/pull/45376) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow removing redundant aggregation keys with constants (e.g., simplify `GROUP BY a, a + 1` to `GROUP BY a`). [#45415](https://github.com/ClickHouse/ClickHouse/pull/45415) ([Dmitry Novik](https://github.com/novikd)). +* Show replica name that is executing a merge in the postpone_reason. [#45458](https://github.com/ClickHouse/ClickHouse/pull/45458) ([Frank Chen](https://github.com/FrankChen021)). +* Save exception stack trace in part_log. [#45459](https://github.com/ClickHouse/ClickHouse/pull/45459) ([Frank Chen](https://github.com/FrankChen021)). +* Make RegExpTreeDictionary a ua parser which is compatible with https://github.com/ua-parser/uap-core. [#45631](https://github.com/ClickHouse/ClickHouse/pull/45631) ([Han Fei](https://github.com/hanfei1991)). +* Enable ICU data support on s390x platform. [#45632](https://github.com/ClickHouse/ClickHouse/pull/45632) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Updated checking of SYSTEM SYNC REPLICA resolves [#45508](https://github.com/ClickHouse/ClickHouse/issues/45508) Implementation: * Updated to wait for current last entry to be processed (after pulling shared log) instead of queue size becoming 0. * Updated Subscriber to notify both queue size and removed log_entry_id. [#45648](https://github.com/ClickHouse/ClickHouse/pull/45648) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disallow creation of new columns compressed by a combination of codecs "Delta" or "DoubleDelta" followed by codecs "Gorilla" or "FPC". This can be bypassed using setting "allow_suspicious_codecs = true". [#45652](https://github.com/ClickHouse/ClickHouse/pull/45652) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename setting `replication_alter_partitions_sync` to `alter_sync`. [#45659](https://github.com/ClickHouse/ClickHouse/pull/45659) ([Antonio Andelic](https://github.com/antonio2368)). +* The `generateRandom` table function and the engine now support `LowCardinality` data types. This is useful for testing, for example you can write `INSERT INTO table SELECT * FROM generateRandom() LIMIT 1000`. This is needed to debug [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590). [#45661](https://github.com/ClickHouse/ClickHouse/pull/45661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to ignore unknown keys in JSON object for named tuples (`input_format_json_ignore_unknown_keys_in_named_tuple`). [#45678](https://github.com/ClickHouse/ClickHouse/pull/45678) ([Azat Khuzhin](https://github.com/azat)). +* - The experimental query result cache now provides more modular configuration settings. [#45679](https://github.com/ClickHouse/ClickHouse/pull/45679) ([Robert Schulze](https://github.com/rschu1ze)). +* Renamed "query result cache" to "query cache". [#45682](https://github.com/ClickHouse/ClickHouse/pull/45682) ([Robert Schulze](https://github.com/rschu1ze)). +* add **SYSTEM SYNC FILE CACHE** command. It will call sync syscall. It achieve [#8921](https://github.com/ClickHouse/ClickHouse/issues/8921). [#45685](https://github.com/ClickHouse/ClickHouse/pull/45685) ([DR](https://github.com/freedomDR)). +* Add new S3 setting `allow_head_object_request`. This PR makes usage of `GetObjectAttributes` request instead of `HeadObject` introduced in https://github.com/ClickHouse/ClickHouse/pull/45288 optional (and disabled by default). [#45701](https://github.com/ClickHouse/ClickHouse/pull/45701) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add ability to override connection settings based on connection names (that said that now you can forget about storing password for each connection, you can simply put everything into `~/.clickhouse-client/config.xml` and even use different history files for them, which can be also useful). [#45715](https://github.com/ClickHouse/ClickHouse/pull/45715) ([Azat Khuzhin](https://github.com/azat)). +* Arrow format support duration type. Closes [#45669](https://github.com/ClickHouse/ClickHouse/issues/45669). [#45750](https://github.com/ClickHouse/ClickHouse/pull/45750) ([flynn](https://github.com/ucasfl)). +* Extend the logging in the Query Cache to improve investigations of the caching behavior. [#45751](https://github.com/ClickHouse/ClickHouse/pull/45751) ([Robert Schulze](https://github.com/rschu1ze)). +* The query cache's server-level settings are now reconfigurable at runtime. [#45758](https://github.com/ClickHouse/ClickHouse/pull/45758) ([Robert Schulze](https://github.com/rschu1ze)). +* Hide password in logs when a table function's arguments are specified with a named collection:. [#45774](https://github.com/ClickHouse/ClickHouse/pull/45774) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve internal S3 client to correctly deduce regions and redirections for different types of URLs. [#45783](https://github.com/ClickHouse/ClickHouse/pull/45783) ([Antonio Andelic](https://github.com/antonio2368)). +* - Add support for Map, IPv4 and IPv6 types in generateRandom. Mostly useful for testing. [#45785](https://github.com/ClickHouse/ClickHouse/pull/45785) ([Raúl Marín](https://github.com/Algunenano)). +* Support empty/notEmpty for IP types. [#45799](https://github.com/ClickHouse/ClickHouse/pull/45799) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* The column `num_processed_files` was splitted into two columns: `num_files` (for BACKUP) and `files_read` (for RESTORE). The column `processed_files_size` was splitted into two columns: `total_size` (for BACKUP) and `bytes_read` (for RESTORE). [#45800](https://github.com/ClickHouse/ClickHouse/pull/45800) ([Vitaly Baranov](https://github.com/vitlibar)). +* 1. Upgrade Intel QPL from v0.3.0 to v1.0.0 2. Build libaccel-config and link it statically to QPL library instead of dynamically. [#45809](https://github.com/ClickHouse/ClickHouse/pull/45809) ([jasperzhu](https://github.com/jinjunzh)). +* Add support for `SHOW ENGINES` query. [#45859](https://github.com/ClickHouse/ClickHouse/pull/45859) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* - Improved how the obfuscator deals with queries. [#45867](https://github.com/ClickHouse/ClickHouse/pull/45867) ([Raúl Marín](https://github.com/Algunenano)). +* Improved how memory bound merging and aggregation in order on top query plan interact. Previously we fell back to explicit sorting for AIO in some cases when it wasn't actually needed. So it is a perf issue, not a correctness one. [#45892](https://github.com/ClickHouse/ClickHouse/pull/45892) ([Nikita Taranov](https://github.com/nickitat)). +* Improve behaviour of conversion into Date for boundary value 65535 (2149-06-06). [#45914](https://github.com/ClickHouse/ClickHouse/pull/45914) ([Joanna Hulboj](https://github.com/jh0x)). +* Add setting `check_referential_table_dependencies` to check referential dependencies on `DROP TABLE`. This PR solves [#38326](https://github.com/ClickHouse/ClickHouse/issues/38326). [#45936](https://github.com/ClickHouse/ClickHouse/pull/45936) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix `tupleElement` return `Null` when having `Null` argument. Closes [#45894](https://github.com/ClickHouse/ClickHouse/issues/45894). [#45952](https://github.com/ClickHouse/ClickHouse/pull/45952) ([flynn](https://github.com/ucasfl)). +* Throw an error on no files satisfying S3 wildcard. Closes [#45587](https://github.com/ClickHouse/ClickHouse/issues/45587). [#45957](https://github.com/ClickHouse/ClickHouse/pull/45957) ([chen](https://github.com/xiedeyantu)). +* Use cluster state data to check concurrent backup/restore. [#45982](https://github.com/ClickHouse/ClickHouse/pull/45982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Use "exact" matching for fuzzy search, which has correct case ignorance and more appropriate algorithm for matching SQL queries. [#46000](https://github.com/ClickHouse/ClickHouse/pull/46000) ([Azat Khuzhin](https://github.com/azat)). +* Improve behaviour of conversion into Date for boundary value 65535 (2149-06-06). [#46042](https://github.com/ClickHouse/ClickHouse/pull/46042) ([Joanna Hulboj](https://github.com/jh0x)). +* Forbid wrong create View syntax `CREATE View X TO Y AS SELECT`. Closes [#4331](https://github.com/ClickHouse/ClickHouse/issues/4331). [#46043](https://github.com/ClickHouse/ClickHouse/pull/46043) ([flynn](https://github.com/ucasfl)). +* Storage Log family support settings `storage_policy`. Closes [#43421](https://github.com/ClickHouse/ClickHouse/issues/43421). [#46044](https://github.com/ClickHouse/ClickHouse/pull/46044) ([flynn](https://github.com/ucasfl)). +* Improve format `JSONColumns` when result is empty. Closes [#46024](https://github.com/ClickHouse/ClickHouse/issues/46024). [#46053](https://github.com/ClickHouse/ClickHouse/pull/46053) ([flynn](https://github.com/ucasfl)). +* - MultiVersion: replace lock_guard to atomic op. [#46057](https://github.com/ClickHouse/ClickHouse/pull/46057) ([Konstantin Morozov](https://github.com/k-morozov)). +* Add reference implementation for SipHash128. [#46065](https://github.com/ClickHouse/ClickHouse/pull/46065) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Add new metric to record allocations times and bytes using mmap. [#46068](https://github.com/ClickHouse/ClickHouse/pull/46068) ([李扬](https://github.com/taiyang-li)). +* Currently for functions like `leftPad`, `rightPad`, `leftPadUTF8`, `rightPadUTF8`, the second argument `length` must be UInt8|16|32|64|128|256. Which is too strict for clickhouse users, besides, it is not consistent with other similar functions like `arrayResize`, `substring` and so on. [#46103](https://github.com/ClickHouse/ClickHouse/pull/46103) ([李扬](https://github.com/taiyang-li)). +* Update CapnProto to v0.10.3 to avoid CVE-2022-46149 ### Documentation entry for user-facing changes. [#46139](https://github.com/ClickHouse/ClickHouse/pull/46139) ([Mallik Hassan](https://github.com/SadiHassan)). +* Fix assertion in the `welchTTest` function in debug build when the resulting statistics is NaN. Unified the behavior with other similar functions. Change the behavior of `studentTTest` to return NaN instead of throwing an exception because the previous behavior was inconvenient. This closes [#41176](https://github.com/ClickHouse/ClickHouse/issues/41176) This closes [#42162](https://github.com/ClickHouse/ClickHouse/issues/42162). [#46141](https://github.com/ClickHouse/ClickHouse/pull/46141) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* More convenient usage of big integers and ORDER BY WITH FILL. Allow using plain integers for start and end points in WITH FILL when ORDER BY big (128-bit and 256-bit) integers. Fix the wrong result for big integers with negative start or end points. This closes [#16733](https://github.com/ClickHouse/ClickHouse/issues/16733). [#46152](https://github.com/ClickHouse/ClickHouse/pull/46152) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add parts, active_parts and total_marks columns to system.tables on [issue](https://github.com/ClickHouse/ClickHouse/issues/44336). [#46161](https://github.com/ClickHouse/ClickHouse/pull/46161) ([attack204](https://github.com/attack204)). +* Functions "multi[Fuzzy]Match(Any|AnyIndex|AllIndices}" now reject regexes which will likely evaluate very slowly in vectorscan. [#46167](https://github.com/ClickHouse/ClickHouse/pull/46167) ([Robert Schulze](https://github.com/rschu1ze)). +* When `insert_null_as_default` is enabled and column doesn't have defined default value, the default of column type will be used. Also this PR fixes using default values on nulls in case of LowCardinality columns. [#46171](https://github.com/ClickHouse/ClickHouse/pull/46171) ([Kruglov Pavel](https://github.com/Avogar)). +* Prefer explicitly defined access keys for S3 clients. If `use_environment_credentials` is set to `true`, and the user has provided the access key through query or config, they will be used instead of the ones from the environment variable. [#46191](https://github.com/ClickHouse/ClickHouse/pull/46191) ([Antonio Andelic](https://github.com/antonio2368)). +* Concurrent merges are scheduled using round-robin by default to ensure fair and starvation-free operation. Previously in heavily overloaded shards, big merges could possibly be starved by smaller merges due to the use of strict priority scheduling. Added `background_merges_mutations_scheduling_policy` server config option to select scheduling algorithm (`round_robin` or `shortest_task_first`). [#46247](https://github.com/ClickHouse/ClickHouse/pull/46247) ([Sergei Trifonov](https://github.com/serxa)). +* Extend setting `input_format_null_as_default` for more formats. Fix setting `input_format_defaults_for_omitted_fields` with Native and TSKV formats. [#46284](https://github.com/ClickHouse/ClickHouse/pull/46284) ([Kruglov Pavel](https://github.com/Avogar)). +* - Add an alias "DATE_FORMAT()" for function "formatDateTime()" to improve compatibility with MySQL's SQL dialect, extend function`formatDateTime()` with substitutions "a", "b", "c", "h", "i", "k", "l" "r", "s", "W". ### Documentation entry for user-facing changes User-readable short description: `DATE_FORMAT` is an alias of `formatDateTime`. Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. (Provide link to [formatDateTime](https://clickhouse.com/docs/en/sql-reference/functions/date-time-functions/#formatdatetime)). [#46302](https://github.com/ClickHouse/ClickHouse/pull/46302) ([Jake Bamrah](https://github.com/JakeBamrah)). +* not for changelog - part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#46306](https://github.com/ClickHouse/ClickHouse/pull/46306) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Enable retries for INSERT by default in case of ZooKeeper session loss. We already use it in production. [#46308](https://github.com/ClickHouse/ClickHouse/pull/46308) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `ProfileEvents` and `CurrentMetrics` about the callback tasks for parallel replicas (`s3Cluster` and `MergeTree` tables). [#46313](https://github.com/ClickHouse/ClickHouse/pull/46313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for `DELETE` and `UPDATE` for tables using `KeeperMap` storage engine. [#46330](https://github.com/ClickHouse/ClickHouse/pull/46330) ([Antonio Andelic](https://github.com/antonio2368)). +* Update unixodbc to v2.3.11 to mitigate CVE-2011-1145 ### Documentation entry for user-facing changes. [#46363](https://github.com/ClickHouse/ClickHouse/pull/46363) ([Mallik Hassan](https://github.com/SadiHassan)). +* - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). +* Allow writing RENAME queries with query parameters. Resolves [#45778](https://github.com/ClickHouse/ClickHouse/issues/45778). [#46407](https://github.com/ClickHouse/ClickHouse/pull/46407) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix parameterized SELECT queries with REPLACE transformer. Resolves [#33002](https://github.com/ClickHouse/ClickHouse/issues/33002). [#46420](https://github.com/ClickHouse/ClickHouse/pull/46420) ([Nikolay Degterinsky](https://github.com/evillique)). +* Exclude the internal database used for temporary/external tables from the calculation of asynchronous metric "NumberOfDatabases". This makes the behavior consistent with system table "system.databases". [#46435](https://github.com/ClickHouse/ClickHouse/pull/46435) ([Robert Schulze](https://github.com/rschu1ze)). +* Added `last_exception_time` column into distribution_queue table. [#46564](https://github.com/ClickHouse/ClickHouse/pull/46564) ([Aleksandr](https://github.com/AVMusorin)). +* Support for IN clause in parameterized views Implementation: * In case of parameterized views, the IN clause cannot be evaluated as constant expression during CREATE VIEW, added a check to ignore this step in case of parameterized view. * If parmeters are not in IN clause, we continue to evaluate it as constant expression. [#46583](https://github.com/ClickHouse/ClickHouse/pull/46583) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Do not load named collections on server startup (load them on first access instead). [#46607](https://github.com/ClickHouse/ClickHouse/pull/46607) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add separate access type `SHOW_NAMED_COLLECTIONS_SECRETS` to allow to see named collections and their keys, but making values hidden. Nevertheless, access type `SHOW NAMED COLLECTIONS` is still required. [#46667](https://github.com/ClickHouse/ClickHouse/pull/46667) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Hide arguments of custom disk merge tree setting. [#46670](https://github.com/ClickHouse/ClickHouse/pull/46670) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Ask for the password in clickhouse-client interactively in a case when the empty password is wrong. Closes [#46702](https://github.com/ClickHouse/ClickHouse/issues/46702). [#46730](https://github.com/ClickHouse/ClickHouse/pull/46730) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Allow to fallback from asynchronous insert to synchronous in case of large amount of data (more than `async_insert_max_data_size` bytes in single insert). [#46753](https://github.com/ClickHouse/ClickHouse/pull/46753) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)). +* There is a check in format "time_check() || ptr ? ptr->finished() : data->is_finished()". Operator "||" will be executed before operator "?", but expected that there should be separated time and ptr checks. Also it's unexpected to run "ptr->finished()" in case of nullptr, but with current expression it's possible. [#46054](https://github.com/ClickHouse/ClickHouse/pull/46054) ([Alexey Perevyshin](https://github.com/alexX512)). + +#### Build/Testing/Packaging Improvement +* Allow to randomize merge tree settings in tests. [#38983](https://github.com/ClickHouse/ClickHouse/pull/38983) ([Anton Popov](https://github.com/CurtizJ)). +* Enable the HDFS support in PowerPC and which helps to fixes the following functional tests 02113_hdfs_assert.sh, 02244_hdfs_cluster.sql and 02368_cancel_write_into_hdfs.sh. [#44949](https://github.com/ClickHouse/ClickHouse/pull/44949) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). +* Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* ClickHouse's fork of poco was moved from "contrib/" to "base/poco/". [#46075](https://github.com/ClickHouse/ClickHouse/pull/46075) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove excessive license notices from preciseExp10.cpp. [#46163](https://github.com/ClickHouse/ClickHouse/pull/46163) ([DimasKovas](https://github.com/DimasKovas)). +* Add an option for `clickhouse-watchdog` to restart the child process. This does not make a lot of use. [#46312](https://github.com/ClickHouse/ClickHouse/pull/46312) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* If the environment variable `CLICKHOUSE_DOCKER_RESTART_ON_EXIT` is set to 1, the Docker container will run `clickhouse-server` as a child instead of the first process, and restart it when it exited. [#46391](https://github.com/ClickHouse/ClickHouse/pull/46391) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Fix Systemd service file. [#46461](https://github.com/ClickHouse/ClickHouse/pull/46461) ([SuperDJY](https://github.com/cmsxbc)). +* Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Raised the minimum Clang version needed to build ClickHouse from 12 to 15. [#46710](https://github.com/ClickHouse/ClickHouse/pull/46710) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Flush data exactly by `rabbitmq_flush_interval_ms` or by `rabbitmq_max_block_size` in `StorageRabbitMQ`. Closes [#42389](https://github.com/ClickHouse/ClickHouse/issues/42389). Closes [#45160](https://github.com/ClickHouse/ClickHouse/issues/45160). [#44404](https://github.com/ClickHouse/ClickHouse/pull/44404) ([Kseniia Sumarokova](https://github.com/kssenii)). +* - Use PODArray to render in sparkBar function, so we can control the memory usage. Close [#44467](https://github.com/ClickHouse/ClickHouse/issues/44467). [#44489](https://github.com/ClickHouse/ClickHouse/pull/44489) ([Duc Canh Le](https://github.com/canhld94)). +* Fix functions (quantilesExactExclusive, quantilesExactInclusive) return unsorted array element. [#45379](https://github.com/ClickHouse/ClickHouse/pull/45379) ([wujunfu](https://github.com/wujunfu)). +* Fix uncaught exception in HTTPHandler when open telemetry is enabled. [#45456](https://github.com/ClickHouse/ClickHouse/pull/45456) ([Frank Chen](https://github.com/FrankChen021)). +* Don't infer Dates from 8 digit numbers. It could lead to wrong data to be read. [#45581](https://github.com/ClickHouse/ClickHouse/pull/45581) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixes to correctly use `odbc_bridge_use_connection_pooling` setting. [#45591](https://github.com/ClickHouse/ClickHouse/pull/45591) ([Bharat Nallan](https://github.com/bharatnc)). +* when the callback in the cache is called, it is possible that this cache is destructed. To keep it safe, we capture members by value. It's also safe for task schedule because it will be deactivated before storage is destroyed. Resolve [#45548](https://github.com/ClickHouse/ClickHouse/issues/45548). [#45601](https://github.com/ClickHouse/ClickHouse/pull/45601) ([Han Fei](https://github.com/hanfei1991)). +* - Fix data corruption when codecs Delta or DoubleDelta are combined with codec Gorilla. [#45615](https://github.com/ClickHouse/ClickHouse/pull/45615) ([Robert Schulze](https://github.com/rschu1ze)). +* Correctly check types when using N-gram bloom filter index to avoid invalid reads. [#45617](https://github.com/ClickHouse/ClickHouse/pull/45617) ([Antonio Andelic](https://github.com/antonio2368)). +* A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). +* Set compression method and level for backup Closes [#45690](https://github.com/ClickHouse/ClickHouse/issues/45690). [#45737](https://github.com/ClickHouse/ClickHouse/pull/45737) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Should use `select_query_typed.limitByOffset()` instead of `select_query_typed.limitOffset()`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)). +* When use experimental analyzer, queries like `SELECT number FROM numbers(100) LIMIT 10 OFFSET 10;` get wrong results (empty result for this sql). That is caused by an unnecessary offset step added by planner. [#45822](https://github.com/ClickHouse/ClickHouse/pull/45822) ([刘陶峰](https://github.com/taofengliu)). +* Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Bugfix IPv6 parser for mixed ip4 address with missed first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add the `query_kind` column to the `system.processes` table and the `SHOW PROCESSLIST` query. Remove duplicate code. It fixes a bug: the global configuration parameter `max_concurrent_select_queries` was not respected to queries with `INTERSECT` or `EXCEPT` chains. [#45872](https://github.com/ClickHouse/ClickHouse/pull/45872) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in a function `stochasticLinearRegression`. Found by WingFuzz. [#45985](https://github.com/ClickHouse/ClickHouse/pull/45985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash in `SELECT` queries with `INTERSECT` and `EXCEPT` modifiers that read data from tables with enabled sparse columns (controlled by setting `ratio_of_defaults_for_sparse_serialization). [#45987](https://github.com/ClickHouse/ClickHouse/pull/45987) ([Anton Popov](https://github.com/CurtizJ)). +* * Fix read in order optimization for DESC sorting with FINAL, close [#45815](https://github.com/ClickHouse/ClickHouse/issues/45815). [#46009](https://github.com/ClickHouse/ClickHouse/pull/46009) ([Vladimir C](https://github.com/vdimir)). +* Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Fix elapsed column in system.processes (10x error). [#46047](https://github.com/ClickHouse/ClickHouse/pull/46047) ([Azat Khuzhin](https://github.com/azat)). +* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix environment variable substitution in the configuration when a parameter already has a value. This closes [#46131](https://github.com/ClickHouse/ClickHouse/issues/46131). This closes [#9547](https://github.com/ClickHouse/ClickHouse/issues/9547). [#46144](https://github.com/ClickHouse/ClickHouse/pull/46144) ([pufit](https://github.com/pufit)). +* Fix incorrect predicate push down with grouping sets. Closes [#45947](https://github.com/ClickHouse/ClickHouse/issues/45947). [#46151](https://github.com/ClickHouse/ClickHouse/pull/46151) ([flynn](https://github.com/ucasfl)). +* Fix possible pipeline stuck error on `fulls_sorting_join` with constant keys. [#46175](https://github.com/ClickHouse/ClickHouse/pull/46175) ([Vladimir C](https://github.com/vdimir)). +* Never rewrite tuple functions as literals during formatting to avoid incorrect results. [#46232](https://github.com/ClickHouse/ClickHouse/pull/46232) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix possible out of bounds error while reading LowCardinality(Nullable) in Arrow format. [#46270](https://github.com/ClickHouse/ClickHouse/pull/46270) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix possible crash which can be caused by an integer overflow while deserializing aggregating state of a function that stores HashTable. [#46349](https://github.com/ClickHouse/ClickHouse/pull/46349) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed a LOGICAL_ERROR on an attempt to execute `ALTER ... MOVE PART ... TO TABLE`. This type of query was never actually supported. [#46359](https://github.com/ClickHouse/ClickHouse/pull/46359) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix s3Cluster schema inference in parallel distributed insert select when `parallel_distributed_insert_select` is enabled. [#46381](https://github.com/ClickHouse/ClickHouse/pull/46381) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix queries like `ALTER TABLE ... UPDATE nested.arr1 = nested.arr2 ...`, where `arr1` and `arr2` are fields of the same `Nested` column. [#46387](https://github.com/ClickHouse/ClickHouse/pull/46387) ([Anton Popov](https://github.com/CurtizJ)). +* Scheduler may fail to schedule a task. If it happens, the whole MulityPartUpload should be aborted and `UploadHelper` must wait for already scheduled tasks. [#46451](https://github.com/ClickHouse/ClickHouse/pull/46451) ([Dmitry Novik](https://github.com/novikd)). +* Fix PREWHERE for Merge with different default types (fixes some `NOT_FOUND_COLUMN_IN_BLOCK` when the default type for the column differs, also allow `PREWHERE` when the type of column is the same across tables, and prohibit it, only if it differs). [#46454](https://github.com/ClickHouse/ClickHouse/pull/46454) ([Azat Khuzhin](https://github.com/azat)). +* Fix a crash that could happen when constant values are used in `ORDER BY`. Fixes [#46466](https://github.com/ClickHouse/ClickHouse/issues/46466). [#46493](https://github.com/ClickHouse/ClickHouse/pull/46493) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not throw exception if `disk` setting was specified on query level, but `storage_policy` was specified in config merge tree settings section. `disk` will override setting from config. [#46533](https://github.com/ClickHouse/ClickHouse/pull/46533) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fixes [#46557](https://github.com/ClickHouse/ClickHouse/issues/46557). [#46611](https://github.com/ClickHouse/ClickHouse/pull/46611) ([Alexander Gololobov](https://github.com/davenger)). +* Fix endless restarts of clickhouse-server systemd unit if server cannot start within 1m30sec (Disable timeout logic for starting clickhouse-server from systemd service). [#46613](https://github.com/ClickHouse/ClickHouse/pull/46613) ([Azat Khuzhin](https://github.com/azat)). +* Allocated during asynchronous inserts memory buffers were deallocated in the global context and MemoryTracker counters for corresponding user and query were not updated correctly. That led to false positive OOM exceptions. [#46622](https://github.com/ClickHouse/ClickHouse/pull/46622) ([Dmitry Novik](https://github.com/novikd)). +* Fix totals and extremes with constants in clickhouse-local. Closes [#43831](https://github.com/ClickHouse/ClickHouse/issues/43831). [#46669](https://github.com/ClickHouse/ClickHouse/pull/46669) ([Kruglov Pavel](https://github.com/Avogar)). +* Handle `input_format_null_as_default` for nested types. [#46725](https://github.com/ClickHouse/ClickHouse/pull/46725) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug-fix + +* Updated to not clear on_expression from table_join as its used by future analyze runs resolves [#45185](https://github.com/ClickHouse/ClickHouse/issues/45185). [#46487](https://github.com/ClickHouse/ClickHouse/pull/46487) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). + +#### Build Improvement + +* Fixed endian issue in snappy library for s390x. [#45670](https://github.com/ClickHouse/ClickHouse/pull/45670) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fixed endian issue in CityHash for s390x. [#46096](https://github.com/ClickHouse/ClickHouse/pull/46096) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fixed Functional Test 00900_long_parquet for S390x. [#46181](https://github.com/ClickHouse/ClickHouse/pull/46181) ([Sanjam Panda](https://github.com/saitama951)). +* Fixed endian issues in SQL hash functions on s390x architectures. [#46495](https://github.com/ClickHouse/ClickHouse/pull/46495) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Add check for running workflows to merge_pr.py"'. [#45802](https://github.com/ClickHouse/ClickHouse/pull/45802) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "Improve behaviour of conversion into Date for boundary value 65535"'. [#46007](https://github.com/ClickHouse/ClickHouse/pull/46007) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Allow vertical merges from compact to wide parts"'. [#46236](https://github.com/ClickHouse/ClickHouse/pull/46236) ([Anton Popov](https://github.com/CurtizJ)). +* NO CL ENTRY: 'Revert "Beter diagnostics from http in clickhouse-test"'. [#46301](https://github.com/ClickHouse/ClickHouse/pull/46301) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Revert "Merge pull request [#38212](https://github.com/ClickHouse/ClickHouse/issues/38212) from azat/no-stress" [#38750](https://github.com/ClickHouse/ClickHouse/pull/38750) ([Azat Khuzhin](https://github.com/azat)). +* More interesting settings for Stress Tests [#41534](https://github.com/ClickHouse/ClickHouse/pull/41534) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Attempt to fix 'Local: No offset stored message' from Kafka [#42391](https://github.com/ClickHouse/ClickHouse/pull/42391) ([filimonov](https://github.com/filimonov)). +* Analyzer SETTINGS push down [#42976](https://github.com/ClickHouse/ClickHouse/pull/42976) ([Maksim Kita](https://github.com/kitaisreal)). +* Simply filesystem helpers to check is-readable/writable/executable [#43405](https://github.com/ClickHouse/ClickHouse/pull/43405) ([Azat Khuzhin](https://github.com/azat)). +* Add CPU flamegraphs for perf tests [#43529](https://github.com/ClickHouse/ClickHouse/pull/43529) ([Azat Khuzhin](https://github.com/azat)). +* More robust CI parsers [#44226](https://github.com/ClickHouse/ClickHouse/pull/44226) ([Azat Khuzhin](https://github.com/azat)). +* Fix error message for a broken distributed batches ("While sending batch") [#44907](https://github.com/ClickHouse/ClickHouse/pull/44907) ([Azat Khuzhin](https://github.com/azat)). +* Catch exceptions in BackgroundSchedulePool [#44923](https://github.com/ClickHouse/ClickHouse/pull/44923) ([Azat Khuzhin](https://github.com/azat)). +* Add encryption support to OpenSSL [#45258](https://github.com/ClickHouse/ClickHouse/pull/45258) ([Boris Kuschel](https://github.com/bkuschel)). +* Revert code in TreeRewriter for proper column order for UNION [#45282](https://github.com/ClickHouse/ClickHouse/pull/45282) ([Azat Khuzhin](https://github.com/azat)). +* Fix no shared id during drop for the fourth time [#45363](https://github.com/ClickHouse/ClickHouse/pull/45363) ([alesapin](https://github.com/alesapin)). +* HashedDictionary sharded fix nullable values [#45396](https://github.com/ClickHouse/ClickHouse/pull/45396) ([Maksim Kita](https://github.com/kitaisreal)). +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Simplify binary locating in clickhouse-test [#45484](https://github.com/ClickHouse/ClickHouse/pull/45484) ([Azat Khuzhin](https://github.com/azat)). +* Fix race in NuRaft's asio listener [#45511](https://github.com/ClickHouse/ClickHouse/pull/45511) ([Antonio Andelic](https://github.com/antonio2368)). +* Make ColumnNode::isEqualImpl more strict [#45518](https://github.com/ClickHouse/ClickHouse/pull/45518) ([Dmitry Novik](https://github.com/novikd)). +* Fix krb5 for OpenSSL [#45519](https://github.com/ClickHouse/ClickHouse/pull/45519) ([Boris Kuschel](https://github.com/bkuschel)). +* s390x build support [#45520](https://github.com/ClickHouse/ClickHouse/pull/45520) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Better formatting for exception messages 2 [#45527](https://github.com/ClickHouse/ClickHouse/pull/45527) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try to fix test `test_storage_s3/test.py::test_wrong_s3_syntax` (race in `StorageS3`) [#45529](https://github.com/ClickHouse/ClickHouse/pull/45529) ([Anton Popov](https://github.com/CurtizJ)). +* Analyzer add test for CREATE TABLE AS SELECT [#45533](https://github.com/ClickHouse/ClickHouse/pull/45533) ([Maksim Kita](https://github.com/kitaisreal)). +* LowCardinality insert fix [#45585](https://github.com/ClickHouse/ClickHouse/pull/45585) ([Maksim Kita](https://github.com/kitaisreal)). +* Update 02482_load_parts_refcounts.sh [#45604](https://github.com/ClickHouse/ClickHouse/pull/45604) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Extend assertion in buildPushingToViewsChain() to respect is_detached [#45610](https://github.com/ClickHouse/ClickHouse/pull/45610) ([Azat Khuzhin](https://github.com/azat)). +* Remove useless code [#45612](https://github.com/ClickHouse/ClickHouse/pull/45612) ([Anton Popov](https://github.com/CurtizJ)). +* Improve "at least part X is missing" error message [#45613](https://github.com/ClickHouse/ClickHouse/pull/45613) ([Azat Khuzhin](https://github.com/azat)). +* Refactoring of code near merge tree parts [#45619](https://github.com/ClickHouse/ClickHouse/pull/45619) ([Anton Popov](https://github.com/CurtizJ)). +* Update version after release [#45634](https://github.com/ClickHouse/ClickHouse/pull/45634) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v23.1.1.3077-stable [#45635](https://github.com/ClickHouse/ClickHouse/pull/45635) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.10.7.13-stable [#45637](https://github.com/ClickHouse/ClickHouse/pull/45637) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Improve release script [#45657](https://github.com/ClickHouse/ClickHouse/pull/45657) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Suppress TOO_MANY_PARTS in BC check [#45691](https://github.com/ClickHouse/ClickHouse/pull/45691) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix build [#45692](https://github.com/ClickHouse/ClickHouse/pull/45692) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add recordings for 23.1 and Tel Aviv [#45695](https://github.com/ClickHouse/ClickHouse/pull/45695) ([Tyler Hannan](https://github.com/tylerhannan)). +* Integrate IO scheduler with buffers for remote reads and writes [#45711](https://github.com/ClickHouse/ClickHouse/pull/45711) ([Sergei Trifonov](https://github.com/serxa)). +* Add missing SYSTEM FLUSH LOGS for clickhouse-test [#45713](https://github.com/ClickHouse/ClickHouse/pull/45713) ([Azat Khuzhin](https://github.com/azat)). +* tests: add missing allow_suspicious_codecs in 02536_delta_gorilla_corruption (fixes fasttest) [#45735](https://github.com/ClickHouse/ClickHouse/pull/45735) ([Azat Khuzhin](https://github.com/azat)). +* Improve MEMERY_LIMIT_EXCEEDED exception message [#45743](https://github.com/ClickHouse/ClickHouse/pull/45743) ([Dmitry Novik](https://github.com/novikd)). +* Fix style and typo [#45744](https://github.com/ClickHouse/ClickHouse/pull/45744) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v22.8.13.20-lts [#45749](https://github.com/ClickHouse/ClickHouse/pull/45749) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.11.5.15-stable [#45754](https://github.com/ClickHouse/ClickHouse/pull/45754) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.1.2.9-stable [#45755](https://github.com/ClickHouse/ClickHouse/pull/45755) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Docs: Fix formatting [#45756](https://github.com/ClickHouse/ClickHouse/pull/45756) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix typo + add boringssl comment [#45757](https://github.com/ClickHouse/ClickHouse/pull/45757) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky test, @alesapin please help! [#45759](https://github.com/ClickHouse/ClickHouse/pull/45759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove trash [#45760](https://github.com/ClickHouse/ClickHouse/pull/45760) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Flaky Check [#45765](https://github.com/ClickHouse/ClickHouse/pull/45765) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update dictionary.md [#45775](https://github.com/ClickHouse/ClickHouse/pull/45775) ([Derek Chia](https://github.com/DerekChia)). +* Added a test for multiple ignore subqueries with nested select [#45784](https://github.com/ClickHouse/ClickHouse/pull/45784) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)). +* outdated parts are loading async, need to wait them after attach [#45787](https://github.com/ClickHouse/ClickHouse/pull/45787) ([Sema Checherinda](https://github.com/CheSema)). +* Fix bug in tables drop which can lead to potential query hung [#45791](https://github.com/ClickHouse/ClickHouse/pull/45791) ([alesapin](https://github.com/alesapin)). +* Fix race condition on a part check cancellation [#45793](https://github.com/ClickHouse/ClickHouse/pull/45793) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not restrict count() query to 1 thread in isStorageTouchedByMutations() [#45794](https://github.com/ClickHouse/ClickHouse/pull/45794) ([Alexander Gololobov](https://github.com/davenger)). +* Fix test `test_azure_blob_storage_zero_copy_replication ` (memory leak in azure sdk) [#45796](https://github.com/ClickHouse/ClickHouse/pull/45796) ([Anton Popov](https://github.com/CurtizJ)). +* Add check for running workflows to merge_pr.py [#45801](https://github.com/ClickHouse/ClickHouse/pull/45801) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky test `02531_two_level_aggregation_bug.sh` [#45806](https://github.com/ClickHouse/ClickHouse/pull/45806) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor doc follow-up to [#45382](https://github.com/ClickHouse/ClickHouse/issues/45382) [#45816](https://github.com/ClickHouse/ClickHouse/pull/45816) ([Robert Schulze](https://github.com/rschu1ze)). +* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Make separate DROP_PART log entry type [#45821](https://github.com/ClickHouse/ClickHouse/pull/45821) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not cancel created prs [#45823](https://github.com/ClickHouse/ClickHouse/pull/45823) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix ASTQualifiedAsterisk cloning [#45829](https://github.com/ClickHouse/ClickHouse/pull/45829) ([Raúl Marín](https://github.com/Algunenano)). +* Update 02540_duplicate_primary_key.sql [#45846](https://github.com/ClickHouse/ClickHouse/pull/45846) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Proper includes for ConnectionTimeoutsContext.h [#45848](https://github.com/ClickHouse/ClickHouse/pull/45848) ([Raúl Marín](https://github.com/Algunenano)). +* Fix minor mistake after refactoring [#45857](https://github.com/ClickHouse/ClickHouse/pull/45857) ([Anton Popov](https://github.com/CurtizJ)). +* Fix flaky ttl_replicated test (remove sleep) [#45858](https://github.com/ClickHouse/ClickHouse/pull/45858) ([alesapin](https://github.com/alesapin)). +* Add some context to stress test failures [#45869](https://github.com/ClickHouse/ClickHouse/pull/45869) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix clang-tidy error in Keeper `Changelog` [#45888](https://github.com/ClickHouse/ClickHouse/pull/45888) ([Antonio Andelic](https://github.com/antonio2368)). +* do not block merges when old parts are droping in drop queries [#45889](https://github.com/ClickHouse/ClickHouse/pull/45889) ([Sema Checherinda](https://github.com/CheSema)). +* do not run wal on remote disks [#45907](https://github.com/ClickHouse/ClickHouse/pull/45907) ([Sema Checherinda](https://github.com/CheSema)). +* Dashboard improvements [#45935](https://github.com/ClickHouse/ClickHouse/pull/45935) ([Kevin Zhang](https://github.com/Kinzeng)). +* Better context for stress tests failures [#45937](https://github.com/ClickHouse/ClickHouse/pull/45937) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix IO URing [#45940](https://github.com/ClickHouse/ClickHouse/pull/45940) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Docs: Remove obsolete query result cache page [#45958](https://github.com/ClickHouse/ClickHouse/pull/45958) ([Robert Schulze](https://github.com/rschu1ze)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update AggregateFunctionSparkbar [#45961](https://github.com/ClickHouse/ClickHouse/pull/45961) ([Vladimir C](https://github.com/vdimir)). +* Update cherrypick_pr to get mergeable state [#45972](https://github.com/ClickHouse/ClickHouse/pull/45972) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add "final" specifier to some classes [#45973](https://github.com/ClickHouse/ClickHouse/pull/45973) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve local running of cherry_pick.py [#45980](https://github.com/ClickHouse/ClickHouse/pull/45980) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Properly detect changes in Rust code and recompile Rust libraries [#45981](https://github.com/ClickHouse/ClickHouse/pull/45981) ([Azat Khuzhin](https://github.com/azat)). +* Avoid leaving symbols leftovers for query fuzzy search [#45983](https://github.com/ClickHouse/ClickHouse/pull/45983) ([Azat Khuzhin](https://github.com/azat)). +* Fix basic functionality with type `Object` and new analyzer [#45992](https://github.com/ClickHouse/ClickHouse/pull/45992) ([Anton Popov](https://github.com/CurtizJ)). +* Check dynamic columns of part before its commit [#45995](https://github.com/ClickHouse/ClickHouse/pull/45995) ([Anton Popov](https://github.com/CurtizJ)). +* Minor doc fixes for inverted index [#46004](https://github.com/ClickHouse/ClickHouse/pull/46004) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix terribly broken, fragile and potentially cyclic linking [#46006](https://github.com/ClickHouse/ClickHouse/pull/46006) ([Robert Schulze](https://github.com/rschu1ze)). +* Docs: Mention time zone randomization [#46008](https://github.com/ClickHouse/ClickHouse/pull/46008) ([Robert Schulze](https://github.com/rschu1ze)). +* Analyzer limit offset test rename [#46011](https://github.com/ClickHouse/ClickHouse/pull/46011) ([Maksim Kita](https://github.com/kitaisreal)). +* Update version_date.tsv and changelogs after v23.1.3.5-stable [#46012](https://github.com/ClickHouse/ClickHouse/pull/46012) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update sorting properties after reading in order applied [#46014](https://github.com/ClickHouse/ClickHouse/pull/46014) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix disabled by mistake hung check [#46020](https://github.com/ClickHouse/ClickHouse/pull/46020) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix memory leak at creation of curl connection in azure sdk [#46025](https://github.com/ClickHouse/ClickHouse/pull/46025) ([Anton Popov](https://github.com/CurtizJ)). +* Add checks for installable packages to workflows [#46036](https://github.com/ClickHouse/ClickHouse/pull/46036) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix data race in BACKUP [#46040](https://github.com/ClickHouse/ClickHouse/pull/46040) ([Azat Khuzhin](https://github.com/azat)). +* Dump sanitizer errors in the integration tests logs [#46041](https://github.com/ClickHouse/ClickHouse/pull/46041) ([Azat Khuzhin](https://github.com/azat)). +* Temporarily disable one rabbitmq flaky test [#46052](https://github.com/ClickHouse/ClickHouse/pull/46052) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove unnecessary execute() while evaluating a constant expression. [#46058](https://github.com/ClickHouse/ClickHouse/pull/46058) ([Vitaly Baranov](https://github.com/vitlibar)). +* Polish S3 client [#46070](https://github.com/ClickHouse/ClickHouse/pull/46070) ([Antonio Andelic](https://github.com/antonio2368)). +* Smallish follow-up to [#46057](https://github.com/ClickHouse/ClickHouse/issues/46057) [#46072](https://github.com/ClickHouse/ClickHouse/pull/46072) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 00002_log_and_exception_messages_formatting [#46077](https://github.com/ClickHouse/ClickHouse/pull/46077) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable temporarily rabbitmq tests which use channel.startConsuming() [#46078](https://github.com/ClickHouse/ClickHouse/pull/46078) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update yarn packages for dev branches [#46079](https://github.com/ClickHouse/ClickHouse/pull/46079) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Simplify code around storages s3/hudi/delta-lake [#46083](https://github.com/ClickHouse/ClickHouse/pull/46083) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix build with `-DENABLE_LIBURING=0` (or `-DENABLE_LIBRARIES=0`) [#46088](https://github.com/ClickHouse/ClickHouse/pull/46088) ([Robert Schulze](https://github.com/rschu1ze)). +* Add also last messages from stdout/stderr/debuglog in clickhouse-test [#46090](https://github.com/ClickHouse/ClickHouse/pull/46090) ([Azat Khuzhin](https://github.com/azat)). +* Sanity assertions for closing file descriptors [#46091](https://github.com/ClickHouse/ClickHouse/pull/46091) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky rabbitmq test [#46107](https://github.com/ClickHouse/ClickHouse/pull/46107) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix test_merge_tree_azure_blob_storage::test_zero_copy_replication test [#46108](https://github.com/ClickHouse/ClickHouse/pull/46108) ([Azat Khuzhin](https://github.com/azat)). +* allow_drop_detached requires an argument [#46110](https://github.com/ClickHouse/ClickHouse/pull/46110) ([Sema Checherinda](https://github.com/CheSema)). +* Fix fault injection in copier and test_cluster_copier flakiness [#46120](https://github.com/ClickHouse/ClickHouse/pull/46120) ([Azat Khuzhin](https://github.com/azat)). +* Update liburing CMakeLists.txt [#46127](https://github.com/ClickHouse/ClickHouse/pull/46127) ([Nikolay Degterinsky](https://github.com/evillique)). +* Use BAD_ARGUMENTS over LOGICAL_ERROR for schema inference error file() over fd [#46132](https://github.com/ClickHouse/ClickHouse/pull/46132) ([Azat Khuzhin](https://github.com/azat)). +* Stricter warnings + fix whitespaces in poco [#46133](https://github.com/ClickHouse/ClickHouse/pull/46133) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix dependency checks [#46138](https://github.com/ClickHouse/ClickHouse/pull/46138) ([Vitaly Baranov](https://github.com/vitlibar)). +* Interpret `cluster_name` identifier in `s3Cluster` function as literal [#46143](https://github.com/ClickHouse/ClickHouse/pull/46143) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove flaky test [#46149](https://github.com/ClickHouse/ClickHouse/pull/46149) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix spelling + duplicate includes in poco [#46155](https://github.com/ClickHouse/ClickHouse/pull/46155) ([Robert Schulze](https://github.com/rschu1ze)). +* Add 00002_log_and_exception_messages_formatting back [#46156](https://github.com/ClickHouse/ClickHouse/pull/46156) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix clickhouse/clickhouse-server description to make it in sync [#46159](https://github.com/ClickHouse/ClickHouse/pull/46159) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Complain about missing Yasm at configure time at of build time [#46162](https://github.com/ClickHouse/ClickHouse/pull/46162) ([Robert Schulze](https://github.com/rschu1ze)). +* Update Dockerfile.ubuntu [#46173](https://github.com/ClickHouse/ClickHouse/pull/46173) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Cleanup disk unittest [#46179](https://github.com/ClickHouse/ClickHouse/pull/46179) ([Sergei Trifonov](https://github.com/serxa)). +* Update 01513_optimize_aggregation_in_order_memory_long.sql [#46180](https://github.com/ClickHouse/ClickHouse/pull/46180) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Make a bug in HTTP interface less annoying [#46183](https://github.com/ClickHouse/ClickHouse/pull/46183) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* fix typo [#46207](https://github.com/ClickHouse/ClickHouse/pull/46207) ([Sergei Trifonov](https://github.com/serxa)). +* increase a time gap between insert and ttl move [#46233](https://github.com/ClickHouse/ClickHouse/pull/46233) ([Sema Checherinda](https://github.com/CheSema)). +* Make `test_replicated_merge_tree_s3_restore` less flaky [#46242](https://github.com/ClickHouse/ClickHouse/pull/46242) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix test_distributed_ddl_parallel [#46243](https://github.com/ClickHouse/ClickHouse/pull/46243) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update 00564_versioned_collapsing_merge_tree.sql [#46245](https://github.com/ClickHouse/ClickHouse/pull/46245) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Optimize docker binary-builder [#46246](https://github.com/ClickHouse/ClickHouse/pull/46246) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update Curl to 7.87.0 [#46248](https://github.com/ClickHouse/ClickHouse/pull/46248) ([Boris Kuschel](https://github.com/bkuschel)). +* Upgrade libxml2 to address CVE-2022-40303 CVE-2022-40304 [#46249](https://github.com/ClickHouse/ClickHouse/pull/46249) ([larryluogit](https://github.com/larryluogit)). +* Run clang-format over poco [#46259](https://github.com/ClickHouse/ClickHouse/pull/46259) ([Robert Schulze](https://github.com/rschu1ze)). +* Suppress "Container already exists" in BC check [#46260](https://github.com/ClickHouse/ClickHouse/pull/46260) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix failure description for hung check [#46267](https://github.com/ClickHouse/ClickHouse/pull/46267) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add upcoming Events [#46271](https://github.com/ClickHouse/ClickHouse/pull/46271) ([Tyler Hannan](https://github.com/tylerhannan)). +* coordination: do not allow election_timeout_lower_bound_ms > election_timeout_upper_bound_ms [#46274](https://github.com/ClickHouse/ClickHouse/pull/46274) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* fix data race between check table request and background checker [#46278](https://github.com/ClickHouse/ClickHouse/pull/46278) ([Sema Checherinda](https://github.com/CheSema)). +* Try to make 02346_full_text_search less flaky [#46279](https://github.com/ClickHouse/ClickHouse/pull/46279) ([Robert Schulze](https://github.com/rschu1ze)). +* Beter diagnostics from http in clickhouse-test [#46281](https://github.com/ClickHouse/ClickHouse/pull/46281) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add more logging to RabbitMQ (to help debug tests) [#46283](https://github.com/ClickHouse/ClickHouse/pull/46283) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix window view test [#46285](https://github.com/ClickHouse/ClickHouse/pull/46285) ([Kseniia Sumarokova](https://github.com/kssenii)). +* suppressing test inaccuracy 00738_lock_for_inner_table [#46287](https://github.com/ClickHouse/ClickHouse/pull/46287) ([Sema Checherinda](https://github.com/CheSema)). +* Simplify ATTACH MergeTree table FROM S3 in tests [#46288](https://github.com/ClickHouse/ClickHouse/pull/46288) ([Azat Khuzhin](https://github.com/azat)). +* Update RabbitMQProducer.cpp [#46295](https://github.com/ClickHouse/ClickHouse/pull/46295) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix macOs compilation due to sprintf [#46298](https://github.com/ClickHouse/ClickHouse/pull/46298) ([Jordi Villar](https://github.com/jrdi)). +* Slightly improve error message for required Yasm assembler [#46328](https://github.com/ClickHouse/ClickHouse/pull/46328) ([Robert Schulze](https://github.com/rschu1ze)). +* Unifdef unused parts of poco [#46329](https://github.com/ClickHouse/ClickHouse/pull/46329) ([Robert Schulze](https://github.com/rschu1ze)). +* Trigger automerge on approved PRs [#46332](https://github.com/ClickHouse/ClickHouse/pull/46332) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix flaky test_storage_rabbitmq::test_rabbitmq_address [#46337](https://github.com/ClickHouse/ClickHouse/pull/46337) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Extract common logic for S3 [#46339](https://github.com/ClickHouse/ClickHouse/pull/46339) ([Antonio Andelic](https://github.com/antonio2368)). +* Update cluster.py [#46340](https://github.com/ClickHouse/ClickHouse/pull/46340) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Try to stabilize test 02346_full_text_search.sql [#46344](https://github.com/ClickHouse/ClickHouse/pull/46344) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove an unused argument [#46346](https://github.com/ClickHouse/ClickHouse/pull/46346) ([Alexander Tokmakov](https://github.com/tavplubix)). +* fix candidate selection [#46347](https://github.com/ClickHouse/ClickHouse/pull/46347) ([Sema Checherinda](https://github.com/CheSema)). +* Do not pollute logs in clickhouse-test [#46361](https://github.com/ClickHouse/ClickHouse/pull/46361) ([Azat Khuzhin](https://github.com/azat)). +* Do not continue perf tests in case of exception in create_query/fill_query [#46362](https://github.com/ClickHouse/ClickHouse/pull/46362) ([Azat Khuzhin](https://github.com/azat)). +* Minor fix in files locating for Bugfix validate check [#46368](https://github.com/ClickHouse/ClickHouse/pull/46368) ([Vladimir C](https://github.com/vdimir)). +* Temporarily disable test_rabbitmq_overloaded_insert [#46403](https://github.com/ClickHouse/ClickHouse/pull/46403) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix test test_rabbitmq_overloaded_insert [#46404](https://github.com/ClickHouse/ClickHouse/pull/46404) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix stress test [#46405](https://github.com/ClickHouse/ClickHouse/pull/46405) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix stress tests statuses [#46406](https://github.com/ClickHouse/ClickHouse/pull/46406) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow-up to [#46168](https://github.com/ClickHouse/ClickHouse/issues/46168) [#46409](https://github.com/ClickHouse/ClickHouse/pull/46409) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix noisy log messages [#46410](https://github.com/ClickHouse/ClickHouse/pull/46410) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Docs: Clarify query parameters [#46419](https://github.com/ClickHouse/ClickHouse/pull/46419) ([Robert Schulze](https://github.com/rschu1ze)). +* Make tests with window view less bad [#46421](https://github.com/ClickHouse/ClickHouse/pull/46421) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Move MongoDB and PostgreSQL sources to Sources folder [#46422](https://github.com/ClickHouse/ClickHouse/pull/46422) ([Nikolay Degterinsky](https://github.com/evillique)). +* Another fix for cluster copier [#46433](https://github.com/ClickHouse/ClickHouse/pull/46433) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v22.3.18.37-lts [#46436](https://github.com/ClickHouse/ClickHouse/pull/46436) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix a backup test [#46449](https://github.com/ClickHouse/ClickHouse/pull/46449) ([Vitaly Baranov](https://github.com/vitlibar)). +* Do not fetch submodules in release.py [#46450](https://github.com/ClickHouse/ClickHouse/pull/46450) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* resolve race in getCSNAndAssert [#46452](https://github.com/ClickHouse/ClickHouse/pull/46452) ([Sema Checherinda](https://github.com/CheSema)). +* move database credential inputs to the center on initial load [#46455](https://github.com/ClickHouse/ClickHouse/pull/46455) ([Kevin Zhang](https://github.com/Kinzeng)). +* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Change logging level of a verbose message to Trace [#46459](https://github.com/ClickHouse/ClickHouse/pull/46459) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer planner fixes before enable by default [#46471](https://github.com/ClickHouse/ClickHouse/pull/46471) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix some flaky integration tests [#46478](https://github.com/ClickHouse/ClickHouse/pull/46478) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow to override host for client connection credentials [#46480](https://github.com/ClickHouse/ClickHouse/pull/46480) ([Azat Khuzhin](https://github.com/azat)). +* Try fix flaky test test_parallel_distributed_insert_select_with_schema_inference [#46488](https://github.com/ClickHouse/ClickHouse/pull/46488) ([Kruglov Pavel](https://github.com/Avogar)). +* Planner filter push down optimization fix [#46494](https://github.com/ClickHouse/ClickHouse/pull/46494) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix 01161_all_system_tables test flakiness [#46499](https://github.com/ClickHouse/ClickHouse/pull/46499) ([Azat Khuzhin](https://github.com/azat)). +* Compress tar archives with zstd in intergration tests [#46516](https://github.com/ClickHouse/ClickHouse/pull/46516) ([Azat Khuzhin](https://github.com/azat)). +* chore: bump testcontainers-go to 0.18.0 [#46518](https://github.com/ClickHouse/ClickHouse/pull/46518) ([Manuel de la Peña](https://github.com/mdelapenya)). +* Rollback unnecessary sync because of checking exit code [#46520](https://github.com/ClickHouse/ClickHouse/pull/46520) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix stress test [#46521](https://github.com/ClickHouse/ClickHouse/pull/46521) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add myrrc to trusted contributors [#46526](https://github.com/ClickHouse/ClickHouse/pull/46526) ([Anton Popov](https://github.com/CurtizJ)). +* fix style [#46530](https://github.com/ClickHouse/ClickHouse/pull/46530) ([flynn](https://github.com/ucasfl)). +* Autoupdate keeper dockerfile [#46535](https://github.com/ClickHouse/ClickHouse/pull/46535) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixes for OpenSSL and s390x [#46546](https://github.com/ClickHouse/ClickHouse/pull/46546) ([Boris Kuschel](https://github.com/bkuschel)). +* enable async-insert-max-query-number only if async_insert_deduplicate [#46549](https://github.com/ClickHouse/ClickHouse/pull/46549) ([Han Fei](https://github.com/hanfei1991)). +* Remove extra try/catch for QueryState/LocalQueryState reset [#46552](https://github.com/ClickHouse/ClickHouse/pull/46552) ([Azat Khuzhin](https://github.com/azat)). +* Whitespaces [#46553](https://github.com/ClickHouse/ClickHouse/pull/46553) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix build without avro [#46554](https://github.com/ClickHouse/ClickHouse/pull/46554) ([flynn](https://github.com/ucasfl)). +* Inhibit randomization in test `01551_mergetree_read_in_order_spread.sql` [#46562](https://github.com/ClickHouse/ClickHouse/pull/46562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove PVS-Studio [#46565](https://github.com/ClickHouse/ClickHouse/pull/46565) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Inhibit settings randomization in `01304_direct_io_long.sh` [#46566](https://github.com/ClickHouse/ClickHouse/pull/46566) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix double whitespace in comment in test [#46567](https://github.com/ClickHouse/ClickHouse/pull/46567) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename test [#46568](https://github.com/ClickHouse/ClickHouse/pull/46568) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ASTAsterisk::clone() [#46570](https://github.com/ClickHouse/ClickHouse/pull/46570) ([Nikolay Degterinsky](https://github.com/evillique)). +* Small update of sparkbar docs [#46579](https://github.com/ClickHouse/ClickHouse/pull/46579) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flakiness of expect tests for clickhouse-client by avoiding history overlap [#46582](https://github.com/ClickHouse/ClickHouse/pull/46582) ([Azat Khuzhin](https://github.com/azat)). +* Always log rollback for release.py [#46586](https://github.com/ClickHouse/ClickHouse/pull/46586) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Increase table retries in `test_cluster_copier` [#46590](https://github.com/ClickHouse/ClickHouse/pull/46590) ([Antonio Andelic](https://github.com/antonio2368)). +* Update 00170_s3_cache.sql [#46593](https://github.com/ClickHouse/ClickHouse/pull/46593) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix rabbitmq test [#46595](https://github.com/ClickHouse/ClickHouse/pull/46595) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix meilisearch test flakyness [#46596](https://github.com/ClickHouse/ClickHouse/pull/46596) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update compare.sh [#46599](https://github.com/ClickHouse/ClickHouse/pull/46599) ([Alexander Tokmakov](https://github.com/tavplubix)). +* update llvm-project to fix gwp-asan [#46600](https://github.com/ClickHouse/ClickHouse/pull/46600) ([Han Fei](https://github.com/hanfei1991)). +* Temporarily disable test_rabbitmq_overloaded_insert [#46608](https://github.com/ClickHouse/ClickHouse/pull/46608) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update 01565_reconnect_after_client_error to not expect explicit reconnect [#46619](https://github.com/ClickHouse/ClickHouse/pull/46619) ([Azat Khuzhin](https://github.com/azat)). +* Inhibit `index_granularity_bytes` randomization in some tests [#46626](https://github.com/ClickHouse/ClickHouse/pull/46626) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix coverity [#46629](https://github.com/ClickHouse/ClickHouse/pull/46629) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 01179_insert_values_semicolon test [#46636](https://github.com/ClickHouse/ClickHouse/pull/46636) ([Azat Khuzhin](https://github.com/azat)). +* Fix typo in read prefetch [#46640](https://github.com/ClickHouse/ClickHouse/pull/46640) ([Nikita Taranov](https://github.com/nickitat)). +* Avoid OOM in perf tests [#46641](https://github.com/ClickHouse/ClickHouse/pull/46641) ([Azat Khuzhin](https://github.com/azat)). +* Fix: remove redundant sorting optimization [#46642](https://github.com/ClickHouse/ClickHouse/pull/46642) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix flaky test 01710_normal_projections [#46645](https://github.com/ClickHouse/ClickHouse/pull/46645) ([Kruglov Pavel](https://github.com/Avogar)). +* Update postgres_utility.py [#46656](https://github.com/ClickHouse/ClickHouse/pull/46656) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix integration test: terminate old version without wait [#46660](https://github.com/ClickHouse/ClickHouse/pull/46660) ([alesapin](https://github.com/alesapin)). +* Break Stress tests [#46663](https://github.com/ClickHouse/ClickHouse/pull/46663) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* fix layout issues in dashboard.html [#46671](https://github.com/ClickHouse/ClickHouse/pull/46671) ([Kevin Zhang](https://github.com/Kinzeng)). +* Fix Stress tests [#46683](https://github.com/ClickHouse/ClickHouse/pull/46683) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable flaky test_ttl_move_memory_usage.py [#46687](https://github.com/ClickHouse/ClickHouse/pull/46687) ([Alexander Tokmakov](https://github.com/tavplubix)). +* BackgroundSchedulePool should not have any query context [#46709](https://github.com/ClickHouse/ClickHouse/pull/46709) ([Azat Khuzhin](https://github.com/azat)). +* Better exception message during Tuple JSON deserialization [#46727](https://github.com/ClickHouse/ClickHouse/pull/46727) ([Kruglov Pavel](https://github.com/Avogar)). +* Poco: POCO_HAVE_INT64 is always defined [#46728](https://github.com/ClickHouse/ClickHouse/pull/46728) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix SonarCloud Job [#46732](https://github.com/ClickHouse/ClickHouse/pull/46732) ([Julio Jimenez](https://github.com/juliojimenez)). +* Remove unused MergeTreeReadTask::remove_prewhere_column [#46744](https://github.com/ClickHouse/ClickHouse/pull/46744) ([Alexander Gololobov](https://github.com/davenger)). +* On out-of-space `at` returns error, we must terminate still [#46754](https://github.com/ClickHouse/ClickHouse/pull/46754) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: don't run builds/tests when CHANGELOG.md or README.md were modified [#46773](https://github.com/ClickHouse/ClickHouse/pull/46773) ([Robert Schulze](https://github.com/rschu1ze)). +* Cosmetics in hashing code [#46780](https://github.com/ClickHouse/ClickHouse/pull/46780) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Testing Improvement + +* Fixed functional test 00304_http_external_data for s390x. [#45807](https://github.com/ClickHouse/ClickHouse/pull/45807) ([Harry Lee](https://github.com/HarryLeeIBM)). + diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 28aee32c717..9ae49e8f707 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -39,12 +39,59 @@ To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` director Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. +### Restricting test runs + +A test can have zero or more _test tags_ specifying restrictions for test runs. + +For `.sh` tests tags are written as a comment on the second line: + +```bash +#!/usr/bin/env bash +# Tags: no-fasttest +``` + +For `.sql` tests tags are placed in the first line as a SQL comment: + +```sql +-- Tags: no-fasttest +SELECT 1 +``` + +|Tag name | What it does | Usage example | +|---|---|---| +| `disabled`| Test is not run || +| `long` | Test's execution time is extended from 1 to 10 minutes || +| `deadlock` | Test is run in a loop for a long time || +| `race` | Same as `deadlock`. Prefer `deadlock` || +| `shard` | Server is required to listen to `127.0.0.*` || +| `distributed` | Same as `shard`. Prefer `shard` || +| `global` | Same as `shard`. Prefer `shard` || +| `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` | +| `replica` | Same as `zookeeper`. Prefer `zookeeper` || +| `no-fasttest`| Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test| +| `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers | +| `no-replicated-database` ||| +| `no-ordinary-database` ||| +| `no-parallel` | Disables running other tests in parallel with this one | Test reads from `system` tables and invariants may be broken| +| `no-parallel-replicas` ||| +| `no-debug` ||| +| `no-stress` ||| +| `no-polymorphic-parts` ||| +| `no-random-settings` ||| +| `no-random-merge-tree-settings` ||| +| `no-backward-compatibility-check` ||| +| `no-cpu-x86_64` ||| +| `no-cpu-aarch64` ||| +| `no-cpu-ppc64le` ||| +| `no-s3-storage` ||| + +In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features. +For example, if your test uses a MySQL table, you should add a tag `use-mysql`. + ### Choosing the Test Name The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later. -Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that requires server to listen `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer that one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries. - ### Checking for an Error that Must Occur Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form: diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 83526ac944d..99183ac7308 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -19,7 +19,9 @@ CREATE TABLE deltalake **Engine parameters** - `url` — Bucket url with path to the existing Delta Lake table. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. + +Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) **Example** @@ -27,7 +29,24 @@ CREATE TABLE deltalake CREATE TABLE deltalake ENGINE=DeltaLake('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123') ``` +Using named collections: + +``` xml + + + + http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/ + ABC123 + Abc+123 + + + +``` + +```sql +CREATE TABLE deltalake ENGINE=DeltaLake(deltalake_conf, filename = 'test_table') +``` + ## See also - [deltaLake table function](../../../sql-reference/table-functions/deltalake.md) - diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index 4e335e6c075..a14134ecdfa 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -19,7 +19,9 @@ CREATE TABLE hudi_table **Engine parameters** - `url` — Bucket url with the path to an existing Hudi table. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. + +Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) **Example** @@ -27,7 +29,24 @@ CREATE TABLE hudi_table CREATE TABLE hudi_table ENGINE=Hudi('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123') ``` +Using named collections: + +``` xml + + + + http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/ + ABC123 + Abc+123 + + + +``` + +```sql +CREATE TABLE hudi_table ENGINE=Hudi(hudi_conf, filename = 'test_table') +``` + ## See also - [hudi table function](/docs/en/sql-reference/table-functions/hudi.md) - diff --git a/docs/en/engines/table-engines/integrations/iceberg.md b/docs/en/engines/table-engines/integrations/iceberg.md new file mode 100644 index 00000000000..4322fc6b773 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/iceberg.md @@ -0,0 +1,52 @@ +--- +slug: /en/engines/table-engines/integrations/iceberg +sidebar_label: Iceberg +--- + +# Iceberg Table Engine + +This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3. + +## Create Table + +Note that the Iceberg table must already exist in S3, this command does not take DDL parameters to create a new table. + +``` sql +CREATE TABLE iceberg_table + ENGINE = Iceberg(url, [aws_access_key_id, aws_secret_access_key,]) +``` + +**Engine parameters** + +- `url` — url with the path to an existing Iceberg table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. + +Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) + +**Example** + +```sql +CREATE TABLE iceberg_table ENGINE=Iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test') +``` + +Using named collections: + +``` xml + + + + http://test.s3.amazonaws.com/clickhouse-bucket/ + test + test + + + +``` + +```sql +CREATE TABLE iceberg_table ENGINE=Iceberg(iceberg_conf, filename = 'test_table') +``` + +## See also + +- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md) diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index eddae7b34e7..ef422632d3e 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -162,22 +162,59 @@ If you want to change the target table by using `ALTER`, we recommend disabling ## Configuration {#configuration} -Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_*`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists). +Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (below ``) and topic-level (below ``). The global configuration is applied first, and then the topic-level configuration is applied (if it exists). ``` xml - + + cgrp + smallest + + + + + logs + 250 + 100000 + + + + stats + 400 + 50000 + + + +``` + +
+ +Example in deprecated syntax + +``` xml + + cgrp smallest - + + + 250 100000 + + + 400 + 50000 + ``` +
+ + For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `true`. ### Kerberos support {#kafka-kerberos-support} diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index e482926f400..f841f157376 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -1,6 +1,6 @@ # Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex} -The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions). +The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](/docs/en/sql-reference/functions/index.md/#executable-user-defined-functions). The next queries find the closest neighbors in N-dimensional space using the L2 (Euclidean) distance: ``` sql @@ -39,7 +39,7 @@ Approximate Nearest Neighbor Search Indexes (`ANNIndexes`) are similar to skip i LIMIT N ``` -In these queries, `DistanceFunction` is selected from [distance functions](../../../sql-reference/functions/distance-functions). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](../../../interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood. +In these queries, `DistanceFunction` is selected from [distance functions](/docs/en/sql-reference/functions/distance-functions.md). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](/docs/en//interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood. :::note ANN index can't speed up query that satisfies both types (`where + order by`, only one of them). All queries must have the limit, as algorithms are used to find nearest neighbors and need a specific number of them. @@ -85,13 +85,13 @@ As the indexes are built only during insertions into table, `INSERT` and `OPTIMI You can create your table with index which uses certain algorithm. Now only indices based on the following algorithms are supported: # Index list -- [Annoy](../../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy) +- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy) # Annoy {#annoy} Implementation of the algorithm was taken from [this repository](https://github.com/spotify/annoy). Short description of the algorithm: -The algorithm recursively divides in half all space by random linear surfaces (lines in 2D, planes in 3D e.t.c.). Thus it makes tree of polyhedrons and points that they contains. Repeating the operation several times for greater accuracy it creates a forest. +The algorithm recursively divides in half all space by random linear surfaces (lines in 2D, planes in 3D etc.). Thus it makes tree of polyhedrons and points that they contains. Repeating the operation several times for greater accuracy it creates a forest. To find K Nearest Neighbours it goes down through the trees and fills the buffer of closest points using the priority queue of polyhedrons. Next, it sorts buffer and return the nearest K points. __Examples__: @@ -118,7 +118,7 @@ ORDER BY id; ``` :::note -Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`. +Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`. ::: Parameter `NumTrees` is the number of trees which the algorithm will create. The bigger it is, the slower (approximately linear) it works (in both `CREATE` and `SELECT` requests), but the better accuracy you get (adjusted for randomness). By default it is set to `100`. Parameter `DistanceName` is name of distance function. By default it is set to `L2Distance`. It can be set without changing first parameter, for example diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 404c6c6f227..0867f3a0795 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -5,6 +5,10 @@ description: Install ClickHouse slug: /en/install --- +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; + # Install ClickHouse You have three options for getting up and running with ClickHouse: @@ -19,17 +23,27 @@ The quickest and easiest way to get up and running with ClickHouse is to create ## Self-Managed Install +:::tip +For production installs of a specific release version see the [installation options](#available-installation-options) down below. +::: + + + + 1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable: + ```bash curl https://clickhouse.com/ | sh ``` 1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script: + ```bash sudo ./clickhouse install ``` 1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank: + ```response Creating log directory /var/log/clickhouse-server. Creating data directory /var/lib/clickhouse. @@ -40,6 +54,7 @@ The quickest and easiest way to get up and running with ClickHouse is to create Enter password for default user: ``` You should see the following output: + ```response ClickHouse has been successfully installed. @@ -51,10 +66,45 @@ The quickest and easiest way to get up and running with ClickHouse is to create ``` 1. Run the following command to start the ClickHouse server: + ```bash + sudo clickhouse start + ``` + + + + +1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable: ```bash - sudo clickhouse start + curl https://clickhouse.com/ | sh ``` +1. Run the ClickHouse server: + + ```bash + ./clickhouse server + ``` + +1. Open a new terminal and use the **clickhouse-client** to connect to your service: + + ```bash + ./clickhouse client + ``` + + ```response + ./clickhouse client + ClickHouse client version 23.2.1.1501 (official build). + Connecting to localhost:9000 as user default. + Connected to ClickHouse server version 23.2.1 revision 54461. + + local-host :) + ``` + + You are ready to start sending DDL and SQL commands to ClickHouse! + + + + + :::tip The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data. ::: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 992eaa8a49f..b2b2c6d5b1e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1971,7 +1971,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. -- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. +- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. +- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. ## Arrow {#data-format-arrow} diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 728afa73a17..25bdb0c36a3 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -229,7 +229,7 @@ To prevent inferring the same schema every time ClickHouse read the data from th There are special settings that control this cache: - `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config. -- `use_cache_for_{file,s3,hdfs,url}_schema_inference` - allows turning on/off using cache for schema inference. These settings can be used in queries. +- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries. The schema of the file can be changed by modifying the data or by changing format settings. For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file. @@ -1177,8 +1177,7 @@ This setting can be used to specify the types of columns that could not be deter **Example** ```sql -DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}' -SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)' +DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 ``` ```response ┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 75ae6f3d2bc..17d03dfa4ec 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -467,7 +467,7 @@ Port for exchanging data between ClickHouse servers. The hostname that can be used by other servers to access this server. -If omitted, it is defined in the same way as the `hostname-f` command. +If omitted, it is defined in the same way as the `hostname -f` command. Useful for breaking away from a specific network interface. diff --git a/docs/en/operations/settings/constraints-on-settings.md b/docs/en/operations/settings/constraints-on-settings.md index 87c51940c73..83ef46053a4 100644 --- a/docs/en/operations/settings/constraints-on-settings.md +++ b/docs/en/operations/settings/constraints-on-settings.md @@ -50,7 +50,7 @@ If there are multiple profiles active for a user, then constraints are merged. M Read-only mode is enabled by `readonly` setting (not to confuse with `readonly` constraint type): - `readonly=0`: No read-only restrictions. -- `readonly=1`: Only read queries are allowed and settings cannot be changes unless `changeable_in_readonly` is set. +- `readonly=1`: Only read queries are allowed and settings cannot be changed unless `changeable_in_readonly` is set. - `readonly=2`: Only read queries are allowed, but settings can be changed, except for `readonly` setting itself. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 3f81dc528f5..3580d83f704 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -15,11 +15,12 @@ When writing data, ClickHouse throws an exception if input data contain columns Supported formats: -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) (and other JSON formats) +- [BSONEachRow](../../interfaces/formats.md/#bsoneachrow) (and other JSON formats) - [TSKV](../../interfaces/formats.md/#tskv) - All formats with suffixes WithNames/WithNamesAndTypes -- [JSONColumns](../../interfaces/formats.md/#jsoncolumns) - [MySQLDump](../../interfaces/formats.md/#mysqldump) +- [Native](../../interfaces/formats.md/#native) Possible values: @@ -78,7 +79,7 @@ Default value: 1. ## input_format_defaults_for_omitted_fields {#input_format_defaults_for_omitted_fields} -When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. +When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) (and other JSON formats), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [Parquet](../../interfaces/formats.md/#parquet), [Arrow](../../interfaces/formats.md/#arrow), [Avro](../../interfaces/formats.md/#avro), [ORC](../../interfaces/formats.md/#orc), [Native](../../interfaces/formats.md/#native) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. :::note When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. @@ -96,7 +97,9 @@ Default value: 1. Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. -This setting is applicable to [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) queries for text input formats. +This setting is applicable for most input formats. + +For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. Possible values: @@ -139,6 +142,10 @@ y Nullable(String) z IPv4 ``` +:::warning +If the `schema_inference_hints` is not formated properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored. +::: + ## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} Controls making inferred types `Nullable` in schema inference for formats without information about nullability. @@ -504,7 +511,7 @@ Enabled by default. Ignore unknown keys in json object for named tuples. -Disabled by default. +Enabled by default. ## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple} @@ -1099,6 +1106,12 @@ Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedStrin Enabled by default. +### output_format_parquet_version {#output_format_parquet_version} + +The version of Parquet format used in output format. Supported versions: `1.0`, `2.4`, `2.6` and `2.latest`. + +Default value: `2.latest`. + ## Hive format settings {#hive-format-settings} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1060eae1b0e..f960d2df98e 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -851,6 +851,15 @@ Result: └─────────────┴───────────┘ ``` +## log_processors_profiles {#settings-log_processors_profiles} + +Write time that processor spent during execution/waiting for data to `system.processors_profile_log` table. + +See also: + +- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#system-processors_profile_log) +- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) + ## max_insert_block_size {#settings-max_insert_block_size} The size of blocks (in a count of rows) to form for insertion into a table. @@ -3939,3 +3948,71 @@ Default value: `0`. :::note Use this setting only for backward compatibility if your use cases depend on old syntax. ::: + +## final {#final} + +Automatically applies [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables and tables in sub-queries, and +distributed tables. + +Possible values: + +- 0 - disabled +- 1 - enabled + +Default value: `0`. + +Example: + +```sql +CREATE TABLE test +( + key Int64, + some String +) +ENGINE = ReplacingMergeTree +ORDER BY key; + +INSERT INTO test FORMAT Values (1, 'first'); +INSERT INTO test FORMAT Values (1, 'second'); + +SELECT * FROM test; +┌─key─┬─some───┐ +│ 1 │ second │ +└─────┴────────┘ +┌─key─┬─some──┐ +│ 1 │ first │ +└─────┴───────┘ + +SELECT * FROM test SETTINGS final = 1; +┌─key─┬─some───┐ +│ 1 │ second │ +└─────┴────────┘ + +SET final = 1; +SELECT * FROM test; +┌─key─┬─some───┐ +│ 1 │ second │ +└─────┴────────┘ +``` + +## asterisk_include_materialized_columns {#asterisk_include_materialized_columns} + +Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns for wildcard query (`SELECT *`). + +Possible values: + +- 0 - disabled +- 1 - enabled + +Default value: `0`. + +## asterisk_include_alias_columns {#asterisk_include_alias_columns} + +Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns for wildcard query (`SELECT *`). + +Possible values: + +- 0 - disabled +- 1 - enabled + +Default value: `0`. diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md new file mode 100644 index 00000000000..a2e7a9ebabd --- /dev/null +++ b/docs/en/operations/system-tables/processors_profile_log.md @@ -0,0 +1,76 @@ +# system.processors_profile_log {#system-processors_profile_log} + +This table contains profiling on processors level (that you can find in [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)). + +Columns: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. +- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened. +- `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of processor +- `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processors IDs +- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query +- `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor. +- `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was executed. +- `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from other processor). +- `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because output port was full. +- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step. +- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result. +- `input_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows consumed by processor. +- `input_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes consumed by processor. +- `output_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows generated by processor. +- `output_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes generated by processor. +**Example** + +Query: + +``` sql +EXPLAIN PIPELINE +SELECT sleep(1) +┌─explain─────────────────────────┐ +│ (Expression) │ +│ ExpressionTransform │ +│ (SettingQuotaAndLimits) │ +│ (ReadFromStorage) │ +│ SourceFromSingleChunk 0 → 1 │ +└─────────────────────────────────┘ + +SELECT sleep(1) +SETTINGS log_processors_profiles = 1 +Query id: feb5ed16-1c24-4227-aa54-78c02b3b27d4 +┌─sleep(1)─┐ +│ 0 │ +└──────────┘ +1 rows in set. Elapsed: 1.018 sec. + +SELECT + name, + elapsed_us, + input_wait_elapsed_us, + output_wait_elapsed_us +FROM system.processors_profile_log +WHERE query_id = 'feb5ed16-1c24-4227-aa54-78c02b3b27d4' +ORDER BY name ASC +``` + +Result: + +``` text +┌─name────────────────────┬─elapsed_us─┬─input_wait_elapsed_us─┬─output_wait_elapsed_us─┐ +│ ExpressionTransform │ 1000497 │ 2823 │ 197 │ +│ LazyOutputFormat │ 36 │ 1002188 │ 0 │ +│ LimitsCheckingTransform │ 10 │ 1002994 │ 106 │ +│ NullSource │ 5 │ 1002074 │ 0 │ +│ NullSource │ 1 │ 1002084 │ 0 │ +│ SourceFromSingleChunk │ 45 │ 4736 │ 1000819 │ +└─────────────────────────┴────────────┴───────────────────────┴────────────────────────┘ +``` + +Here you can see: + +- `ExpressionTransform` was executing `sleep(1)` function, so it `work` will takes 1e6, and so `elapsed_us` > 1e6. +- `SourceFromSingleChunk` need to wait, because `ExpressionTransform` does not accept any data during execution of `sleep(1)`, so it will be in `PortFull` state for 1e6 us, and so `output_wait_elapsed_us` > 1e6. +- `LimitsCheckingTransform`/`NullSource`/`LazyOutputFormat` need to wait until `ExpressionTransform` will execute `sleep(1)` to process the result, so `input_wait_elapsed_us` > 1e6. + +**See Also** + +- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) \ No newline at end of file diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md new file mode 100644 index 00000000000..e1bf8c3d63f --- /dev/null +++ b/docs/en/operations/system-tables/server_settings.md @@ -0,0 +1,52 @@ +--- +slug: /en/operations/system-tables/server_settings +--- +# server_settings + +Contains information about global settings for the server, which were specified in `config.xml`. +Currently, the table shows only settings from the first layer of `config.xml` and doesn't support nested configs (e.g. [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)). + +Columns: + +- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value. +- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml` +- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description. +- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type. + +**Example** + +The following example shows how to get information about server settings which name contains `thread_pool`. + +``` sql +SELECT * +FROM system.server_settings +WHERE name LIKE '%thread_pool%' +``` + +``` text +┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐ +│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ +│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ +│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ +│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ +│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ +│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ +└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘ +``` + +Using of `WHERE changed` can be useful, for example, when you want to check +whether settings in configuration files are loaded correctly and are in use. + + + +``` sql +SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_size' +``` + +**See also** + +- [Settings](../../operations/system-tables/settings.md) +- [Configuration Files](../../operations/configuration-files.md) +- [Server Settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index 8ce74ea3ae3..a3dfa937abe 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -16,6 +16,7 @@ Columns: - `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: - `0` — Current user can change the setting. - `1` — Current user can’t change the setting. +- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value. **Example** diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index 214f8157d48..c0ddacc719c 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -28,6 +28,7 @@ Columns: - `revision` (UInt32) — ClickHouse revision. - `source_file` (LowCardinality(String)) — Source file from which the logging was done. - `source_line` (UInt64) — Source line from which the logging was done. +- `message_format_string` (LowCardinality(String)) — A format string that was used to format the message. **Example** @@ -51,4 +52,5 @@ message: Update period 15 seconds revision: 54440 source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void DB::DNSCacheUpdater::start() source_line: 45 +message_format_string: Update period {} seconds ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md index 4d29e6f1272..bdcc6c0e3d0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md @@ -7,8 +7,8 @@ sidebar_position: 37 Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. -Returns Float64. When `n <= 1`, returns +∞. +Returns Float64. When `n <= 1`, returns `nan`. :::note This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error. -::: \ No newline at end of file +::: diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index a4479fd8589..c5244cf62e3 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -48,7 +48,35 @@ When dividing by zero you get ‘inf’, ‘-inf’, or ‘nan’. ## intDiv(a, b) Calculates the quotient of the numbers. Divides into integers, rounding down (by the absolute value). -An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one. + +Returns an integer of the type of the dividend (the first parameter). + +An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one. + +**Example** + +Query: + +```sql +SELECT + intDiv(toFloat64(1), 0.001) AS res, + toTypeName(res) +``` +```response +┌──res─┬─toTypeName(intDiv(toFloat64(1), 0.001))─┐ +│ 1000 │ Int64 │ +└──────┴─────────────────────────────────────────┘ +``` + +```sql +SELECT + intDiv(1, 0.001) AS res, + toTypeName(res) +``` +```response +Received exception from server (version 23.2.1): +Code: 153. DB::Exception: Received from localhost:9000. DB::Exception: Cannot perform integer division, because it will produce infinite or too large number: While processing intDiv(1, 0.001) AS res, toTypeName(res). (ILLEGAL_DIVISION) +``` ## intDivOrZero(a, b) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index bcd118ce0be..10bc73c4a72 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -579,3 +579,33 @@ Result: │ 3628800 │ └───────────────┘ ``` + +## width_bucket(operand, low, high, count) + +Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`. + +`operand`, `low`, `high` can be any native number type. `count` can only be unsigned native integer and its value cannot be zero. + +**Syntax** + +```sql +widthBucket(operand, low, high, count) +``` + +There is also a case insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases. + +**Example** + +Query: + +``` sql +SELECT widthBucket(10.15, -8.6, 23, 18); +``` + +Result: + +``` text +┌─widthBucket(10.15, -8.6, 23, 18)─┐ +│ 11 │ +└──────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 7cad6b2fbbf..6015bb79b87 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -226,6 +226,17 @@ SELECT splitByNonAlpha(' 1! a, b. '); Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default. Returns the string. +**Example** + +``` sql +SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString; +``` +```text +┌─DateString──────────┐ +│ 12/05/2021 12:50:00 │ +└─────────────────────┘ +``` + ## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings]) Selects substrings of consecutive bytes from the ranges a-z and A-Z.Returns an array of substrings. @@ -364,4 +375,4 @@ Result: ┌─tokens────────────────────────────┐ │ ['test1','test2','test3','test4'] │ └───────────────────────────────────┘ -``` \ No newline at end of file +``` diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index d1f0e44f6b4..50e15f70f5d 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/functions/string-replace-functions sidebar_position: 42 -sidebar_label: For Replacing in Strings +sidebar_label: Replacing in Strings --- # Functions for Searching and Replacing in Strings diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index e52eb00ab44..2f660d820d1 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/functions/string-search-functions sidebar_position: 41 -sidebar_label: For Searching in Strings +sidebar_label: Searching in Strings --- # Functions for Searching in Strings @@ -382,12 +382,12 @@ Checks whether string `haystack` matches the regular expression `pattern`. The p Returns 1 in case of a match, and 0 otherwise. Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes. -If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, the behavior is undefined. -No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. +If the haystack or the pattern are not valid UTF-8, then the behavior is undefined. +No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`. -For patterns to search for substrings in a string, it is better to use LIKE or ‘position’, since they work much faster. +For patterns to search for substrings in a string, it is better to use functions [like](#like) or [position](#position) since they work much faster. ## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) @@ -529,21 +529,25 @@ Result: ## like(haystack, pattern), haystack LIKE pattern operator -Checks whether a string matches a simple regular expression. -The regular expression can contain the metasymbols `%` and `_`. +Checks whether a string matches a LIKE expression. +A LIKE expression contains a mix of normal characters and the following metasymbols: -`%` indicates any quantity of any bytes (including zero characters). +- `%` indicates an arbitrary number of arbitrary characters (including zero characters). -`_` indicates any one byte. +- `_` indicates a single arbitrary character. -Use the backslash (`\`) for escaping metasymbols. See the note on escaping in the description of the ‘match’ function. +- `\` is for escaping literals `%`, `_` and `\`. Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. -If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, then the behavior is undefined. -No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. +If the haystack or the pattern are not valid UTF-8, then the behavior is undefined. +No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. -For regular expressions like `%needle%`, the code is more optimal and works as fast as the `position` function. -For other regular expressions, the code is the same as for the ‘match’ function. +To match against literals `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash, i.e. `\%`, `\_` and `\\`. +The backslash loses its special meaning, i.e. is interpreted literally, if it prepends a character different than `%`, `_` or `\`. +Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`. + +For patterns of the form `%needle%`, the function is as fast as the `position` function. +Other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`. ## notLike(haystack, pattern), haystack NOT LIKE pattern operator diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 1905e53af3e..4a6780df292 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -1,8 +1,8 @@ --- slug: /en/sql-reference/functions/tuple-map-functions sidebar_position: 46 -sidebar_label: Working with maps -title: "Functions for maps" +sidebar_label: Maps +title: "Functions for Maps" --- ## map @@ -440,7 +440,7 @@ mapApply(func, map) **Parameters** -- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). - `map` — [Map](../../sql-reference/data-types/map.md). **Returned value** @@ -480,7 +480,7 @@ mapFilter(func, map) **Parameters** -- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). - `map` — [Map](../../sql-reference/data-types/map.md). **Returned value** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index e587e56b20e..5d96113fe50 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -6,22 +6,23 @@ sidebar_label: Type Conversion # Type Conversion Functions -## Common Issues of Numeric Conversions +## Common Issues with Data Conversion -When you convert a value from one to another data type, you should remember that if you try to fit a value from a larger data type to a smaller one (for example Int64 to Int32), or convert from one data type to another (for example `String` to `Int`), you could have data loss. Test beforehand. +Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between +incompatible datatypes (for example from `String` to `Int`). Make sure to check carefully if the result is as expected. -ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion). +ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion). ## toInt(8\|16\|32\|64\|128\|256) -Converts an input value to the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: -- `toInt8(expr)` — Results in the `Int8` data type. -- `toInt16(expr)` — Results in the `Int16` data type. -- `toInt32(expr)` — Results in the `Int32` data type. -- `toInt64(expr)` — Results in the `Int64` data type. -- `toInt128(expr)` — Results in the `Int128` data type. -- `toInt256(expr)` — Results in the `Int256` data type. +- `toInt8(expr)` — Converts to a value of data type `Int8`. +- `toInt16(expr)` — Converts to a value of data type `Int16`. +- `toInt32(expr)` — Converts to a value of data type `Int32`. +- `toInt64(expr)` — Converts to a value of data type `Int64`. +- `toInt128(expr)` — Converts to a value of data type `Int128`. +- `toInt256(expr)` — Converts to a value of data type `Int256`. **Arguments** @@ -53,7 +54,7 @@ Result: ## toInt(8\|16\|32\|64\|128\|256)OrZero -It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns 0. +Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. **Example** @@ -73,7 +74,7 @@ Result: ## toInt(8\|16\|32\|64\|128\|256)OrNull -It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns NULL. +It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `NULL`. **Example** @@ -93,7 +94,7 @@ Result: ## toInt(8\|16\|32\|64\|128\|256)OrDefault -It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns the default type value. +It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns the default type value. **Example** @@ -116,11 +117,11 @@ Result: Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: -- `toUInt8(expr)` — Results in the `UInt8` data type. -- `toUInt16(expr)` — Results in the `UInt16` data type. -- `toUInt32(expr)` — Results in the `UInt32` data type. -- `toUInt64(expr)` — Results in the `UInt64` data type. -- `toUInt256(expr)` — Results in the `UInt256` data type. +- `toUInt8(expr)` — Converts to a value of data type `UInt8`. +- `toUInt16(expr)` — Converts to a value of data type `UInt16`. +- `toUInt32(expr)` — Converts to a value of data type `UInt32`. +- `toUInt64(expr)` — Converts to a value of data type `UInt64`. +- `toUInt256(expr)` — Converts to a value of data type `UInt256`. **Arguments** @@ -128,7 +129,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint **Returned value** -Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type. +- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type. Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. @@ -166,26 +167,30 @@ Result: ## toDate -Converts the argument to `Date` data type. +Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type. + +If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: -If the argument is `DateTime` or `DateTime64`, it truncates it, leaving the date component of the DateTime: ```sql SELECT now() AS x, toDate(x) ``` + ```response ┌───────────────────x─┬─toDate(now())─┐ │ 2022-12-30 13:44:17 │ 2022-12-30 │ └─────────────────────┴───────────────┘ ``` -If the argument is a string, it is parsed as Date or DateTime. If it was parsed as DateTime, the date component is being used: +If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is being used: + ```sql SELECT toDate('2022-12-30') AS x, toTypeName(x) ``` + ```response ┌──────────x─┬─toTypeName(toDate('2022-12-30'))─┐ │ 2022-12-30 │ Date │ @@ -193,18 +198,20 @@ SELECT 1 row in set. Elapsed: 0.001 sec. ``` + ```sql SELECT toDate('2022-12-30 01:02:03') AS x, toTypeName(x) ``` + ```response ┌──────────x─┬─toTypeName(toDate('2022-12-30 01:02:03'))─┐ │ 2022-12-30 │ Date │ └────────────┴───────────────────────────────────────────┘ ``` -If the argument is a number and it looks like a UNIX timestamp (is greater than 65535), it is interpreted as a DateTime, then truncated to Date in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to Date depends on the timezone: +If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone: ```sql SELECT @@ -217,6 +224,7 @@ SELECT toDate(ts) AS date_Amsterdam_2, toDate(ts, 'Pacific/Apia') AS date_Samoa_2 ``` + ```response Row 1: ────── @@ -232,7 +240,7 @@ date_Samoa_2: 2022-12-31 The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones. -If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (a UNIX day) and converted to Date. It corresponds to the internal numeric representation of the `Date` data type. Example: +If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: ```sql SELECT toDate(12345) @@ -270,8 +278,6 @@ SELECT └─────────────────────┴───────────────┴─────────────┴─────────────────────┘ ``` -Have a nice day working with dates and times. - ## toDateOrZero ## toDateOrNull @@ -288,7 +294,7 @@ Have a nice day working with dates and times. ## toDate32 -Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. +Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. **Syntax** @@ -302,9 +308,7 @@ toDate32(expr) **Returned value** -- A calendar date. - -Type: [Date32](/docs/en/sql-reference/data-types/date32.md). +- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md). **Example** @@ -332,7 +336,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value); └────────────┴────────────────────────────────────┘ ``` -3. With `Date`-type argument: +3. With [Date](/docs/en/sql-reference/data-types/date.md) argument: ``` sql SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); @@ -386,7 +390,7 @@ Result: ## toDate32OrDefault -Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if an invalid argument is received. +Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received. **Example** @@ -666,7 +670,7 @@ YYYY-MM-DD YYYY-MM-DD hh:mm:ss ``` -As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing ‘toDate(unix_timestamp)’, which otherwise would be an error and would require writing the more cumbersome ‘toDate(toDateTime(unix_timestamp))’. +As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing `toDate(unix_timestamp)`, which otherwise would be an error and would require writing the more cumbersome `toDate(toDateTime(unix_timestamp))`. Conversion between a date and a date with time is performed the natural way: by adding a null time or dropping the time. @@ -696,7 +700,7 @@ Also see the `toUnixTimestamp` function. ## toFixedString(s, N) -Converts a String type argument to a FixedString(N) type (a string with fixed length N). N must be a constant. +Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N). If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown. ## toStringCutToZero(s) @@ -914,7 +918,7 @@ Result: └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Conversion to FixedString(N) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Conversion to [FixedString (N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported. @@ -1174,7 +1178,7 @@ For all of the formats with separator the function parses months names expressed **Returned value** -- `time_string` converted to the `DateTime` data type. +- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Examples** @@ -1254,10 +1258,10 @@ Result: **See Also** -- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) - [RFC 1123](https://tools.ietf.org/html/rfc1123) - [toDate](#todate) - [toDateTime](#todatetime) +- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) ## parseDateTimeBestEffortUS diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md index c4ca89f3284..c2424ff6046 100644 --- a/docs/en/sql-reference/statements/create/settings-profile.md +++ b/docs/en/sql-reference/statements/create/settings-profile.md @@ -19,8 +19,15 @@ CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_n ## Example +Create a user: +```sql +CREATE USER robin IDENTIFIED BY 'password'; +``` + Create the `max_memory_usage_profile` settings profile with value and constraints for the `max_memory_usage` setting and assign it to user `robin`: ``` sql -CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin +CREATE +SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 +TO robin ``` diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 119f25d6d00..54977e1b0ab 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -17,10 +17,11 @@ By default, tables are created only on the current server. Distributed DDL queri ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1], - name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2], + name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'], + name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'], ... ) ENGINE = engine + COMMENT 'comment for table' ``` Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine. @@ -32,6 +33,8 @@ Expressions can also be defined for default values (see below). If necessary, primary key can be specified, with one or more key expressions. +Comments can be added for columns and for the table. + ### With a Schema Similar to Other Table ``` sql @@ -127,6 +130,26 @@ Default expressions may be defined as an arbitrary expression from table constan Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression. +Example: + +```sql +CREATE OR REPLACE TABLE test +( + id UInt64, + updated_at DateTime DEFAULT now(), + updated_at_date Date DEFAULT toDate(updated_at) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO test (id) Values (1); + +SELECT * FROM test; +┌─id─┬──────────updated_at─┬─updated_at_date─┐ +│ 1 │ 2023-02-24 17:06:46 │ 2023-02-24 │ +└────┴─────────────────────┴─────────────────┘ +``` + ### MATERIALIZED `MATERIALIZED expr` @@ -135,6 +158,36 @@ Materialized expression. Such a column can’t be specified for INSERT, because For an INSERT without a list of columns, these columns are not considered. In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. +Example: + +```sql +CREATE OR REPLACE TABLE test +( + id UInt64, + updated_at DateTime MATERIALIZED now(), + updated_at_date Date MATERIALIZED toDate(updated_at) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO test Values (1); + +SELECT * FROM test; +┌─id─┐ +│ 1 │ +└────┘ + +SELECT id, updated_at, updated_at_date FROM test; +┌─id─┬──────────updated_at─┬─updated_at_date─┐ +│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │ +└────┴─────────────────────┴─────────────────┘ + +SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1; +┌─id─┬──────────updated_at─┬─updated_at_date─┐ +│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │ +└────┴─────────────────────┴─────────────────┘ +``` + ### EPHEMERAL `EPHEMERAL [expr]` @@ -142,6 +195,34 @@ In addition, this column is not substituted when using an asterisk in a SELECT q Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required. INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. +Example: + +```sql +CREATE OR REPLACE TABLE test +( + id UInt64, + unhexed String EPHEMERAL, + hexed FixedString(4) DEFAULT unhex(unhexed) +) +ENGINE = MergeTree +ORDER BY id + +INSERT INTO test (id, unhexed) Values (1, '5a90b714'); + +SELECT + id, + hexed, + hex(hexed) +FROM test +FORMAT Vertical; + +Row 1: +────── +id: 1 +hexed: Z�� +hex(hexed): 5A90B714 +``` + ### ALIAS `ALIAS expr` @@ -156,6 +237,29 @@ If you add a new column to a table but later change its default expression, the It is not possible to set default values for elements in nested data structures. +```sql +CREATE OR REPLACE TABLE test +( + id UInt64, + size_bytes Int64, + size String Alias formatReadableSize(size_bytes) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO test Values (1, 4678899); + +SELECT id, size_bytes, size FROM test; +┌─id─┬─size_bytes─┬─size─────┐ +│ 1 │ 4678899 │ 4.46 MiB │ +└────┴────────────┴──────────┘ + +SELECT * FROM test SETTINGS asterisk_include_alias_columns=1; +┌─id─┬─size_bytes─┬─size─────┐ +│ 1 │ 4678899 │ 4.46 MiB │ +└────┴────────────┴──────────┘ +``` + ## Primary Key You can define a [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) when creating a table. Primary key can be specified in two ways: @@ -166,7 +270,7 @@ You can define a [primary key](../../../engines/table-engines/mergetree-family/m CREATE TABLE db.table_name ( name1 type1, name2 type2, ..., - PRIMARY KEY(expr1[, expr2,...])] + PRIMARY KEY(expr1[, expr2,...]) ) ENGINE = engine; ``` diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 527b31b36a4..acdede3c673 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -54,6 +54,10 @@ SELECT * FROM view(column1=value1, column2=value2 ...) CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ... ``` +:::tip +Here is a step by step guide on using [Materialized views](docs/en/guides/developer/cascading-materialized-views.md). +::: + Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query. When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data. diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index 609740dec56..03a4ab3453c 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -95,7 +95,7 @@ You can insert data separately from the query by using the command-line client o If table has [constraints](../../sql-reference/statements/create/table.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped. -## Inserting the Results of `SELECT` +## Inserting the Results of SELECT **Syntax** diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index b751384cb72..fb6c1f94902 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -36,6 +36,8 @@ Queries that use `FINAL` are executed slightly slower than similar queries that **In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine have’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). +`FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile. + ## Implementation Details If the `FROM` clause is omitted, data will be read from the `system.one` table. diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 4c2054274e4..63c5042f9e8 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -83,9 +83,13 @@ Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF` ### String -Only string literals in single quotes are supported. The enclosed characters can be backslash-escaped. The following escape sequences have a corresponding special value: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, escape sequences in the format `\c`, where `c` is any character, are converted to `c`. It means that you can use the sequences `\'`and`\\`. The value will have the [String](../sql-reference/data-types/string.md) type. +String literals must be enclosed in single quotes, double quotes are not supported. +Escaping works either -In string literals, you need to escape at least `'` and `\`. Single quotes can be escaped with the single quote, literals `'It\'s'` and `'It''s'` are equal. +- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or +- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters different than the listed ones. + +In string literals, you need to escape at least `'` and `\` using escape codes `\'` (or: `''`) and `\\`. ### Compound diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md index 3af48249e3c..811eae12942 100644 --- a/docs/en/sql-reference/table-functions/format.md +++ b/docs/en/sql-reference/table-functions/format.md @@ -6,25 +6,28 @@ sidebar_label: format # format -Extracts table structure from data and parses it according to specified input format. +Parses data from arguments according to specified input format. If structure argument is not specified, it's extracted from the data. **Syntax** ``` sql -format(format_name, data) +format(format_name, [structure], data) ``` **Parameters** - `format_name` — The [format](../../interfaces/formats.md#formats) of the data. +- `structure` - Structure of the table. Optional. Format 'column1_name column1_type, column2_name column2_type, ...'. - `data` — String literal or constant expression that returns a string containing data in specified format **Returned value** -A table with data parsed from `data` argument according specified format and extracted schema. +A table with data parsed from `data` argument according to specified format and specified or extracted structure. **Examples** +Without `structure` argument: + **Query:** ``` sql SELECT * FROM format(JSONEachRow, @@ -67,6 +70,29 @@ $$) └──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +With `structure` argument: + +**Query:** +```sql +SELECT * FROM format(JSONEachRow, 'a String, b UInt32', +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** +```response +┌─a─────┬───b─┐ +│ Hello │ 111 │ +│ World │ 123 │ +│ Hello │ 112 │ +│ World │ 124 │ +└───────┴─────┘ +``` + **See Also** - [Formats](../../interfaces/formats.md) diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md new file mode 100644 index 00000000000..fda4d274005 --- /dev/null +++ b/docs/en/sql-reference/table-functions/iceberg.md @@ -0,0 +1,58 @@ +--- +slug: /en/sql-reference/table-functions/iceberg +sidebar_label: Iceberg +--- + +# iceberg Table Function + +Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3. + +## Syntax + +``` sql +iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure]) +``` + +## Arguments + +- `url` — Bucket url with the path to an existing Iceberg table in S3. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). +- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. + +Engine parameters can be specified using [Named Collections](../../operations/named-collections.md) + +**Returned value** + +A table with the specified structure for reading data in the specified Iceberg table in S3. + +**Example** + +```sql +SELECT * FROM iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test') +``` + +Using named collections: + +```xml + + + + http://test.s3.amazonaws.com/clickhouse-bucket/ + test + test + auto + auto + + + +``` + +```sql +SELECT * FROM iceberg(iceberg_conf, filename = 'test_table') +DESCRIBE iceberg(iceberg_conf, filename = 'test_table') +``` + +**See Also** + +- [Iceberg engine](/docs/en/engines/table-engines/integrations/iceberg.md) diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md index 94b23bc695c..b49c2f8da20 100644 --- a/docs/en/sql-reference/table-functions/index.md +++ b/docs/en/sql-reference/table-functions/index.md @@ -23,23 +23,3 @@ You can use table functions in: :::warning You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. ::: - -| Function | Description | -|------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------| -| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. | -| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. | -| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. | -| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. | -| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. | -| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. | -| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. | -| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. | -| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. | -| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. | -| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. | -| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. | - -:::note -Only these table functions are enabled in readonly mode : -null, view, viewIfPermitted, numbers, numbers_mt, generateRandom, values, cluster, clusterAllReplicas -::: \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index a8186c20026..59d49830852 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -567,7 +567,7 @@ SELECT ts, value, round(avg(value) OVER (PARTITION BY metric ORDER BY toDate(ts) - Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) moving_avg_10_days_temp + Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) AS moving_avg_10_days_temp FROM sensors ORDER BY metric ASC, diff --git a/docs/ru/operations/system-tables/server_settings.md b/docs/ru/operations/system-tables/server_settings.md new file mode 100644 index 00000000000..4c0f4af1da2 --- /dev/null +++ b/docs/ru/operations/system-tables/server_settings.md @@ -0,0 +1,53 @@ +--- +slug: /ru/operations/system-tables/server_settings +--- +# system.server_settings + +Содержит информацию о конфигурации сервера. +В настоящий момент таблица содержит только верхнеуровневые параметры из файла `config.xml` и не поддерживает вложенные конфигурации +(например [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)) + +Столбцы: + +- `name` ([String](../../sql-reference/data-types/string.md)) — имя настройки. +- `value` ([String](../../sql-reference/data-types/string.md)) — значение настройки. +- `default` ([String](../../sql-reference/data-types/string.md)) — значению настройки по умолчанию. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — показывает, была ли настройка указана в `config.xml` или является значением по-умолчанию. +- `description` ([String](../../sql-reference/data-types/string.md)) — краткое описание настройки. +- `type` ([String](../../sql-reference/data-types/string.md)) — тип настройки. + +**Пример** + +Пример показывает как получить информацию о настройках, имена которых содержат `thread_pool`. + +``` sql +SELECT * +FROM system.server_settings +WHERE name LIKE '%thread_pool%' +``` + +``` text +┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐ +│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ +│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ +│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ +│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ +│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ +│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ +└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘ +``` + +Использование `WHERE changed` может быть полезно, например, если необходимо проверить, +что настройки корректно загрузились из конфигурационного файла и используются. + + + +``` sql +SELECT * FROM system.settings WHERE changed AND name='max_thread_pool_size' +``` + +**Cм. также** + +- [Настройки](../../operations/system-tables/settings.md) +- [Конфигурационные файлы](../../operations/configuration-files.md) +- [Настройки сервера](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/ru/operations/system-tables/settings.md b/docs/ru/operations/system-tables/settings.md index 09802415054..31eb77d4d41 100644 --- a/docs/ru/operations/system-tables/settings.md +++ b/docs/ru/operations/system-tables/settings.md @@ -16,6 +16,7 @@ slug: /ru/operations/system-tables/settings - `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Показывает, может ли пользователь изменять настройку: - `0` — Текущий пользователь может изменять настройку. - `1` — Текущий пользователь не может изменять настройку. +- `default` ([String](../../sql-reference/data-types/string.md)) — значению настройки по умолчанию. **Пример** diff --git a/docs/ru/operations/system-tables/text_log.md b/docs/ru/operations/system-tables/text_log.md index 69c52471834..59ae804d85f 100644 --- a/docs/ru/operations/system-tables/text_log.md +++ b/docs/ru/operations/system-tables/text_log.md @@ -28,6 +28,7 @@ slug: /ru/operations/system-tables/text_log - `revision` (UInt32) — ревизия ClickHouse. - `source_file` (LowCardinality(String)) — исходный файл, из которого была сделана запись. - `source_line` (UInt64) — исходная строка, из которой была сделана запись. +- `message_format_string` (LowCardinality(String)) — форматная строка, с помощью которой было отформатировано сообщение. **Пример** @@ -51,4 +52,5 @@ message: Update period 15 seconds revision: 54440 source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void DB::DNSCacheUpdater::start() source_line: 45 +message_format_string: Update period {} seconds ``` diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 037be826b97..090461df988 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -18,12 +18,14 @@ Group=clickhouse Restart=always RestartSec=30 # Since ClickHouse is systemd aware default 1m30sec may not be enough -TimeoutStartSec=inifinity +TimeoutStartSec=infinity # %p is resolved to the systemd unit name RuntimeDirectory=%p ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid # Minus means that this file is optional. EnvironmentFile=-/etc/default/%p +# Bring back /etc/default/clickhouse for backward compatibility +EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 5674923fba5..66dff8ec050 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -327,7 +327,21 @@ try showClientVersion(); } - connect(); + try + { + connect(); + } + catch (const Exception & e) + { + if (e.code() != DB::ErrorCodes::AUTHENTICATION_FAILED || + config().has("password") || + config().getBool("ask-password", false) || + !is_interactive) + throw; + + config().setBool("ask-password", true); + connect(); + } /// Show warnings at the beginning of connection. if (is_interactive && !config().has("no-warnings")) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 48a3578dd7b..bc882719a08 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -908,7 +908,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab /// Exit if success if (task_status != TaskStatus::Finished) { - LOG_WARNING(log, "Create destination Tale Failed "); + LOG_WARNING(log, "Create destination table failed "); return false; } @@ -1473,7 +1473,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( if (count != 0) { - LOG_INFO(log, "Partition {} piece {}is not empty. In contains {} rows.", task_partition.name, current_piece_number, count); + LOG_INFO(log, "Partition {} piece {} is not empty. In contains {} rows.", task_partition.name, current_piece_number, count); Coordination::Stat stat_shards{}; zookeeper->get(partition_piece.getPartitionPieceShardsPath(), &stat_shards); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 9157dd5217f..fe2b758d371 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1,5 +1,8 @@ #include "LocalServer.h" +#include +#include +#include #include #include #include @@ -179,9 +182,9 @@ void LocalServer::tryInitPath() parent_folder = std::filesystem::temp_directory_path(); } - catch (const fs::filesystem_error& e) + catch (const fs::filesystem_error & e) { - // tmp folder don't exists? misconfiguration? chroot? + // The tmp folder doesn't exist? Is it a misconfiguration? Or chroot? LOG_DEBUG(log, "Can not get temporary folder: {}", e.what()); parent_folder = std::filesystem::current_path(); @@ -390,6 +393,21 @@ try std::cout << std::fixed << std::setprecision(3); std::cerr << std::fixed << std::setprecision(3); + /// Try to increase limit on number of open files. + { + rlimit rlim; + if (getrlimit(RLIMIT_NOFILE, &rlim)) + throw Poco::Exception("Cannot getrlimit"); + + if (rlim.rlim_cur < rlim.rlim_max) + { + rlim.rlim_cur = config().getUInt("max_open_files", static_cast(rlim.rlim_max)); + int rc = setrlimit(RLIMIT_NOFILE, &rlim); + if (rc != 0) + std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n'; + } + } + #if defined(FUZZING_MODE) static bool first_time = true; if (first_time) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 57b71601ba1..a97ecacc7e5 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -92,6 +91,7 @@ #include #include #include +#include #include #include @@ -663,7 +663,10 @@ try MainThreadStatus::getInstance(); - StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true)); + ServerSettings server_settings; + server_settings.loadSettingsFromConfig(config()); + + StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces); #if USE_HDFS /// This will point libhdfs3 to the right location for its config. @@ -697,7 +700,7 @@ try { const String config_path = config().getString("config-file", "config.xml"); const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf"); - setenv("OPENSSL_CONF", config_dir.string(), true); + setenv("OPENSSL_CONF", config_dir.c_str(), true); } #endif @@ -748,9 +751,9 @@ try // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. GlobalThreadPool::initialize( - config().getUInt("max_thread_pool_size", 10000), - config().getUInt("max_thread_pool_free_size", 1000), - config().getUInt("thread_pool_queue_size", 10000)); + server_settings.max_thread_pool_size, + server_settings.max_thread_pool_free_size, + server_settings.thread_pool_queue_size); #if USE_AZURE_BLOB_STORAGE /// It makes sense to deinitialize libxml after joining of all threads @@ -766,9 +769,9 @@ try #endif IOThreadPool::initialize( - config().getUInt("max_io_thread_pool_size", 100), - config().getUInt("max_io_thread_pool_free_size", 0), - config().getUInt("io_thread_pool_queue_size", 10000)); + server_settings.max_io_thread_pool_size, + server_settings.max_io_thread_pool_free_size, + server_settings.io_thread_pool_queue_size); /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) @@ -784,15 +787,15 @@ try } } - Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024)); + Poco::ThreadPool server_pool(3, server_settings.max_connections); std::mutex servers_lock; std::vector servers; std::vector servers_to_start_before_tables; /// This object will periodically calculate some metrics. ServerAsynchronousMetrics async_metrics( global_context, - config().getUInt("asynchronous_metrics_update_period_s", 1), - config().getUInt("asynchronous_heavy_metrics_update_period_s", 120), + server_settings.asynchronous_metrics_update_period_s, + server_settings.asynchronous_heavy_metrics_update_period_s, [&]() -> std::vector { std::vector metrics; @@ -807,7 +810,7 @@ try } ); - ConnectionCollector::init(global_context, config().getUInt("max_threads_for_connection_collector", 10)); + ConnectionCollector::init(global_context, server_settings.max_threads_for_connection_collector); bool has_zookeeper = config().has("zookeeper"); @@ -826,6 +829,9 @@ try Settings::checkNoSettingNamesAtTopLevel(config(), config_path); + /// We need to reload server settings because config could be updated via zookeeper. + server_settings.loadSettingsFromConfig(config()); + #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -945,7 +951,7 @@ try std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); fs::path path = path_str; - std::string default_database = config().getString("default_database", "default"); + std::string default_database = server_settings.default_database.toString(); /// Check that the process user id matches the owner of the data. const auto effective_user_id = geteuid(); @@ -1036,21 +1042,18 @@ try LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); /// Storage with temporary data for processing of heavy queries. - if (auto temporary_policy = config().getString("tmp_policy", ""); !temporary_policy.empty()) + if (!server_settings.tmp_policy.value.empty()) { - size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); - global_context->setTemporaryStoragePolicy(temporary_policy, max_size); + global_context->setTemporaryStoragePolicy(server_settings.tmp_policy, server_settings.max_temporary_data_on_disk_size); } - else if (auto temporary_cache = config().getString("temporary_data_in_cache", ""); !temporary_cache.empty()) + else if (!server_settings.temporary_data_in_cache.value.empty()) { - size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); - global_context->setTemporaryStorageInCache(temporary_cache, max_size); + global_context->setTemporaryStorageInCache(server_settings.temporary_data_in_cache, server_settings.max_temporary_data_on_disk_size); } else { std::string temporary_path = config().getString("tmp_path", path / "tmp/"); - size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); - global_context->setTemporaryStoragePath(temporary_path, max_size); + global_context->setTemporaryStoragePath(temporary_path, server_settings.max_temporary_data_on_disk_size); } /** Directory with 'flags': files indicating temporary settings for the server set by system administrator. @@ -1185,10 +1188,12 @@ try { Settings::checkNoSettingNamesAtTopLevel(*config, config_path); - /// Limit on total memory usage - size_t max_server_memory_usage = config->getUInt64("max_server_memory_usage", 0); + ServerSettings server_settings; + server_settings.loadSettingsFromConfig(*config); - double max_server_memory_usage_to_ram_ratio = config->getDouble("max_server_memory_usage_to_ram_ratio", 0.9); + size_t max_server_memory_usage = server_settings.max_server_memory_usage; + + double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio; size_t default_max_server_memory_usage = static_cast(memory_amount * max_server_memory_usage_to_ram_ratio); if (max_server_memory_usage == 0) @@ -1216,8 +1221,7 @@ try total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); - bool allow_use_jemalloc_memory = config->getBool("allow_use_jemalloc_memory", true); - total_memory_tracker.setAllowUseJemallocMemory(allow_use_jemalloc_memory); + total_memory_tracker.setAllowUseJemallocMemory(server_settings.allow_use_jemalloc_memory); auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker(); total_memory_tracker.setOvercommitTracker(global_overcommit_tracker); @@ -1235,36 +1239,23 @@ try global_context->setRemoteHostFilter(*config); - /// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default) - if (config->has("max_table_size_to_drop")) - global_context->setMaxTableSizeToDrop(config->getUInt64("max_table_size_to_drop")); - - if (config->has("max_partition_size_to_drop")) - global_context->setMaxPartitionSizeToDrop(config->getUInt64("max_partition_size_to_drop")); + global_context->setMaxTableSizeToDrop(server_settings.max_table_size_to_drop); + global_context->setMaxPartitionSizeToDrop(server_settings.max_partition_size_to_drop); ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited; - if (config->has("concurrent_threads_soft_limit_num")) + if (server_settings.concurrent_threads_soft_limit_num > 0 && server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) + concurrent_threads_soft_limit = server_settings.concurrent_threads_soft_limit_num; + if (server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0) { - auto value = config->getUInt64("concurrent_threads_soft_limit_num", 0); - if (value > 0 && value < concurrent_threads_soft_limit) - concurrent_threads_soft_limit = value; - } - if (config->has("concurrent_threads_soft_limit_ratio_to_cores")) - { - auto value = config->getUInt64("concurrent_threads_soft_limit_ratio_to_cores", 0) * std::thread::hardware_concurrency(); + auto value = server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency(); if (value > 0 && value < concurrent_threads_soft_limit) concurrent_threads_soft_limit = value; } ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit); - if (config->has("max_concurrent_queries")) - global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0)); - - if (config->has("max_concurrent_insert_queries")) - global_context->getProcessList().setMaxInsertQueriesAmount(config->getInt("max_concurrent_insert_queries", 0)); - - if (config->has("max_concurrent_select_queries")) - global_context->getProcessList().setMaxSelectQueriesAmount(config->getInt("max_concurrent_select_queries", 0)); + global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries); + global_context->getProcessList().setMaxInsertQueriesAmount(server_settings.max_concurrent_insert_queries); + global_context->getProcessList().setMaxSelectQueriesAmount(server_settings.max_concurrent_select_queries); if (config->has("keeper_server")) global_context->updateKeeperConfiguration(*config); @@ -1273,56 +1264,36 @@ try /// Note: If you specified it in the top level config (not it config of default profile) /// then ClickHouse will use it exactly. /// This is done for backward compatibility. - if (global_context->areBackgroundExecutorsInitialized() && (config->has("background_pool_size") || config->has("background_merges_mutations_concurrency_ratio"))) + if (global_context->areBackgroundExecutorsInitialized()) { - auto new_pool_size = config->getUInt64("background_pool_size", 16); - auto new_ratio = config->getUInt64("background_merges_mutations_concurrency_ratio", 2); + auto new_pool_size = server_settings.background_pool_size; + auto new_ratio = server_settings.background_merges_mutations_concurrency_ratio; global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size * new_ratio); - auto new_scheduling_policy = config->getString("background_merges_mutations_scheduling_policy", "round_robin"); - global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_scheduling_policy); + global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings.background_merges_mutations_scheduling_policy.toString()); } - if (global_context->areBackgroundExecutorsInitialized() && config->has("background_move_pool_size")) + if (global_context->areBackgroundExecutorsInitialized()) { - auto new_pool_size = config->getUInt64("background_move_pool_size"); + auto new_pool_size = server_settings.background_move_pool_size; global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size); } - if (global_context->areBackgroundExecutorsInitialized() && config->has("background_fetches_pool_size")) + if (global_context->areBackgroundExecutorsInitialized()) { - auto new_pool_size = config->getUInt64("background_fetches_pool_size"); + auto new_pool_size = server_settings.background_fetches_pool_size; global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size); } - if (global_context->areBackgroundExecutorsInitialized() && config->has("background_common_pool_size")) + if (global_context->areBackgroundExecutorsInitialized()) { - auto new_pool_size = config->getUInt64("background_common_pool_size"); + auto new_pool_size = server_settings.background_common_pool_size; global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size); } - if (config->has("background_buffer_flush_schedule_pool_size")) - { - auto new_pool_size = config->getUInt64("background_buffer_flush_schedule_pool_size"); - global_context->getBufferFlushSchedulePool().increaseThreadsCount(new_pool_size); - } - - if (config->has("background_schedule_pool_size")) - { - auto new_pool_size = config->getUInt64("background_schedule_pool_size"); - global_context->getSchedulePool().increaseThreadsCount(new_pool_size); - } - - if (config->has("background_message_broker_schedule_pool_size")) - { - auto new_pool_size = config->getUInt64("background_message_broker_schedule_pool_size"); - global_context->getMessageBrokerSchedulePool().increaseThreadsCount(new_pool_size); - } - - if (config->has("background_distributed_schedule_pool_size")) - { - auto new_pool_size = config->getUInt64("background_distributed_schedule_pool_size"); - global_context->getDistributedSchedulePool().increaseThreadsCount(new_pool_size); - } + global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings.background_buffer_flush_schedule_pool_size); + global_context->getSchedulePool().increaseThreadsCount(server_settings.background_schedule_pool_size); + global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings.background_message_broker_schedule_pool_size); + global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings.background_distributed_schedule_pool_size); if (config->has("resources")) { @@ -1467,18 +1438,15 @@ try }); /// Limit on total number of concurrently executed queries. - global_context->getProcessList().setMaxSize(config().getInt("max_concurrent_queries", 0)); + global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries); /// Set up caches. - /// Lower cache size on low-memory systems. - double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5); - size_t max_cache_size = static_cast(memory_amount * cache_size_to_ram_max_ratio); + size_t max_cache_size = static_cast(memory_amount * server_settings.cache_size_to_ram_max_ratio); - /// Size of cache for uncompressed blocks. Zero means disabled. - String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "SLRU"); + String uncompressed_cache_policy = server_settings.uncompressed_cache_policy; LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy); - size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); + size_t uncompressed_cache_size = server_settings.uncompressed_cache_size; if (uncompressed_cache_size > max_cache_size) { uncompressed_cache_size = max_cache_size; @@ -1500,9 +1468,8 @@ try global_context, settings.async_insert_threads)); - /// Size of cache for marks (index of MergeTree family of tables). - size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); - String mark_cache_policy = config().getString("mark_cache_policy", "SLRU"); + size_t mark_cache_size = server_settings.mark_cache_size; + String mark_cache_policy = server_settings.mark_cache_policy; if (!mark_cache_size) LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); if (mark_cache_size > max_cache_size) @@ -1513,20 +1480,14 @@ try } global_context->setMarkCache(mark_cache_size, mark_cache_policy); - /// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled. - size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0); - if (index_uncompressed_cache_size) - global_context->setIndexUncompressedCache(index_uncompressed_cache_size); + if (server_settings.index_uncompressed_cache_size) + global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size); - /// Size of cache for index marks (index of MergeTree skip indices). - size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0); - if (index_mark_cache_size) - global_context->setIndexMarkCache(index_mark_cache_size); + if (server_settings.index_mark_cache_size) + global_context->setIndexMarkCache(server_settings.index_mark_cache_size); - /// A cache for mmapped files. - size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary. - if (mmap_cache_size) - global_context->setMMappedFileCache(mmap_cache_size); + if (server_settings.mmap_cache_size) + global_context->setMMappedFileCache(server_settings.mmap_cache_size); /// A cache for query results. global_context->setQueryCache(config()); @@ -1612,7 +1573,7 @@ try /// context is destroyed. /// In addition this object has to be created before the loading of the tables. std::unique_ptr dns_cache_updater; - if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache")) + if (server_settings.disable_internal_dns_cache) { /// Disable DNS caching at all DNSResolver::instance().setDisableCacheFlag(); @@ -1622,7 +1583,7 @@ try { /// Initialize a watcher periodically updating DNS cache dns_cache_updater = std::make_unique( - global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5)); + global_context, server_settings.dns_cache_update_period, server_settings.dns_max_consecutive_failures); } if (dns_cache_updater) @@ -1887,7 +1848,7 @@ try LOG_INFO(log, "Closed all listening sockets."); /// Killing remaining queries. - if (!config().getBool("shutdown_wait_unfinished_queries", false)) + if (server_settings.shutdown_wait_unfinished_queries) global_context->getProcessList().killAllQueries(); if (current_connections) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 497327c1bad..f57cc2886e3 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -135,6 +135,7 @@ enum class AccessType M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \ M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \ M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", GLOBAL, ACCESS_MANAGEMENT) \ + M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", GLOBAL, ACCESS_MANAGEMENT) \ M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Access/Common/AllowedClientHosts.cpp b/src/Access/Common/AllowedClientHosts.cpp index 905f7ba08b5..801ccd3748b 100644 --- a/src/Access/Common/AllowedClientHosts.cpp +++ b/src/Access/Common/AllowedClientHosts.cpp @@ -1,8 +1,8 @@ #include #include +#include #include #include -#include #include #include #include diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 58edff039ca..b893554cb8a 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -239,6 +239,12 @@ namespace user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS); } + bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false); + if (!show_named_collections_secrets) + { + user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS); + } + String default_database = config.getString(user_config + ".default_database", ""); user->default_database = default_database; diff --git a/src/AggregateFunctions/AggregateFunctionArray.h b/src/AggregateFunctions/AggregateFunctionArray.h index d1494f46f4b..21394e3ce05 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.h +++ b/src/AggregateFunctions/AggregateFunctionArray.h @@ -13,7 +13,7 @@ struct Settings; namespace ErrorCodes { - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -129,7 +129,7 @@ public: const IColumn::Offsets & ith_offsets = ith_column.getOffsets(); if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName()); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName()); } for (size_t i = begin; i < end; ++i) diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index e035d645222..f041dd11209 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -19,7 +19,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } @@ -197,7 +197,7 @@ public: const IColumn::Offsets & ith_offsets = ith_column.getOffsets(); if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName()); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName()); } AggregateFunctionForEachData & state = ensureAggregateData(place, end - begin, *arena); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index a2b5ed9c493..eaffb04e2a9 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -63,6 +63,9 @@ static constexpr const char * getNameByTrait() template struct GroupArraySamplerData { + /// For easy serialization. + static_assert(std::has_unique_object_representations_v || std::is_floating_point_v); + // Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena using Allocator = MixedAlignedArenaAllocator; using Array = PODArray; @@ -97,6 +100,9 @@ struct GroupArrayNumericData; template struct GroupArrayNumericData { + /// For easy serialization. + static_assert(std::has_unique_object_representations_v || std::is_floating_point_v); + // Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena using Allocator = MixedAlignedArenaAllocator; using Array = PODArray; diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 5a2da8aa459..8d7010c10db 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -32,6 +32,9 @@ namespace ErrorCodes template struct MovingData { + /// For easy serialization. + static_assert(std::has_unique_object_representations_v || std::is_floating_point_v); + using Accumulator = T; /// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index 9737e2e8257..563d56c6f40 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -117,7 +117,21 @@ public: const auto & value = this->data(place).value; size_t size = value.size(); writeVarUInt(size, buf); - buf.write(reinterpret_cast(value.data()), size * sizeof(value[0])); + + /// In this version, pairs were serialized with padding. + /// We must ensure that padding bytes are zero-filled. + + static_assert(offsetof(typename MaxIntersectionsData::Value, first) == 0); + static_assert(offsetof(typename MaxIntersectionsData::Value, second) > 0); + + char zero_padding[offsetof(typename MaxIntersectionsData::Value, second) - sizeof(value[0].first)]{}; + + for (size_t i = 0; i < size; ++i) + { + writePODBinary(value[i].first, buf); + writePODBinary(zero_padding, buf); + writePODBinary(value[i].second, buf); + } } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override diff --git a/src/Analyzer/JoinNode.cpp b/src/Analyzer/JoinNode.cpp index 28a0c4ad7e0..fe4dd2c5016 100644 --- a/src/Analyzer/JoinNode.cpp +++ b/src/Analyzer/JoinNode.cpp @@ -15,6 +15,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + JoinNode::JoinNode(QueryTreeNodePtr left_table_expression_, QueryTreeNodePtr right_table_expression_, QueryTreeNodePtr join_expression_, @@ -113,4 +118,18 @@ ASTPtr JoinNode::toASTImpl() const return tables_in_select_query_ast; } +void JoinNode::crossToInner(const QueryTreeNodePtr & join_expression_) +{ + if (kind != JoinKind::Cross && kind != JoinKind::Comma) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot rewrite {} to INNER JOIN, expected CROSS", toString(kind)); + + if (children[join_expression_child_index]) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Join expression is expected to be empty for CROSS JOIN, got '{}'", + children[join_expression_child_index]->formatConvertedASTForErrorMessage()); + + kind = JoinKind::Inner; + strictness = JoinStrictness::All; + children[join_expression_child_index] = join_expression_; +} + } diff --git a/src/Analyzer/JoinNode.h b/src/Analyzer/JoinNode.h index 15ba11a0122..0d856985794 100644 --- a/src/Analyzer/JoinNode.h +++ b/src/Analyzer/JoinNode.h @@ -126,6 +126,13 @@ public: return QueryTreeNodeType::JOIN; } + /* + * Convert CROSS to INNER JOIN - changes JOIN kind and sets a new join expression + * (that was moved from WHERE clause). + * Expects the current kind to be CROSS (and join expression to be null because of that). + */ + void crossToInner(const QueryTreeNodePtr & join_expression_); + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index df96d83316c..1476a66c892 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -102,19 +102,21 @@ public: if (!left_argument_constant_node && !right_argument_constant_node) return; - /** If we extract negative constant, aggregate function name must be updated. + /** Need reverse max <-> min for: * - * Example: SELECT min(-1 * id); - * Result: SELECT -1 * max(id); + * max(-1*value) -> -1*min(value) + * max(value/-2) -> min(value)/-2 + * max(1-value) -> 1-min(value) */ - std::string aggregate_function_name_if_constant_is_negative; - if (arithmetic_function_name == "multiply" || arithmetic_function_name == "divide") + auto get_reverse_aggregate_function_name = [](const std::string & aggregate_function_name) -> std::string { - if (lower_aggregate_function_name == "min") - aggregate_function_name_if_constant_is_negative = "max"; - else if (lower_aggregate_function_name == "max") - aggregate_function_name_if_constant_is_negative = "min"; - } + if (aggregate_function_name == "min") + return "max"; + else if (aggregate_function_name == "max") + return "min"; + else + return aggregate_function_name; + }; size_t arithmetic_function_argument_index = 0; @@ -126,11 +128,11 @@ public: /// Rewrite `aggregate_function(inner_function(constant, argument))` into `inner_function(constant, aggregate_function(argument))` const auto & left_argument_constant_value_literal = left_argument_constant_node->getValue(); - if (!aggregate_function_name_if_constant_is_negative.empty() && - left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal)) - { - lower_aggregate_function_name = aggregate_function_name_if_constant_is_negative; - } + bool need_reverse = (arithmetic_function_name == "multiply" && left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal)) + || (arithmetic_function_name == "minus"); + + if (need_reverse) + lower_aggregate_function_name = get_reverse_aggregate_function_name(lower_aggregate_function_name); arithmetic_function_argument_index = 1; } @@ -138,11 +140,10 @@ public: { /// Rewrite `aggregate_function(inner_function(argument, constant))` into `inner_function(aggregate_function(argument), constant)` const auto & right_argument_constant_value_literal = right_argument_constant_node->getValue(); - if (!aggregate_function_name_if_constant_is_negative.empty() && - right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal)) - { - lower_aggregate_function_name = aggregate_function_name_if_constant_is_negative; - } + bool need_reverse = (arithmetic_function_name == "multiply" || arithmetic_function_name == "divide") && right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal); + + if (need_reverse) + lower_aggregate_function_name = get_reverse_aggregate_function_name(lower_aggregate_function_name); arithmetic_function_argument_index = 0; } diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index 10efebe0731..fdf818681d7 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -1,8 +1,11 @@ #include "AutoFinalOnQueryPass.h" -#include -#include #include + +#include +#include +#include +#include #include namespace DB @@ -10,52 +13,64 @@ namespace DB namespace { - class AutoFinalOnQueryPassVisitor : public InDepthQueryTreeVisitorWithContext + +class AutoFinalOnQueryPassVisitor : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + void visitImpl(QueryTreeNodePtr & node) { - public: - using Base = InDepthQueryTreeVisitorWithContext; - using Base::Base; + const auto & context = getContext(); + if (!context->getSettingsRef().final) + return; - void visitImpl(QueryTreeNodePtr & node) + const auto * query_node = node->as(); + if (!query_node) + return; + + auto table_expressions = extractTableExpressions(query_node->getJoinTree()); + for (auto & table_expression : table_expressions) + applyFinalIfNeeded(table_expression); + } +private: + static void applyFinalIfNeeded(QueryTreeNodePtr & node) + { + auto * table_node = node->as(); + auto * table_function_node = node->as(); + if (!table_node && !table_function_node) + return; + + const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); + bool is_final_supported = storage && storage->supportsFinal() && !storage->isRemote(); + if (!is_final_supported) + return; + + TableExpressionModifiers table_expression_modifiers_with_final(true /*has_final*/, {}, {}); + + if (table_node) { - if (auto * table_node = node->as()) - { - if (autoFinalOnQuery(*table_node, table_node->getStorage(), getContext())) - { - auto modifier = TableExpressionModifiers(true, std::nullopt, std::nullopt); - table_node->setTableExpressionModifiers(modifier); - } - } + if (table_node->hasTableExpressionModifiers()) + table_node->getTableExpressionModifiers()->setHasFinal(true); + else + table_node->setTableExpressionModifiers(table_expression_modifiers_with_final); } - - private: - static bool autoFinalOnQuery(TableNode & table_node, StoragePtr storage, ContextPtr context) + else if (table_function_node) { - bool is_auto_final_setting_on = context->getSettingsRef().final; - bool is_final_supported = storage && storage->supportsFinal() && !storage->isRemote(); - bool is_query_already_final = table_node.hasTableExpressionModifiers() ? table_node.getTableExpressionModifiers().has_value() : false; - - return is_auto_final_setting_on && !is_query_already_final && is_final_supported; + if (table_function_node->hasTableExpressionModifiers()) + table_function_node->getTableExpressionModifiers()->setHasFinal(true); + else + table_function_node->setTableExpressionModifiers(table_expression_modifiers_with_final); } + } +}; - }; - -} - -String AutoFinalOnQueryPass::getName() -{ - return "AutoFinalOnQueryPass"; -} - -String AutoFinalOnQueryPass::getDescription() -{ - return "Automatically applies final modifier to queries if it is supported and if user level final setting is set."; } void AutoFinalOnQueryPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { auto visitor = AutoFinalOnQueryPassVisitor(std::move(context)); - visitor.visit(query_tree_node); } diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.h b/src/Analyzer/Passes/AutoFinalOnQueryPass.h index eacbe0f8235..3489597108c 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.h +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.h @@ -7,13 +7,23 @@ namespace DB { - +/** Automatically applies final modifier to table expressions in queries if it is supported and if user level final setting is set. + * + * Example: SELECT id, value FROM test_table; + * Result: SELECT id, value FROM test_table FINAL; + */ class AutoFinalOnQueryPass final : public IQueryTreePass { public: - String getName() override; + String getName() override + { + return "AutoFinalOnQueryPass"; + } - String getDescription() override; + String getDescription() override + { + return "Automatically applies final modifier to table expressions in queries if it is supported and if user level final setting is set"; + } void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; }; diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index c636e0bdf00..7d7362fb742 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -4,12 +4,13 @@ #include #include +#include + #include #include #include -#include #include diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp new file mode 100644 index 00000000000..4f62d6ef068 --- /dev/null +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -0,0 +1,262 @@ +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_QUERY; +} + +namespace +{ + +void exctractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi_conditions, QueryTreeNodes & other) +{ + auto * func = node->as(); + if (!func) + { + other.push_back(node); + return; + } + + const auto & args = func->getArguments().getNodes(); + + if (args.size() == 2 && func->getFunctionName() == "equals") + { + equi_conditions.push_back(node); + } + else if (func->getFunctionName() == "and") + { + for (const auto & arg : args) + exctractJoinConditions(arg, equi_conditions, other); + } + else + { + other.push_back(node); + } +} + +const QueryTreeNodePtr & getEquiArgument(const QueryTreeNodePtr & cond, size_t index) +{ + const auto * func = cond->as(); + chassert(func && func->getFunctionName() == "equals" && func->getArguments().getNodes().size() == 2); + return func->getArguments().getNodes()[index]; +} + + +/// Check that node has only one source and return it. +/// {_, false} - multiple sources +/// {nullptr, true} - no sources +/// {source, true} - single source +std::pair getExpressionSource(const QueryTreeNodePtr & node) +{ + if (const auto * column = node->as()) + { + auto source = column->getColumnSourceOrNull(); + if (!source) + return {nullptr, false}; + return {source.get(), true}; + } + + if (const auto * func = node->as()) + { + const IQueryTreeNode * source = nullptr; + const auto & args = func->getArguments().getNodes(); + for (const auto & arg : args) + { + auto [arg_source, is_ok] = getExpressionSource(arg); + if (!is_ok) + return {nullptr, false}; + + if (!source) + source = arg_source; + else if (arg_source && !source->isEqual(*arg_source)) + return {nullptr, false}; + } + return {source, true}; + + } + + if (node->as()) + return {nullptr, true}; + + return {nullptr, false}; +} + +bool findInTableExpression(const IQueryTreeNode * source, const QueryTreeNodePtr & table_expression) +{ + if (!source) + return true; + + if (source->isEqual(*table_expression)) + return true; + + if (const auto * join_node = table_expression->as()) + { + return findInTableExpression(source, join_node->getLeftTableExpression()) + || findInTableExpression(source, join_node->getRightTableExpression()); + } + + + return false; +} + +void getJoinNodes(QueryTreeNodePtr & join_tree_node, std::vector & join_nodes) +{ + auto * join_node = join_tree_node->as(); + if (!join_node) + return; + + if (!isCrossOrComma(join_node->getKind())) + return; + + join_nodes.push_back(join_node); + getJoinNodes(join_node->getLeftTableExpression(), join_nodes); + getJoinNodes(join_node->getRightTableExpression(), join_nodes); +} + +class CrossToInnerJoinVisitor : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + /// Returns false if can't rewrite cross to inner join + bool tryRewrite(JoinNode & join_node, QueryTreeNodePtr & where_condition) + { + if (!isCrossOrComma(join_node.getKind())) + return false; + + if (!where_condition) + return false; + + const auto & left_table = join_node.getLeftTableExpression(); + const auto & right_table = join_node.getRightTableExpression(); + + QueryTreeNodes equi_conditions; + QueryTreeNodes other_conditions; + exctractJoinConditions(where_condition, equi_conditions, other_conditions); + bool can_convert_cross_to_inner = false; + for (auto & cond : equi_conditions) + { + auto left_src = getExpressionSource(getEquiArgument(cond, 0)); + auto right_src = getExpressionSource(getEquiArgument(cond, 1)); + if (left_src.second && right_src.second && left_src.first && right_src.first) + { + bool can_join_on = (findInTableExpression(left_src.first, left_table) && findInTableExpression(right_src.first, right_table)) + || (findInTableExpression(left_src.first, right_table) && findInTableExpression(right_src.first, left_table)); + + if (can_join_on) + { + can_convert_cross_to_inner = true; + continue; + } + } + + /// Can't join on this condition, move it to other conditions + other_conditions.push_back(cond); + cond = nullptr; + } + + if (!can_convert_cross_to_inner) + return false; + + equi_conditions.erase(std::remove(equi_conditions.begin(), equi_conditions.end(), nullptr), equi_conditions.end()); + join_node.crossToInner(makeConjunction(equi_conditions)); + where_condition = makeConjunction(other_conditions); + return true; + } + + void visitImpl(QueryTreeNodePtr & node) + { + if (!isEnabled()) + return; + + auto * query_node = node->as(); + if (!query_node) + return; + + auto & where_node = query_node->getWhere(); + if (!where_node) + return; + + auto & join_tree_node = query_node->getJoinTree(); + if (!join_tree_node || join_tree_node->getNodeType() != QueryTreeNodeType::JOIN) + return; + + /// In case of multiple joins, we can try to rewrite all of them + /// Example: SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t2.a = t3.a + std::vector join_nodes; + getJoinNodes(join_tree_node, join_nodes); + + for (auto * join_node : join_nodes) + { + bool is_rewritten = tryRewrite(*join_node, where_node); + + if (!is_rewritten && forceRewrite(join_node->getKind())) + { + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Failed to rewrite '{}' to INNER JOIN: " + "no equi-join conditions found in WHERE clause. " + "You may set setting `cross_to_inner_join_rewrite` to `1` to allow slow CROSS JOIN for this case", + join_node->formatASTForErrorMessage()); + } + } + } + +private: + bool isEnabled() const + { + return getSettings().cross_to_inner_join_rewrite; + } + + bool forceRewrite(JoinKind kind) const + { + if (kind == JoinKind::Cross) + return false; + /// Comma join can be forced to rewrite + return getSettings().cross_to_inner_join_rewrite >= 2; + } + + QueryTreeNodePtr makeConjunction(const QueryTreeNodes & nodes) + { + if (nodes.empty()) + return nullptr; + + if (nodes.size() == 1) + return nodes.front(); + + auto function_node = std::make_shared("and"); + for (const auto & node : nodes) + function_node->getArguments().getNodes().push_back(node); + + const auto & function = FunctionFactory::instance().get("and", getContext()); + function_node->resolveAsFunction(function->build(function_node->getArgumentColumns())); + return function_node; + } +}; + +} + +void CrossToInnerJoinPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + CrossToInnerJoinVisitor visitor(std::move(context)); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.h b/src/Analyzer/Passes/CrossToInnerJoinPass.h new file mode 100644 index 00000000000..127d26dc41d --- /dev/null +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB +{ + + +/** Replace CROSS JOIN with INNER JOIN. + * Example: + * SELECT * FROM t1 CROSS JOIN t2 WHERE t1.a = t2.a AND t1.b > 10 AND t2.b = t2.c + * We can move equality condition to ON section of INNER JOIN: + * SELECT * FROM t1 INNER JOIN t2 ON t1.a = t2.a WHERE t1.b > 10 AND t2.b = t2.c + */ +class CrossToInnerJoinPass final : public IQueryTreePass +{ +public: + String getName() override { return "CrossToInnerJoin"; } + + String getDescription() override + { + return "Replace CROSS JOIN with INNER JOIN"; + } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; +}; + +} diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 05b643aa6af..c7b9f9aae08 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -233,11 +233,43 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q auto select_settings = select_query_typed.settings(); SettingsChanges settings_changes; + /// We are going to remove settings LIMIT and OFFSET and + /// further replace them with corresponding expression nodes + UInt64 limit = 0; + UInt64 offset = 0; + + /// Remove global settings limit and offset + if (const auto & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset) + { + Settings settings = updated_context->getSettings(); + limit = settings.limit; + offset = settings.offset; + settings.limit = 0; + settings.offset = 0; + updated_context->setSettings(settings); + } + if (select_settings) { auto & set_query = select_settings->as(); - updated_context->applySettingsChanges(set_query.changes); - settings_changes = set_query.changes; + + /// Remove expression settings limit and offset + if (auto * limit_field = set_query.changes.tryGet("limit")) + { + limit = limit_field->safeGet(); + set_query.changes.removeSetting("limit"); + } + if (auto * offset_field = set_query.changes.tryGet("offset")) + { + offset = offset_field->safeGet(); + set_query.changes.removeSetting("offset"); + } + + if (!set_query.changes.empty()) + { + updated_context->applySettingsChanges(set_query.changes); + settings_changes = set_query.changes; + } } auto current_query_tree = std::make_shared(std::move(updated_context), std::move(settings_changes)); @@ -323,12 +355,32 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q if (select_limit_by) current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context); + /// Combine limit expression with limit setting auto select_limit = select_query_typed.limitLength(); - if (select_limit) + if (select_limit && limit) + { + auto function_node = std::make_shared("least"); + function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); + function_node->getArguments().getNodes().push_back(std::make_shared(limit)); + current_query_tree->getLimit() = std::move(function_node); + } + else if (limit) + current_query_tree->getLimit() = std::make_shared(limit); + else if (select_limit) current_query_tree->getLimit() = buildExpression(select_limit, current_context); + /// Combine offset expression with offset setting auto select_offset = select_query_typed.limitOffset(); - if (select_offset) + if (select_offset && offset) + { + auto function_node = std::make_shared("plus"); + function_node->getArguments().getNodes().push_back(buildExpression(select_offset, current_context)); + function_node->getArguments().getNodes().push_back(std::make_shared(offset)); + current_query_tree->getOffset() = std::move(function_node); + } + else if (offset) + current_query_tree->getOffset() = std::make_shared(offset); + else if (select_offset) current_query_tree->getOffset() = buildExpression(select_offset, current_context); return current_query_tree; diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 218e47d973f..9ba18e27f73 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -39,6 +39,7 @@ #include #include #include +#include namespace DB @@ -268,6 +269,7 @@ void addQueryTreePasses(QueryTreePassManager & manager) manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); } } diff --git a/src/Analyzer/TableExpressionModifiers.h b/src/Analyzer/TableExpressionModifiers.h index f61c2a61610..9b76c9bc0fd 100644 --- a/src/Analyzer/TableExpressionModifiers.h +++ b/src/Analyzer/TableExpressionModifiers.h @@ -28,6 +28,12 @@ public: return has_final; } + /// Set has final value + void setHasFinal(bool value) + { + has_final = value; + } + /// Returns true if sample size ratio is specified, false otherwise bool hasSampleSizeRatio() const { diff --git a/src/Analyzer/TableFunctionNode.h b/src/Analyzer/TableFunctionNode.h index 292ab740c5b..a88630ffd00 100644 --- a/src/Analyzer/TableFunctionNode.h +++ b/src/Analyzer/TableFunctionNode.h @@ -116,6 +116,12 @@ public: return table_expression_modifiers; } + /// Get table expression modifiers + std::optional & getTableExpressionModifiers() + { + return table_expression_modifiers; + } + /// Set table expression modifiers void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value) { diff --git a/src/Analyzer/TableNode.h b/src/Analyzer/TableNode.h index 4965de535df..6d47f87c78b 100644 --- a/src/Analyzer/TableNode.h +++ b/src/Analyzer/TableNode.h @@ -68,6 +68,12 @@ public: return table_expression_modifiers; } + /// Get table expression modifiers + std::optional & getTableExpressionModifiers() + { + return table_expression_modifiers; + } + /// Set table expression modifiers void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value) { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index aef7dc6a38e..6c5142813c5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -577,8 +577,8 @@ if (TARGET ch_contrib::annoy) endif() if (TARGET ch_rust::skim) - # Add only -I, library is needed only for clickhouse-client/clickhouse-local dbms_target_include_directories(PRIVATE $) + dbms_target_link_libraries(PUBLIC ch_rust::skim) endif() include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index bc8c43af8c6..96aff9aa304 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -481,14 +481,14 @@ void ClientBase::onLogData(Block & block) void ClientBase::onTotals(Block & block, ASTPtr parsed_query) { initOutputFormat(block, parsed_query); - output_format->setTotals(block); + output_format->setTotals(materializeBlock(block)); } void ClientBase::onExtremes(Block & block, ASTPtr parsed_query) { initOutputFormat(block, parsed_query); - output_format->setExtremes(block); + output_format->setExtremes(materializeBlock(block)); } diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp index 405cb7af03b..fa5ba8b8ba9 100644 --- a/src/Client/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -432,6 +432,7 @@ ReplxxLineReader::ReplxxLineReader( }; rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); +#endif /// Rebind regular incremental search to C-T. /// @@ -443,7 +444,6 @@ ReplxxLineReader::ReplxxLineReader( uint32_t reverse_search = Replxx::KEY::control('R'); return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search); }); -#endif } ReplxxLineReader::~ReplxxLineReader() diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 73e9eecb823..11d02b023d6 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -837,6 +837,34 @@ void ColumnLowCardinality::Dictionary::compact(ColumnPtr & positions) shared = false; } +ColumnPtr ColumnLowCardinality::cloneWithDefaultOnNull() const +{ + if (!nestedIsNullable()) + return getPtr(); + + auto res = cloneEmpty(); + auto & lc_res = assert_cast(*res); + lc_res.nestedRemoveNullable(); + size_t end = size(); + size_t start = 0; + while (start < end) + { + size_t next_null_index = start; + while (next_null_index < end && !isNullAt(next_null_index)) + ++next_null_index; + + if (next_null_index != start) + lc_res.insertRangeFrom(*this, start, next_null_index - start); + + if (next_null_index < end) + lc_res.insertDefault(); + + start = next_null_index + 1; + } + + return res; +} + bool isColumnLowCardinalityNullable(const IColumn & column) { if (const auto * lc_column = checkAndGetColumn(column)) diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index db0100b0aa9..eb7fd958a69 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -221,6 +221,8 @@ public: void nestedRemoveNullable() { dictionary.getColumnUnique().nestedRemoveNullable(); } MutableColumnPtr cloneNullable() const; + ColumnPtr cloneWithDefaultOnNull() const; + const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); } IColumnUnique & getDictionary() { return dictionary.getColumnUnique(); } const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index db99cee54fb..f70dac20a2a 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -782,6 +782,29 @@ ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, co return ColumnNullable::create(new_values, new_null_map); } +ColumnPtr ColumnNullable::getNestedColumnWithDefaultOnNull() const +{ + auto res = nested_column->cloneEmpty(); + const auto & null_map_data = getNullMapData(); + size_t start = 0; + size_t end = null_map->size(); + while (start < nested_column->size()) + { + size_t next_null_index = start; + while (next_null_index < end && !null_map_data[next_null_index]) + ++next_null_index; + + if (next_null_index != start) + res->insertRangeFrom(*nested_column, start, next_null_index - start); + + if (next_null_index < end) + res->insertDefault(); + + start = next_null_index + 1; + } + return res; +} + ColumnPtr makeNullable(const ColumnPtr & column) { if (isColumnNullable(*column)) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 7574cc0b501..9ea0ceb1c5a 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -188,6 +188,8 @@ public: NullMap & getNullMapData() { return getNullMapColumn().getData(); } const NullMap & getNullMapData() const { return getNullMapColumn().getData(); } + ColumnPtr getNestedColumnWithDefaultOnNull() const; + /// Apply the null byte map of a specified nullable column onto the /// null byte map of the current column by performing an element-wise OR /// between both byte maps. This method is used to determine the null byte diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 96f76b70f31..3ed3ed73328 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -747,7 +747,7 @@ namespace */ template requires (std::is_same_v || std::is_same_v) - void replicateSSE42Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets) + void replicateSSE2Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets) { const IntType * data_copy_begin_ptr = nullptr; size_t offsets_size = offsets.size(); @@ -842,7 +842,7 @@ ColumnPtr ColumnVector::replicate(const IColumn::Offsets & offsets) const #ifdef __SSE2__ if constexpr (std::is_same_v) { - replicateSSE42Int32(getData().data(), res->getData().data(), offsets); + replicateSSE2Int32(getData().data(), res->getData().data(), offsets); return res; } #endif diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 1278f9459b5..99073d79bcd 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -533,9 +533,16 @@ void AsynchronousMetrics::update(TimePoint update_time) AsynchronousMetricValues new_values; auto current_time = std::chrono::system_clock::now(); - auto time_after_previous_update [[maybe_unused]] = current_time - previous_update_time; + auto time_after_previous_update = current_time - previous_update_time; previous_update_time = update_time; + double update_interval = 0.; + if (first_run) + update_interval = update_period.count(); + else + update_interval = std::chrono::duration_cast(time_after_previous_update).count() / 1e6; + new_values["AsynchronousMetricsUpdateInterval"] = { update_interval, "Metrics update interval" }; + /// This is also a good indicator of system responsiveness. new_values["Jitter"] = { std::chrono::duration_cast(current_time - update_time).count() / 1e9, "The difference in time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was in fact, woken up." diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 028663a2176..c00129249e2 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -197,7 +197,7 @@ M(187, COLLATION_COMPARISON_FAILED) \ M(188, UNKNOWN_ACTION) \ M(189, TABLE_MUST_NOT_BE_CREATED_MANUALLY) \ - M(190, SIZES_OF_ARRAYS_DOESNT_MATCH) \ + M(190, SIZES_OF_ARRAYS_DONT_MATCH) \ M(191, SET_SIZE_LIMIT_EXCEEDED) \ M(192, UNKNOWN_USER) \ M(193, WRONG_PASSWORD) \ diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index e0ec6b7a335..60834afab35 100644 --- a/src/Common/FieldVisitorToString.cpp +++ b/src/Common/FieldVisitorToString.cpp @@ -117,14 +117,14 @@ String FieldVisitorToString::operator() (const Map & x) const { WriteBufferFromOwnString wb; - wb << '('; + wb << '['; for (auto it = x.begin(); it != x.end(); ++it) { if (it != x.begin()) wb << ", "; wb << applyVisitor(*this, *it); } - wb << ')'; + wb << ']'; return wb.str(); } diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index 0c6e32cb383..d8ed1e205c8 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -100,8 +100,8 @@ private: bool required_substring_is_prefix; bool is_case_insensitive; std::string required_substring; - std::optional> case_sensitive_substring_searcher; - std::optional> case_insensitive_substring_searcher; + std::optional case_sensitive_substring_searcher; + std::optional case_insensitive_substring_searcher; std::unique_ptr re2; unsigned number_of_subpatterns; diff --git a/src/Common/SettingsChanges.cpp b/src/Common/SettingsChanges.cpp index 9fb4f361e09..7790c272606 100644 --- a/src/Common/SettingsChanges.cpp +++ b/src/Common/SettingsChanges.cpp @@ -46,4 +46,30 @@ Field * SettingsChanges::tryGet(std::string_view name) return &change->value; } +bool SettingsChanges::insertSetting(std::string_view name, const Field & value) +{ + auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; }); + if (it != end()) + return false; + emplace_back(name, value); + return true; +} + +void SettingsChanges::setSetting(std::string_view name, const Field & value) +{ + if (auto * setting_value = tryGet(name)) + *setting_value = value; + else + insertSetting(name, value); +} + +bool SettingsChanges::removeSetting(std::string_view name) +{ + auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; }); + if (it == end()) + return false; + erase(it); + return true; +} + } diff --git a/src/Common/SettingsChanges.h b/src/Common/SettingsChanges.h index 776dacb93e8..514e9a78911 100644 --- a/src/Common/SettingsChanges.h +++ b/src/Common/SettingsChanges.h @@ -28,6 +28,13 @@ public: bool tryGet(std::string_view name, Field & out_value) const; const Field * tryGet(std::string_view name) const; Field * tryGet(std::string_view name); + + /// Inserts element if doesn't exists and returns true, otherwise just returns false + bool insertSetting(std::string_view name, const Field & value); + /// Sets element to value, inserts if doesn't exist + void setSetting(std::string_view name, const Field & value); + /// If element exists - removes it and returns true, otherwise returns false + bool removeSetting(std::string_view name); }; } diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index 3afc7635a9d..ae440f9151b 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -32,27 +31,476 @@ namespace ErrorCodes * In most cases, performance is less than Volnitsky (see Volnitsky.h). */ +namespace impl +{ class StringSearcherBase { public: bool force_fallback = false; + #ifdef __SSE2__ protected: - static constexpr auto n = sizeof(__m128i); - const Int64 page_size = ::getPageSize(); + static constexpr size_t N = sizeof(__m128i); - bool pageSafe(const void * const ptr) const + bool isPageSafe(const void * const ptr) const { - return ((page_size - 1) & reinterpret_cast(ptr)) <= page_size - n; + return ((page_size - 1) & reinterpret_cast(ptr)) <= page_size - N; } + +private: + const Int64 page_size = ::getPageSize(); #endif }; -/// Performs case-sensitive and case-insensitive search of UTF-8 strings +/// Performs case-sensitive or case-insensitive search of ASCII or UTF-8 strings template class StringSearcher; + +/// Case-sensitive ASCII and UTF8 searcher +template +class StringSearcher : public StringSearcherBase +{ +private: + /// string to be searched for + const uint8_t * const needle; + const uint8_t * const needle_end; + /// first character in `needle` + uint8_t first_needle_character = 0; + +#ifdef __SSE4_1__ + /// second character of "needle" (if its length is > 1) + uint8_t second_needle_character = 0; + /// first/second needle character broadcasted into a 16 bytes vector + __m128i first_needle_character_vec; + __m128i second_needle_character_vec; + /// vector of first 16 characters of `needle` + __m128i cache = _mm_setzero_si128(); + uint16_t cachemask = 0; +#endif + +public: + template + requires (sizeof(CharT) == 1) + StringSearcher(const CharT * needle_, size_t needle_size) + : needle(reinterpret_cast(needle_)) + , needle_end(needle + needle_size) + { + if (needle_size == 0) + return; + + first_needle_character = *needle; + +#ifdef __SSE4_1__ + first_needle_character_vec = _mm_set1_epi8(first_needle_character); + if (needle_size > 1) + { + second_needle_character = *(needle + 1); + second_needle_character_vec = _mm_set1_epi8(second_needle_character); + } + const auto * needle_pos = needle; + + for (uint8_t i = 0; i < N; ++i) + { + cache = _mm_srli_si128(cache, 1); + + if (needle_pos != needle_end) + { + cache = _mm_insert_epi8(cache, *needle_pos, N - 1); + cachemask |= 1 << i; + ++needle_pos; + } + } +#endif + } + + template + requires (sizeof(CharT) == 1) + ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const + { +#ifdef __SSE4_1__ + if (isPageSafe(pos)) + { + const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(pos)); + const __m128i comparison_result = _mm_cmpeq_epi8(haystack_characters, cache); + const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result); + + if (0xffff == cachemask) + { + if (comparison_result_mask == cachemask) + { + pos += N; + const auto * needle_pos = needle + N; + + while (needle_pos < needle_end && *pos == *needle_pos) + ++pos, ++needle_pos; + + if (needle_pos == needle_end) + return true; + } + } + else if ((comparison_result_mask & cachemask) == cachemask) + return true; + + return false; + } +#endif + + if (*pos == first_needle_character) + { + ++pos; + const auto * needle_pos = needle + 1; + + while (needle_pos < needle_end && *pos == *needle_pos) + ++pos, ++needle_pos; + + if (needle_pos == needle_end) + return true; + } + + return false; + } + + template + requires (sizeof(CharT) == 1) + const CharT * search(const CharT * haystack, const CharT * const haystack_end) const + { + const auto needle_size = needle_end - needle; + + if (needle == needle_end) + return haystack; + +#ifdef __SSE4_1__ + /// Fast path for single-character needles. Compare 16 characters of the haystack against the needle character at once. + if (needle_size == 1) + { + while (haystack < haystack_end) + { + if (haystack + N <= haystack_end && isPageSafe(haystack)) + { + const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(haystack)); + const __m128i comparison_result = _mm_cmpeq_epi8(haystack_characters, first_needle_character_vec); + const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result); + if (comparison_result_mask == 0) + { + haystack += N; + continue; + } + + const int offset = std::countr_zero(comparison_result_mask); + haystack += offset; + + return haystack; + } + + if (haystack == haystack_end) + return haystack_end; + + if (*haystack == first_needle_character) + return haystack; + + ++haystack; + } + + return haystack_end; + } +#endif + + while (haystack < haystack_end && haystack_end - haystack >= needle_size) + { +#ifdef __SSE4_1__ + /// Compare the [0:15] bytes from haystack and broadcasted 16 bytes vector from first character of needle. + /// Compare the [1:16] bytes from haystack and broadcasted 16 bytes vector from second character of needle. + /// Bit AND the results of above two comparisons and get the mask. + if ((haystack + 1 + N) <= haystack_end && isPageSafe(haystack + 1)) + { + const __m128i haystack_characters_from_1st = _mm_loadu_si128(reinterpret_cast(haystack)); + const __m128i haystack_characters_from_2nd = _mm_loadu_si128(reinterpret_cast(haystack + 1)); + const __m128i comparison_result_1st = _mm_cmpeq_epi8(haystack_characters_from_1st, first_needle_character_vec); + const __m128i comparison_result_2nd = _mm_cmpeq_epi8(haystack_characters_from_2nd, second_needle_character_vec); + const __m128i comparison_result_combined = _mm_and_si128(comparison_result_1st, comparison_result_2nd); + const uint16_t comparison_result_mask = _mm_movemask_epi8(comparison_result_combined); + /// If the mask = 0, then first two characters [0:1] from needle are not in the [0:17] bytes of haystack. + if (comparison_result_mask == 0) + { + haystack += N; + continue; + } + + const int offset = std::countr_zero(comparison_result_mask); + haystack += offset; + + if (haystack + N <= haystack_end && isPageSafe(haystack)) + { + /// Already find the haystack position where the [pos:pos + 1] two characters exactly match the first two characters of needle. + /// Compare the 16 bytes from needle (cache) and the first 16 bytes from haystack at once if the haystack size >= 16 bytes. + const __m128i haystack_characters = _mm_loadu_si128(reinterpret_cast(haystack)); + const __m128i comparison_result_cache = _mm_cmpeq_epi8(haystack_characters, cache); + const uint16_t mask_offset = _mm_movemask_epi8(comparison_result_cache); + + if (0xffff == cachemask) + { + if (mask_offset == cachemask) + { + const auto * haystack_pos = haystack + N; + const auto * needle_pos = needle + N; + + while (haystack_pos < haystack_end && needle_pos < needle_end && + *haystack_pos == *needle_pos) + ++haystack_pos, ++needle_pos; + + if (needle_pos == needle_end) + return haystack; + } + } + else if ((mask_offset & cachemask) == cachemask) + return haystack; + + ++haystack; + continue; + } + } +#endif + + if (haystack == haystack_end) + return haystack_end; + + if (*haystack == first_needle_character) + { + const auto * haystack_pos = haystack + 1; + const auto * needle_pos = needle + 1; + + while (haystack_pos < haystack_end && needle_pos < needle_end && + *haystack_pos == *needle_pos) + ++haystack_pos, ++needle_pos; + + if (needle_pos == needle_end) + return haystack; + } + + ++haystack; + } + + return haystack_end; + } + + template + requires (sizeof(CharT) == 1) + const CharT * search(const CharT * haystack, size_t haystack_size) const + { + return search(haystack, haystack + haystack_size); + } +}; + + +/// Case-insensitive ASCII searcher +template <> +class StringSearcher : public StringSearcherBase +{ +private: + /// string to be searched for + const uint8_t * const needle; + const uint8_t * const needle_end; + /// lower and uppercase variants of the first character in `needle` + uint8_t l = 0; + uint8_t u = 0; + +#ifdef __SSE4_1__ + /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol + __m128i patl, patu; + /// lower and uppercase vectors of first 16 characters of `needle` + __m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128(); + int cachemask = 0; +#endif + +public: + template + requires (sizeof(CharT) == 1) + StringSearcher(const CharT * needle_, size_t needle_size) + : needle(reinterpret_cast(needle_)) + , needle_end(needle + needle_size) + { + if (needle_size == 0) + return; + + l = static_cast(std::tolower(*needle)); + u = static_cast(std::toupper(*needle)); + +#ifdef __SSE4_1__ + patl = _mm_set1_epi8(l); + patu = _mm_set1_epi8(u); + + const auto * needle_pos = needle; + + for (size_t i = 0; i < N; ++i) + { + cachel = _mm_srli_si128(cachel, 1); + cacheu = _mm_srli_si128(cacheu, 1); + + if (needle_pos != needle_end) + { + cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), N - 1); + cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), N - 1); + cachemask |= 1 << i; + ++needle_pos; + } + } +#endif + } + + template + requires (sizeof(CharT) == 1) + ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const + { +#ifdef __SSE4_1__ + if (isPageSafe(pos)) + { + const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); + const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel); + const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu); + const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u); + const auto mask = _mm_movemask_epi8(v_against_l_or_u); + + if (0xffff == cachemask) + { + if (mask == cachemask) + { + pos += N; + const auto * needle_pos = needle + N; + + while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) + { + ++pos; + ++needle_pos; + } + + if (needle_pos == needle_end) + return true; + } + } + else if ((mask & cachemask) == cachemask) + return true; + + return false; + } +#endif + + if (*pos == l || *pos == u) + { + ++pos; + const auto * needle_pos = needle + 1; + + while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) + { + ++pos; + ++needle_pos; + } + + if (needle_pos == needle_end) + return true; + } + + return false; + } + + template + requires (sizeof(CharT) == 1) + const CharT * search(const CharT * haystack, const CharT * const haystack_end) const + { + if (needle == needle_end) + return haystack; + + while (haystack < haystack_end) + { +#ifdef __SSE4_1__ + if (haystack + N <= haystack_end && isPageSafe(haystack)) + { + const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); + const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl); + const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu); + const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u); + + const auto mask = _mm_movemask_epi8(v_against_l_or_u); + + if (mask == 0) + { + haystack += N; + continue; + } + + const auto offset = __builtin_ctz(mask); + haystack += offset; + + if (haystack + N <= haystack_end && isPageSafe(haystack)) + { + const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast(haystack)); + const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel); + const auto v_against_u_offset = _mm_cmpeq_epi8(v_haystack_offset, cacheu); + const auto v_against_l_or_u_offset = _mm_or_si128(v_against_l_offset, v_against_u_offset); + const auto mask_offset = _mm_movemask_epi8(v_against_l_or_u_offset); + + if (0xffff == cachemask) + { + if (mask_offset == cachemask) + { + const auto * haystack_pos = haystack + N; + const auto * needle_pos = needle + N; + + while (haystack_pos < haystack_end && needle_pos < needle_end && + std::tolower(*haystack_pos) == std::tolower(*needle_pos)) + { + ++haystack_pos; + ++needle_pos; + } + + if (needle_pos == needle_end) + return haystack; + } + } + else if ((mask_offset & cachemask) == cachemask) + return haystack; + + ++haystack; + continue; + } + } +#endif + + if (haystack == haystack_end) + return haystack_end; + + if (*haystack == l || *haystack == u) + { + const auto * haystack_pos = haystack + 1; + const auto * needle_pos = needle + 1; + + while (haystack_pos < haystack_end && needle_pos < needle_end && + std::tolower(*haystack_pos) == std::tolower(*needle_pos)) + { + ++haystack_pos; + ++needle_pos; + } + + if (needle_pos == needle_end) + return haystack; + } + + ++haystack; + } + + return haystack_end; + } + + template + requires (sizeof(CharT) == 1) + const CharT * search(const CharT * haystack, size_t haystack_size) const + { + return search(haystack, haystack + haystack_size); + } +}; + + /// Case-insensitive UTF-8 searcher template <> class StringSearcher : public StringSearcherBase @@ -65,9 +513,9 @@ private: const size_t needle_size; const uint8_t * const needle_end = needle + needle_size; /// lower and uppercase variants of the first octet of the first character in `needle` - bool first_needle_symbol_is_ascii{}; - uint8_t l{}; - uint8_t u{}; + bool first_needle_symbol_is_ascii = false; + uint8_t l = 0; + uint8_t u = 0; #ifdef __SSE4_1__ /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol @@ -76,18 +524,19 @@ private: /// lower and uppercase vectors of first 16 characters of `needle` __m128i cachel = _mm_setzero_si128(); __m128i cacheu = _mm_setzero_si128(); - int cachemask{}; - size_t cache_valid_len{}; - size_t cache_actual_len{}; + int cachemask = 0; + size_t cache_valid_len = 0; + size_t cache_actual_len = 0; #endif public: template requires (sizeof(CharT) == 1) - StringSearcher(const CharT * needle_, const size_t needle_size_) - : needle{reinterpret_cast(needle_)}, needle_size{needle_size_} + StringSearcher(const CharT * needle_, size_t needle_size_) + : needle(reinterpret_cast(needle_)) + , needle_size(needle_size_) { - if (0 == needle_size) + if (needle_size == 0) return; UTF8SequenceBuffer l_seq; @@ -140,7 +589,7 @@ public: const auto * needle_pos = needle; - for (size_t i = 0; i < n;) + for (size_t i = 0; i < N;) { if (needle_pos == needle_end) { @@ -171,18 +620,18 @@ public: } cache_actual_len += src_len; - if (cache_actual_len < n) + if (cache_actual_len < N) cache_valid_len += src_len; - for (size_t j = 0; j < src_len && i < n; ++j, ++i) + for (size_t j = 0; j < src_len && i < N; ++j, ++i) { cachel = _mm_srli_si128(cachel, 1); cacheu = _mm_srli_si128(cacheu, 1); if (needle_pos != needle_end) { - cachel = _mm_insert_epi8(cachel, l_seq[j], n - 1); - cacheu = _mm_insert_epi8(cacheu, u_seq[j], n - 1); + cachel = _mm_insert_epi8(cachel, l_seq[j], N - 1); + cacheu = _mm_insert_epi8(cacheu, u_seq[j], N - 1); cachemask |= 1 << i; ++needle_pos; @@ -225,7 +674,7 @@ public: { #ifdef __SSE4_1__ - if (pageSafe(pos) && !force_fallback) + if (isPageSafe(pos) && !force_fallback) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel); @@ -269,13 +718,13 @@ public: requires (sizeof(CharT) == 1) const CharT * search(const CharT * haystack, const CharT * const haystack_end) const { - if (0 == needle_size) + if (needle_size == 0) return haystack; while (haystack < haystack_end) { #ifdef __SSE4_1__ - if (haystack + n <= haystack_end && pageSafe(haystack) && !force_fallback) + if (haystack + N <= haystack_end && isPageSafe(haystack) && !force_fallback) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl); @@ -286,7 +735,7 @@ public: if (mask == 0) { - haystack += n; + haystack += N; UTF8::syncForward(haystack, haystack_end); continue; } @@ -294,7 +743,7 @@ public: const auto offset = __builtin_ctz(mask); haystack += offset; - if (haystack + n <= haystack_end && pageSafe(haystack)) + if (haystack + N <= haystack_end && isPageSafe(haystack)) { const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast(haystack)); const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel); @@ -344,404 +793,13 @@ public: template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const + const CharT * search(const CharT * haystack, size_t haystack_size) const { return search(haystack, haystack + haystack_size); } }; -/// Case-insensitive ASCII searcher -template <> -class StringSearcher : public StringSearcherBase -{ -private: - /// string to be searched for - const uint8_t * const needle; - const uint8_t * const needle_end; - /// lower and uppercase variants of the first character in `needle` - uint8_t l{}; - uint8_t u{}; - -#ifdef __SSE4_1__ - /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol - __m128i patl, patu; - /// lower and uppercase vectors of first 16 characters of `needle` - __m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128(); - int cachemask{}; -#endif - -public: - template - requires (sizeof(CharT) == 1) - StringSearcher(const CharT * needle_, const size_t needle_size) - : needle{reinterpret_cast(needle_)}, needle_end{needle + needle_size} - { - if (0 == needle_size) - return; - - l = static_cast(std::tolower(*needle)); - u = static_cast(std::toupper(*needle)); - -#ifdef __SSE4_1__ - patl = _mm_set1_epi8(l); - patu = _mm_set1_epi8(u); - - const auto * needle_pos = needle; - - for (const auto i : collections::range(0, n)) - { - cachel = _mm_srli_si128(cachel, 1); - cacheu = _mm_srli_si128(cacheu, 1); - - if (needle_pos != needle_end) - { - cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), n - 1); - cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), n - 1); - cachemask |= 1 << i; - ++needle_pos; - } - } -#endif - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const - { -#ifdef __SSE4_1__ - if (pageSafe(pos)) - { - const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); - const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel); - const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu); - const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u); - const auto mask = _mm_movemask_epi8(v_against_l_or_u); - - if (0xffff == cachemask) - { - if (mask == cachemask) - { - pos += n; - const auto * needle_pos = needle + n; - - while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) - { - ++pos; - ++needle_pos; - } - - if (needle_pos == needle_end) - return true; - } - } - else if ((mask & cachemask) == cachemask) - return true; - - return false; - } -#endif - - if (*pos == l || *pos == u) - { - ++pos; - const auto * needle_pos = needle + 1; - - while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos)) - { - ++pos; - ++needle_pos; - } - - if (needle_pos == needle_end) - return true; - } - - return false; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - if (needle == needle_end) - return haystack; - - while (haystack < haystack_end) - { -#ifdef __SSE4_1__ - if (haystack + n <= haystack_end && pageSafe(haystack)) - { - const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); - const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl); - const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu); - const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u); - - const auto mask = _mm_movemask_epi8(v_against_l_or_u); - - if (mask == 0) - { - haystack += n; - continue; - } - - const auto offset = __builtin_ctz(mask); - haystack += offset; - - if (haystack + n <= haystack_end && pageSafe(haystack)) - { - const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast(haystack)); - const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel); - const auto v_against_u_offset = _mm_cmpeq_epi8(v_haystack_offset, cacheu); - const auto v_against_l_or_u_offset = _mm_or_si128(v_against_l_offset, v_against_u_offset); - const auto mask_offset = _mm_movemask_epi8(v_against_l_or_u_offset); - - if (0xffff == cachemask) - { - if (mask_offset == cachemask) - { - const auto * haystack_pos = haystack + n; - const auto * needle_pos = needle + n; - - while (haystack_pos < haystack_end && needle_pos < needle_end && - std::tolower(*haystack_pos) == std::tolower(*needle_pos)) - { - ++haystack_pos; - ++needle_pos; - } - - if (needle_pos == needle_end) - return haystack; - } - } - else if ((mask_offset & cachemask) == cachemask) - return haystack; - - ++haystack; - continue; - } - } -#endif - - if (haystack == haystack_end) - return haystack_end; - - if (*haystack == l || *haystack == u) - { - const auto * haystack_pos = haystack + 1; - const auto * needle_pos = needle + 1; - - while (haystack_pos < haystack_end && needle_pos < needle_end && - std::tolower(*haystack_pos) == std::tolower(*needle_pos)) - { - ++haystack_pos; - ++needle_pos; - } - - if (needle_pos == needle_end) - return haystack; - } - - ++haystack; - } - - return haystack_end; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } -}; - - -/// Case-sensitive searcher (both ASCII and UTF-8) -template -class StringSearcher : public StringSearcherBase -{ -private: - /// string to be searched for - const uint8_t * const needle; - const uint8_t * const needle_end; - /// first character in `needle` - uint8_t first{}; - -#ifdef __SSE4_1__ - /// vector filled `first` for determining leftmost position of the first symbol - __m128i pattern; - /// vector of first 16 characters of `needle` - __m128i cache = _mm_setzero_si128(); - int cachemask{}; -#endif - -public: - template - requires (sizeof(CharT) == 1) - StringSearcher(const CharT * needle_, const size_t needle_size) - : needle{reinterpret_cast(needle_)}, needle_end{needle + needle_size} - { - if (0 == needle_size) - return; - - first = *needle; - -#ifdef __SSE4_1__ - pattern = _mm_set1_epi8(first); - - const auto * needle_pos = needle; - - for (const auto i : collections::range(0, n)) - { - cache = _mm_srli_si128(cache, 1); - - if (needle_pos != needle_end) - { - cache = _mm_insert_epi8(cache, *needle_pos, n - 1); - cachemask |= 1 << i; - ++needle_pos; - } - } -#endif - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const - { -#ifdef __SSE4_1__ - if (pageSafe(pos)) - { - const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); - const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache); - const auto mask = _mm_movemask_epi8(v_against_cache); - - if (0xffff == cachemask) - { - if (mask == cachemask) - { - pos += n; - const auto * needle_pos = needle + n; - - while (needle_pos < needle_end && *pos == *needle_pos) - ++pos, ++needle_pos; - - if (needle_pos == needle_end) - return true; - } - } - else if ((mask & cachemask) == cachemask) - return true; - - return false; - } -#endif - - if (*pos == first) - { - ++pos; - const auto * needle_pos = needle + 1; - - while (needle_pos < needle_end && *pos == *needle_pos) - ++pos, ++needle_pos; - - if (needle_pos == needle_end) - return true; - } - - return false; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - if (needle == needle_end) - return haystack; - - while (haystack < haystack_end) - { -#ifdef __SSE4_1__ - if (haystack + n <= haystack_end && pageSafe(haystack)) - { - /// find first character - const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); - const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, pattern); - - const auto mask = _mm_movemask_epi8(v_against_pattern); - - /// first character not present in 16 octets starting at `haystack` - if (mask == 0) - { - haystack += n; - continue; - } - - const auto offset = __builtin_ctz(mask); - haystack += offset; - - if (haystack + n <= haystack_end && pageSafe(haystack)) - { - /// check for first 16 octets - const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast(haystack)); - const auto v_against_cache = _mm_cmpeq_epi8(v_haystack_offset, cache); - const auto mask_offset = _mm_movemask_epi8(v_against_cache); - - if (0xffff == cachemask) - { - if (mask_offset == cachemask) - { - const auto * haystack_pos = haystack + n; - const auto * needle_pos = needle + n; - - while (haystack_pos < haystack_end && needle_pos < needle_end && - *haystack_pos == *needle_pos) - ++haystack_pos, ++needle_pos; - - if (needle_pos == needle_end) - return haystack; - } - } - else if ((mask_offset & cachemask) == cachemask) - return haystack; - - ++haystack; - continue; - } - } -#endif - - if (haystack == haystack_end) - return haystack_end; - - if (*haystack == first) - { - const auto * haystack_pos = haystack + 1; - const auto * needle_pos = needle + 1; - - while (haystack_pos < haystack_end && needle_pos < needle_end && - *haystack_pos == *needle_pos) - ++haystack_pos, ++needle_pos; - - if (needle_pos == needle_end) - return haystack; - } - - ++haystack; - } - - return haystack_end; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } -}; - // Searches for needle surrounded by token-separators. // Separators are anything inside ASCII (0-128) and not alphanum. // Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings @@ -755,14 +813,12 @@ class TokenSearcher : public StringSearcherBase public: template requires (sizeof(CharT) == 1) - TokenSearcher(const CharT * needle_, const size_t needle_size_) - : searcher{needle_, needle_size_}, - needle_size(needle_size_) + TokenSearcher(const CharT * needle_, size_t needle_size_) + : searcher(needle_, needle_size_) + , needle_size(needle_size_) { if (std::any_of(needle_, needle_ + needle_size_, isTokenSeparator)) - { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); - } } @@ -799,7 +855,7 @@ public: template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const + const CharT * search(const CharT * haystack, size_t haystack_size) const { return search(haystack, haystack + haystack_size); } @@ -818,26 +874,27 @@ public: } }; +} -using ASCIICaseSensitiveStringSearcher = StringSearcher; -using ASCIICaseInsensitiveStringSearcher = StringSearcher; -using UTF8CaseSensitiveStringSearcher = StringSearcher; -using UTF8CaseInsensitiveStringSearcher = StringSearcher; -using ASCIICaseSensitiveTokenSearcher = TokenSearcher; -using ASCIICaseInsensitiveTokenSearcher = TokenSearcher; +using ASCIICaseSensitiveStringSearcher = impl::StringSearcher; +using ASCIICaseInsensitiveStringSearcher = impl::StringSearcher; +using UTF8CaseSensitiveStringSearcher = impl::StringSearcher; +using UTF8CaseInsensitiveStringSearcher = impl::StringSearcher; +using ASCIICaseSensitiveTokenSearcher = impl::TokenSearcher; +using ASCIICaseInsensitiveTokenSearcher = impl::TokenSearcher; /// Use only with short haystacks where cheap initialization is required. template -struct StdLibASCIIStringSearcher : public StringSearcherBase +struct StdLibASCIIStringSearcher { const char * const needle_start; const char * const needle_end; template requires (sizeof(CharT) == 1) - StdLibASCIIStringSearcher(const CharT * const needle_start_, const size_t needle_size_) - : needle_start{reinterpret_cast(needle_start_)} - , needle_end{reinterpret_cast(needle_start) + needle_size_} + StdLibASCIIStringSearcher(const CharT * const needle_start_, size_t needle_size_) + : needle_start(reinterpret_cast(needle_start_)) + , needle_end(reinterpret_cast(needle_start) + needle_size_) {} template @@ -845,22 +902,18 @@ struct StdLibASCIIStringSearcher : public StringSearcherBase const CharT * search(const CharT * haystack_start, const CharT * const haystack_end) const { if constexpr (CaseInsensitive) - { return std::search( haystack_start, haystack_end, needle_start, needle_end, [](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);}); - } else - { return std::search( haystack_start, haystack_end, needle_start, needle_end, [](char c1, char c2) {return c1 == c2;}); - } } template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack_start, const size_t haystack_length) const + const CharT * search(const CharT * haystack_start, size_t haystack_length) const { return search(haystack_start, haystack_start + haystack_length); } diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 9f9a78c4036..46c171b5cb6 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -237,6 +237,7 @@ void ThreadStatus::setFatalErrorCallback(std::function callback) void ThreadStatus::onFatalError() { + std::lock_guard lock(thread_group->mutex); if (fatal_error_callback) fatal_error_callback(); } diff --git a/src/Common/VersionNumber.cpp b/src/Common/VersionNumber.cpp index c8332132d2b..cb02f6f8db2 100644 --- a/src/Common/VersionNumber.cpp +++ b/src/Common/VersionNumber.cpp @@ -1,6 +1,6 @@ #include -#include -#include +#include +#include namespace DB { @@ -10,29 +10,21 @@ VersionNumber::VersionNumber(std::string version_string) if (version_string.empty()) return; - char * start = &version_string.front(); - char * end = start; - const char * eos = &version_string.back() + 1; - - do + ReadBufferFromString rb(version_string); + while (!rb.eof()) { - Int64 value = strtol(start, &end, 10); + Int64 value; + if (!tryReadIntText(value, rb)) + break; components.push_back(value); - start = end + 1; + if (!checkChar('.', rb)) + break; } - while (start < eos && (end < eos && *end == '.')); } std::string VersionNumber::toString() const { - std::string str; - for (Int64 v : components) - { - if (!str.empty()) - str += '.'; - str += std::to_string(v); - } - return str; + return fmt::format("{}", fmt::join(components, ".")); } int VersionNumber::compare(const VersionNumber & rhs) const diff --git a/src/Common/VersionNumber.h b/src/Common/VersionNumber.h index 13c35063632..94bf234c1da 100644 --- a/src/Common/VersionNumber.h +++ b/src/Common/VersionNumber.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -10,33 +9,24 @@ namespace DB { /// Simple numeric version representation. -/// -/// Based on QVersionNumber. struct VersionNumber { explicit VersionNumber() = default; - VersionNumber(const std::initializer_list & init) - : components(init) - {} - VersionNumber(Int64 major, Int64 minor = 0, Int64 patch = 0) /// NOLINT - : components{major, minor, patch} - {} - VersionNumber(const std::vector & components_) /// NOLINT - : components(components_) - {} + VersionNumber(const std::initializer_list & init) : components(init) {} + explicit VersionNumber(Int64 major, Int64 minor = 0, Int64 patch = 0) : components{major, minor, patch} {} + explicit VersionNumber(const std::vector & components_) : components(components_) {} /// Parse version number from string. explicit VersionNumber(std::string version); - /// NOTE: operator<=> can be used once libc++ will be upgraded. - bool operator<(const VersionNumber & rhs) const { return compare(rhs.components) < 0; } - bool operator<=(const VersionNumber & rhs) const { return compare(rhs.components) <= 0; } - bool operator==(const VersionNumber & rhs) const { return compare(rhs.components) == 0; } - bool operator>(const VersionNumber & rhs) const { return compare(rhs.components) > 0; } - bool operator>=(const VersionNumber & rhs) const { return compare(rhs.components) >= 0; } + bool operator==(const VersionNumber & rhs) const = default; + + /// There might be negative version code which differs from default comparison. + auto operator<=>(const VersionNumber & rhs) const { return compare(rhs); } std::string toString() const; + friend std::ostream & operator<<(std::ostream & os, const VersionNumber & v) { return os << v.toString(); diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 7073a9a4709..a0fce5531cd 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -11,6 +11,10 @@ #include #include +#ifdef __SSE4_1__ + #include +#endif + /** Search for a substring in a string by Volnitsky's algorithm * http://volnitsky.com/project/str_search/ * @@ -429,6 +433,10 @@ public: const auto * haystack_end = haystack + haystack_size; +#ifdef __SSE4_1__ + return fallback_searcher.search(haystack, haystack_end); +#endif + if (fallback || haystack_size <= needle_size || fallback_searcher.force_fallback) return fallback_searcher.search(haystack, haystack_end); diff --git a/src/Common/likePatternToRegexp.cpp b/src/Common/likePatternToRegexp.cpp new file mode 100644 index 00000000000..f9eb3b9c4fb --- /dev/null +++ b/src/Common/likePatternToRegexp.cpp @@ -0,0 +1,91 @@ +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; +} + +String likePatternToRegexp(std::string_view pattern) +{ + String res; + res.reserve(pattern.size() * 2); + + const char * pos = pattern.data(); + const char * const end = pattern.begin() + pattern.size(); + + if (pos < end && *pos == '%') + /// Eat leading % + while (++pos < end) + { + if (*pos != '%') + break; + } + else + res = "^"; + + while (pos < end) + { + switch (*pos) + { + /// Quote characters which have a special meaning in re2 + case '^': + case '$': + case '.': + case '[': + case '|': + case '(': + case ')': + case '?': + case '*': + case '+': + case '{': + res += '\\'; + res += *pos; + break; + case '%': + if (pos + 1 != end) + res += ".*"; + else + return res; + break; + case '_': + res += "."; + break; + case '\\': + if (pos + 1 == end) + throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern '{}'", pattern); + switch (pos[1]) + { + /// Interpret quoted LIKE metacharacters %, _ and \ as literals: + case '%': + case '_': + res += pos[1]; + ++pos; + break; + case '\\': + res += "\\\\"; /// backslash has a special meaning in re2 --> quote it + ++pos; + break; + /// Unknown escape sequence treated literally: as backslash (which must be quoted in re2) + the following character + default: + res += "\\\\"; + break; + } + break; + default: + res += *pos; + break; + } + ++pos; + } + + res += '$'; + return res; +} + +} diff --git a/src/Common/likePatternToRegexp.h b/src/Common/likePatternToRegexp.h new file mode 100644 index 00000000000..8c5033254ec --- /dev/null +++ b/src/Common/likePatternToRegexp.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Transforms the [I]LIKE expression into regexp re2. For example, abc%def -> ^abc.*def$ +String likePatternToRegexp(std::string_view pattern); + +} diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index e718b24e8df..e7f1615128a 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -88,6 +88,7 @@ enum class MagicNumber : uint8_t Enum16 = 18, Decimal32 = 19, Decimal64 = 20, + IPv4 = 21, }; MagicNumber serializeTypeId(TypeIndex type_id) @@ -109,6 +110,7 @@ MagicNumber serializeTypeId(TypeIndex type_id) case TypeIndex::Enum16: return MagicNumber::Enum16; case TypeIndex::Decimal32: return MagicNumber::Decimal32; case TypeIndex::Decimal64: return MagicNumber::Decimal64; + case TypeIndex::IPv4: return MagicNumber::IPv4; default: break; } @@ -136,6 +138,7 @@ TypeIndex deserializeTypeId(uint8_t serialized_type_id) case MagicNumber::Enum16: return TypeIndex::Enum16; case MagicNumber::Decimal32: return TypeIndex::Decimal32; case MagicNumber::Decimal64: return TypeIndex::Decimal64; + case MagicNumber::IPv4: return TypeIndex::IPv4; } throw Exception(ErrorCodes::LOGICAL_ERROR, "Bad magic number in T64 codec: {}", static_cast(serialized_type_id)); @@ -171,6 +174,7 @@ TypeIndex baseType(TypeIndex type_idx) return TypeIndex::UInt16; case TypeIndex::UInt32: case TypeIndex::DateTime: + case TypeIndex::IPv4: return TypeIndex::UInt32; case TypeIndex::UInt64: return TypeIndex::UInt64; @@ -198,6 +202,7 @@ TypeIndex typeIdx(const IDataType * data_type) case TypeIndex::Date: case TypeIndex::Int32: case TypeIndex::UInt32: + case TypeIndex::IPv4: case TypeIndex::DateTime: case TypeIndex::DateTime64: case TypeIndex::Decimal32: diff --git a/src/Core/BackgroundSchedulePool.cpp b/src/Core/BackgroundSchedulePool.cpp index 165d8902e85..993cfb6ef04 100644 --- a/src/Core/BackgroundSchedulePool.cpp +++ b/src/Core/BackgroundSchedulePool.cpp @@ -252,36 +252,10 @@ void BackgroundSchedulePool::cancelDelayedTask(const TaskInfoPtr & task, std::lo } -scope_guard BackgroundSchedulePool::attachToThreadGroup() -{ - scope_guard guard = [&]() - { - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - }; - - std::lock_guard lock(delayed_tasks_mutex); - - if (thread_group) - { - /// Put all threads to one thread pool - CurrentThread::attachTo(thread_group); - } - else - { - CurrentThread::initializeQuery(); - thread_group = CurrentThread::getGroup(); - } - return guard; -} - - void BackgroundSchedulePool::threadFunction() { setThreadName(thread_name.c_str()); - auto detach_thread_guard = attachToThreadGroup(); - while (!shutdown) { TaskInfoPtr task; @@ -311,8 +285,6 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() { setThreadName((thread_name + "/D").c_str()); - auto detach_thread_guard = attachToThreadGroup(); - while (!shutdown) { TaskInfoPtr task; diff --git a/src/Core/BackgroundSchedulePool.h b/src/Core/BackgroundSchedulePool.h index ba1be312f27..0fb70b1f715 100644 --- a/src/Core/BackgroundSchedulePool.h +++ b/src/Core/BackgroundSchedulePool.h @@ -90,13 +90,8 @@ private: /// Tasks ordered by scheduled time. DelayedTasks delayed_tasks; - /// Thread group used for profiling purposes - ThreadGroupStatusPtr thread_group; - CurrentMetrics::Metric tasks_metric; std::string thread_name; - - [[nodiscard]] scope_guard attachToThreadGroup(); }; diff --git a/src/Core/BaseSettings.h b/src/Core/BaseSettings.h index c49bdcf2739..521422b780e 100644 --- a/src/Core/BaseSettings.h +++ b/src/Core/BaseSettings.h @@ -107,7 +107,9 @@ public: public: const String & getName() const; Field getValue() const; + Field getDefaultValue() const; String getValueString() const; + String getDefaultValueString() const; bool isValueChanged() const; const char * getTypeName() const; const char * getDescription() const; @@ -797,6 +799,17 @@ Field BaseSettings::SettingFieldRef::getValue() const return accessor->getValue(*settings, index); } +template +Field BaseSettings::SettingFieldRef::getDefaultValue() const +{ + if constexpr (Traits::allow_custom_settings) + { + if (custom_setting) + return static_cast(custom_setting->second); + } + return accessor->getDefaultValue(index); +} + template String BaseSettings::SettingFieldRef::getValueString() const { @@ -808,6 +821,17 @@ String BaseSettings::SettingFieldRef::getValueString() const return accessor->getValueString(*settings, index); } +template +String BaseSettings::SettingFieldRef::getDefaultValueString() const +{ + if constexpr (Traits::allow_custom_settings) + { + if (custom_setting) + return custom_setting->second.toString(); + } + return accessor->getDefaultValueString(index); +} + template bool BaseSettings::SettingFieldRef::isValueChanged() const { @@ -902,7 +926,8 @@ using AliasMap = std::unordered_map; void resetValueToDefault(Data & data, size_t index) const { return field_infos[index].reset_value_to_default_function(data); } \ void writeBinary(const Data & data, size_t index, WriteBuffer & out) const { return field_infos[index].write_binary_function(data, out); } \ void readBinary(Data & data, size_t index, ReadBuffer & in) const { return field_infos[index].read_binary_function(data, in); } \ - \ + Field getDefaultValue(size_t index) const { return field_infos[index].get_default_value_function(); } \ + String getDefaultValueString(size_t index) const { return field_infos[index].get_default_value_string_function(); } \ private: \ Accessor(); \ struct FieldInfo \ @@ -923,6 +948,8 @@ using AliasMap = std::unordered_map; void (*reset_value_to_default_function)(Data &) ; \ void (*write_binary_function)(const Data &, WriteBuffer &) ; \ void (*read_binary_function)(Data &, ReadBuffer &) ; \ + Field (*get_default_value_function)() ; \ + String (*get_default_value_string_function)() ; \ }; \ std::vector field_infos; \ std::unordered_map name_to_index_map; \ @@ -1033,6 +1060,8 @@ struct DefineAliases [](const Data & data) -> bool { return data.NAME.changed; }, \ [](Data & data) { data.NAME = SettingField##TYPE{DEFAULT}; }, \ [](const Data & data, WriteBuffer & out) { data.NAME.writeBinary(out); }, \ - [](Data & data, ReadBuffer & in) { data.NAME.readBinary(in); } \ + [](Data & data, ReadBuffer & in) { data.NAME.readBinary(in); }, \ + []() -> Field { return static_cast(SettingField##TYPE{DEFAULT}); }, \ + []() -> String { return SettingField##TYPE{DEFAULT}.toString(); } \ }); } diff --git a/src/Core/Field.h b/src/Core/Field.h index 95ce43ccd44..2e772a64afc 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -108,7 +108,8 @@ struct CustomType { virtual ~CustomTypeImpl() = default; virtual const char * getTypeName() const = 0; - virtual String toString() const = 0; + virtual String toString(bool show_secrets) const = 0; + virtual bool isSecret() const = 0; virtual bool operator < (const CustomTypeImpl &) const = 0; virtual bool operator <= (const CustomTypeImpl &) const = 0; @@ -120,8 +121,9 @@ struct CustomType CustomType() = default; explicit CustomType(std::shared_ptr impl_) : impl(impl_) {} + bool isSecret() const { return impl->isSecret(); } const char * getTypeName() const { return impl->getTypeName(); } - String toString() const { return impl->toString(); } + String toString(bool show_secrets = true) const { return impl->toString(show_secrets); } const CustomTypeImpl & getImpl() { return *impl; } bool operator < (const CustomType & rhs) const { return *impl < *rhs.impl; } diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index b9c03aae0ca..78535a751c3 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -53,7 +53,17 @@ private: /// This needed to use structured bindings for NameAndTypePair /// const auto & [name, type] = name_and_type template -decltype(auto) get(const NameAndTypePair & name_and_type) +const std::tuple_element_t & get(const NameAndTypePair & name_and_type) +{ + if constexpr (I == 0) + return name_and_type.name; + else if constexpr (I == 1) + return name_and_type.type; +} + +/// auto & [name, type] = name_and_type +template +std::tuple_element_t & get(NameAndTypePair & name_and_type) { if constexpr (I == 0) return name_and_type.name; diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp new file mode 100644 index 00000000000..c50a67b04c9 --- /dev/null +++ b/src/Core/ServerSettings.cpp @@ -0,0 +1,19 @@ +#include "ServerSettings.h" +#include + +namespace DB +{ + +IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) + +void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + for (auto setting : all()) + { + const auto & name = setting.getName(); + if (config.has(name)) + set(name, config.getString(name)); + } +} + +} diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h new file mode 100644 index 00000000000..1e884266c17 --- /dev/null +++ b/src/Core/ServerSettings.h @@ -0,0 +1,78 @@ +#pragma once + + +#include + + +namespace Poco::Util +{ +class AbstractConfiguration; +} + +namespace DB +{ + +#define SERVER_SETTINGS(M, ALIAS) \ + M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \ + M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \ + M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \ + M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \ + M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \ + M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \ + M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \ + M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ + M(Int32, max_connections, 1024, "Max server connections.", 0) \ + M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \ + M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating asynchronous metrics.", 0) \ + M(UInt32, max_threads_for_connection_collector, 10, "The maximum number of threads that will be used for draining connections asynchronously in a background upon finishing executing distributed queries.", 0) \ + M(String, default_database, "default", "Default database name.", 0) \ + M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \ + M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \ + M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ + M(UInt64, max_server_memory_usage, 0, "Limit on total memory usage. Zero means Unlimited.", 0) \ + M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to ram ratio. Allows to lower max memory on low-memory systems.", 0) \ + M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \ + \ + M(UInt64, max_concurrent_queries, 0, "Limit on total number of concurrently executed queries. Zero means Unlimited.", 0) \ + M(UInt64, max_concurrent_insert_queries, 0, "Limit on total number of concurrently insert queries. Zero means Unlimited.", 0) \ + M(UInt64, max_concurrent_select_queries, 0, "Limit on total number of concurrently select queries. Zero means Unlimited.", 0) \ + \ + M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro ram max ratio. Allows to lower cache size on low-memory systems.", 0) \ + M(String, uncompressed_cache_policy, "SLRU", "Uncompressed cache policy name.", 0) \ + M(UInt64, uncompressed_cache_size, 0, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \ + M(UInt64, mark_cache_size, 5368709120, "Size of cache for marks (index of MergeTree family of tables).", 0) \ + M(String, mark_cache_policy, "SLRU", "Mark cache policy name.", 0) \ + M(UInt64, index_uncompressed_cache_size, 0, "Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.", 0) \ + M(UInt64, index_mark_cache_size, 0, "Size of cache for index marks. Zero means disabled.", 0) \ + M(UInt64, mmap_cache_size, 1000, "A cache for mmapped files.", 0) /* The choice of default is arbitrary. */ \ + \ + M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ + M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ + M(UInt32, dns_max_consecutive_failures, 1024, "Max server connections.", 0) \ + \ + M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ + M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ + M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means Unlimited.", 0) \ + M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ + \ + M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \ + M(UInt64, background_merges_mutations_concurrency_ratio, 2, "The multiplier which shows the relation between the number of tasks that could be executed concurrently and the number of threads being used.", 0) \ + M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \ + M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \ + M(UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \ + M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \ + M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \ + M(UInt64, background_schedule_pool_size, 16, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ + M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ + M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ + + +DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) + +struct ServerSettings : public BaseSettings +{ + void loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config); +}; + +} + diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c73c56ca5d7..a65f2ccb60f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -489,7 +489,7 @@ class IColumn; M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ - M(Bool, allow_experimental_lightweight_delete, false, "Enable lightweight DELETE mutations for mergetree tables. Work in progress", 0) \ + M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) \ M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ @@ -726,6 +726,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, allow_experimental_database_atomic, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_bigint_types, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \ + MAKE_OBSOLETE(M, Bool, allow_experimental_lightweight_delete, true) \ MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \ MAKE_OBSOLETE(M, HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT) \ MAKE_OBSOLETE(M, Bool, database_replicated_ddl_output, true) \ @@ -771,7 +772,7 @@ class IColumn; M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \ M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \ - M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ + M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ @@ -809,7 +810,7 @@ class IColumn; M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \ M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \ M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \ - M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, false, "Ignore unknown keys in json object for named tuples", 0) \ + M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, "Ignore unknown keys in json object for named tuples", 0) \ M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ @@ -831,6 +832,7 @@ class IColumn; M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \ + /** This setting is obsolete and do nothing, left for compatibility reasons. */ \ M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \ M(UInt64, format_binary_max_string_size, 1_GiB, "The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ @@ -855,6 +857,7 @@ class IColumn; M(UInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \ + M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 48dcded1868..04f328bb665 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}}, {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index bce4f7c0000..9e1ab585bb0 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -171,4 +171,12 @@ IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, {{"mmap", LocalFSReadMethod::mmap}, {"pread", LocalFSReadMethod::pread}, {"read", LocalFSReadMethod::read}}) + + +IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, + {{"1.0", FormatSettings::ParquetVersion::V1_0}, + {"2.4", FormatSettings::ParquetVersion::V2_4}, + {"2.6", FormatSettings::ParquetVersion::V2_6}, + {"2.latest", FormatSettings::ParquetVersion::V2_LATEST}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 35eb8eb4b6c..139a04f3a5a 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -72,6 +72,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeIn DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOutputFormat, FormatSettings::DateTimeOutputFormat) +DECLARE_SETTING_ENUM_WITH_RENAME(ParquetVersion, FormatSettings::ParquetVersion) + enum class LogsLevel { none = 0, /// Disable diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 4164bf1e27e..06cd53013ec 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -150,10 +150,16 @@ template struct SettingFieldNumber; template struct SettingFieldNumber; template struct SettingFieldNumber; template struct SettingFieldNumber; +template struct SettingFieldNumber; +template struct SettingFieldNumber; +template struct SettingFieldNumber; template struct SettingAutoWrapper>; template struct SettingAutoWrapper>; template struct SettingAutoWrapper>; +template struct SettingAutoWrapper>; +template struct SettingAutoWrapper>; +template struct SettingAutoWrapper>; namespace { diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index c6fe46c9f6b..3994e402c9a 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -55,7 +55,10 @@ struct SettingFieldNumber using SettingFieldUInt64 = SettingFieldNumber; using SettingFieldInt64 = SettingFieldNumber; +using SettingFieldUInt32 = SettingFieldNumber; +using SettingFieldInt32 = SettingFieldNumber; using SettingFieldFloat = SettingFieldNumber; +using SettingFieldDouble = SettingFieldNumber; using SettingFieldBool = SettingFieldNumber; /** Wraps any SettingField to support special value 'auto' that can be checked with `is_auto` flag. @@ -129,6 +132,9 @@ struct SettingAutoWrapper using SettingFieldUInt64Auto = SettingAutoWrapper; using SettingFieldInt64Auto = SettingAutoWrapper; using SettingFieldFloatAuto = SettingAutoWrapper; +using SettingFieldUInt32Auto = SettingAutoWrapper; +using SettingFieldInt32Auto = SettingAutoWrapper; +using SettingFieldDoubleAuto = SettingAutoWrapper; /* Similar to SettingFieldUInt64Auto with small differences to behave like regular UInt64, supported to compatibility. * When setting to 'auto' it becomes equal to the number of processor cores without taking into account SMT. diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 4056e27ad52..60179fd5317 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -134,6 +134,8 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) } +static std::atomic fatal_error_printed{false}; + /** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log. */ static void signalHandler(int sig, siginfo_t * info, void * context) @@ -159,7 +161,16 @@ static void signalHandler(int sig, siginfo_t * info, void * context) if (sig != SIGTSTP) /// This signal is used for debugging. { /// The time that is usually enough for separate thread to print info into log. - sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace + /// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case. + for (size_t i = 0; i < 300; ++i) + { + /// We will synchronize with the thread printing the messages with an atomic variable to finish earlier. + if (fatal_error_printed) + break; + + /// This coarse method of synchronization is perfectly ok for fatal signals. + sleepForSeconds(1); + } call_default_signal_handler(sig); } @@ -309,7 +320,9 @@ private: } if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) + { DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace); + } } std::string signal_description = "Unknown signal"; @@ -407,6 +420,8 @@ private: /// When everything is done, we will try to send these error messages to client. if (thread_ptr) thread_ptr->onFatalError(); + + fatal_error_printed = true; } }; diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index e5ba23b9df8..f029ac6ba27 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -25,7 +25,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } namespace Nested @@ -242,7 +242,7 @@ void validateArraySizes(const Block & block) const ColumnArray & another_array_column = assert_cast(*elem.column); if (!first_array_column.hasEqualOffsets(another_array_column)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Elements '{}' and '{}' " "of Nested data structure '{}' (Array columns) have different array sizes.", block.getByPosition(it->second).name, elem.name, split.first); diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 24aa9e8320d..73b232690c7 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -510,7 +511,10 @@ void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr deserializeTextImpl(column, istr, [&](IColumn & nested_column) { - nested->deserializeTextJSON(nested_column, istr, settings); + if (settings.null_as_default) + SerializationNullable::deserializeTextJSONImpl(nested_column, istr, settings, nested); + else + nested->deserializeTextJSON(nested_column, istr, settings); }, false); } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 98067077178..34da0f11cae 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -211,7 +212,10 @@ void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, deserializeTextImpl(column, istr, [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) { - subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); + if (settings.null_as_default) + SerializationNullable::deserializeTextJSONImpl(subcolumn, buf, settings, subcolumn_serialization); + else + subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); }); } diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 8b0bdc05d00..20188f7cec5 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -219,13 +219,9 @@ static ReturnType safeDeserialize( /// Deserialize value into non-nullable column. In case of NULL, insert default value and return false. template , ReturnType>* = nullptr> static ReturnType safeDeserialize( - IColumn & column, const ISerialization & nested, + IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested) { - assert(!dynamic_cast(&column)); - assert(!dynamic_cast(&nested)); - UNUSED(nested); - bool insert_default = check_for_null(); if (insert_default) column.insertDefault(); diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index ce15e099222..b963d35d785 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -231,7 +232,19 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr seen_elements[element_pos] = 1; auto & element_column = extractElementColumn(column, element_pos); - elems[element_pos]->deserializeTextJSON(element_column, istr, settings); + + try + { + if (settings.null_as_default) + SerializationNullable::deserializeTextJSONImpl(element_column, istr, settings, elems[element_pos]); + else + elems[element_pos]->deserializeTextJSON(element_column, istr, settings); + } + catch (Exception & e) + { + e.addMessage("(while reading the value of nested key " + name + ")"); + throw; + } skipWhitespaceIfAny(istr); ++processed; diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 5a3e10656b4..8080179ad47 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -64,7 +64,7 @@ static DataTypePtr create(const ASTPtr & arguments) return std::make_shared(); const auto scale = getArgument(arguments, 0, "scale", "DateTime"); - const auto timezone = getArgument(arguments, !!scale, "timezone", "DateTime"); + const auto timezone = getArgument(arguments, scale ? 1 : 0, "timezone", "DateTime"); if (!scale && !timezone) throw Exception::createDeprecated(getExceptionMessage(" has wrong type: ", 0, "scale", "DateTime", Field::Types::Which::UInt64), diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index c636f200324..caba2a52a51 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -99,6 +99,17 @@ struct RegExpTreeDictionary::RegexTreeNode return searcher.Match(haystack, 0, size, re2_st::RE2::Anchor::UNANCHORED, nullptr, 0); } + /// check if this node can cover all the attributes from the query. + bool containsAll(const std::unordered_map & matching_attributes) const + { + for (const auto & [key, value] : matching_attributes) + { + if (!attributes.contains(key)) + return false; + } + return true; + } + struct AttributeValue { Field field; @@ -498,6 +509,9 @@ std::unordered_map RegExpTreeDictionary::match( if (node_ptr->match(reinterpret_cast(keys_data.data()) + offset, length)) { match_result.insertNodeID(node_ptr->id); + /// When this node is leaf and contains all the required attributes, it means a match. + if (node_ptr->containsAll(attributes) && node_ptr->children.empty()) + break; } } diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index fc9cd7edbee..5deb9ab11b5 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace DB @@ -17,15 +18,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -bool isDiskFunction(ASTPtr ast) -{ - if (!ast) - return false; - - const auto * function = ast->as(); - return function && function->name == "disk" && function->arguments->as(); -} - std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) { /// We need a unique name for a created custom disk, but it needs to be the same @@ -35,11 +27,6 @@ std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); - LOG_TRACE( - &Poco::Logger::get("getOrCreateDiskFromDiskAST"), - "Using disk name `{}` for custom disk {}", - disk_name, disk_setting_string); - auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { const auto * function_args_expr = assert_cast(function.arguments.get()); const auto & function_args = function_args_expr->children; diff --git a/src/Disks/getOrCreateDiskFromAST.h b/src/Disks/getOrCreateDiskFromAST.h index c1d4bda1a49..7c64707b0bd 100644 --- a/src/Disks/getOrCreateDiskFromAST.h +++ b/src/Disks/getOrCreateDiskFromAST.h @@ -15,9 +15,4 @@ class ASTFunction; */ std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context); -/* - * Is given ast has form of a disk() function. - */ -bool isDiskFunction(ASTPtr ast); - } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 5bee13075fb..aca3166a8c4 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -56,7 +56,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString(); format_settings.avro.string_column_pattern = settings.output_format_avro_string_column_pattern.toString(); format_settings.avro.output_rows_in_file = settings.output_format_avro_rows_in_file; - format_settings.avro.null_as_default = settings.input_format_avro_null_as_default; format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; @@ -111,6 +110,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.null_as_default = settings.input_format_null_as_default; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; + format_settings.parquet.output_version = settings.output_format_parquet_version; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 9b657d37862..d1755a35c5f 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -106,7 +106,6 @@ struct FormatSettings bool allow_missing_fields = false; String string_column_pattern; UInt64 output_rows_in_file = 1; - bool null_as_default = false; } avro; String bool_true_representation = "true"; @@ -176,6 +175,14 @@ struct FormatSettings String column_for_object_name; } json_object_each_row; + enum class ParquetVersion + { + V1_0, + V2_4, + V2_6, + V2_LATEST, + }; + struct { UInt64 row_group_size = 1000000; @@ -187,6 +194,7 @@ struct FormatSettings bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; UInt64 max_block_size = 8192; + ParquetVersion output_version; } parquet; struct Pretty diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 58baee5931b..9f8d4ba1930 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -32,8 +33,19 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_) { } -NativeReader::NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, bool skip_unknown_columns_) - : istr(istr_), header(header_), server_revision(server_revision_), skip_unknown_columns(skip_unknown_columns_) +NativeReader::NativeReader( + ReadBuffer & istr_, + const Block & header_, + UInt64 server_revision_, + bool skip_unknown_columns_, + bool null_as_default_, + BlockMissingValues * block_missing_values_) + : istr(istr_) + , header(header_) + , server_revision(server_revision_) + , skip_unknown_columns(skip_unknown_columns_) + , null_as_default(null_as_default_) + , block_missing_values(block_missing_values_) { } @@ -187,8 +199,12 @@ Block NativeReader::read() { if (header.has(column.name)) { - /// Support insert from old clients without low cardinality type. auto & header_column = header.getByName(column.name); + + if (null_as_default) + insertNullAsDefaultIfNeeded(column, header_column, header.getPositionByName(column.name), block_missing_values); + + /// Support insert from old clients without low cardinality type. if (!header_column.type->equals(*column.type)) { column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type); @@ -225,12 +241,19 @@ Block NativeReader::read() /// Allow to skip columns. Fill them with default values. Block tmp_res; - for (auto & col : header) + for (size_t column_i = 0; column_i != header.columns(); ++column_i) { + auto & col = header.getByPosition(column_i); if (res.has(col.name)) + { tmp_res.insert(res.getByName(col.name)); + } else + { tmp_res.insert({col.type->createColumn()->cloneResized(rows), col.type, col.name}); + if (block_missing_values) + block_missing_values->setBits(column_i, rows); + } } tmp_res.info = res.info; diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 3ae53d45faf..2d8b16e06eb 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -24,7 +24,13 @@ public: /// For cases when data structure (header) is known in advance. /// NOTE We may use header for data validation and/or type conversions. It is not implemented. - NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, bool skip_unknown_columns_ = false); + NativeReader( + ReadBuffer & istr_, + const Block & header_, + UInt64 server_revision_, + bool skip_unknown_columns_ = false, + bool null_as_default_ = false, + BlockMissingValues * block_missing_values_ = nullptr); /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read. NativeReader(ReadBuffer & istr_, UInt64 server_revision_, @@ -43,7 +49,9 @@ private: ReadBuffer & istr; Block header; UInt64 server_revision; - bool skip_unknown_columns; + bool skip_unknown_columns = false; + bool null_as_default = false; + BlockMissingValues * block_missing_values = nullptr; bool use_index = false; IndexForNativeFormat::Blocks::const_iterator index_block_it; diff --git a/src/Formats/insertNullAsDefaultIfNeeded.cpp b/src/Formats/insertNullAsDefaultIfNeeded.cpp new file mode 100644 index 00000000000..767892718c5 --- /dev/null +++ b/src/Formats/insertNullAsDefaultIfNeeded.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +void insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values) +{ + if (!isNullableOrLowCardinalityNullable(input_column.type) || isNullableOrLowCardinalityNullable(header_column.type)) + return; + + if (block_missing_values) + { + for (size_t i = 0; i < input_column.column->size(); ++i) + { + if (input_column.column->isNullAt(i)) + block_missing_values->setBit(column_i, i); + } + } + + if (input_column.type->isNullable()) + { + input_column.column = assert_cast(input_column.column.get())->getNestedColumnWithDefaultOnNull(); + input_column.type = removeNullable(input_column.type); + } + else + { + input_column.column = assert_cast(input_column.column.get())->cloneWithDefaultOnNull(); + const auto * lc_type = assert_cast(input_column.type.get()); + input_column.type = std::make_shared(removeNullable(lc_type->getDictionaryType())); + } +} + +} diff --git a/src/Formats/insertNullAsDefaultIfNeeded.h b/src/Formats/insertNullAsDefaultIfNeeded.h new file mode 100644 index 00000000000..3e4dcd1e74a --- /dev/null +++ b/src/Formats/insertNullAsDefaultIfNeeded.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +void insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values); + +} diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index c981f666219..ff09274d907 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -213,7 +213,7 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments) if (i == 0) offsets = offsets_i; else if (*offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to aggregate function must be equal."); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to aggregate function must be equal."); } return {nested_columns, offsets->data()}; } diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 69c3a299eea..7b6f4213cd3 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -55,7 +55,7 @@ #include #include #include - +#include namespace DB { @@ -174,7 +174,11 @@ struct HalfMD5Impl MD5_Update(&ctx, reinterpret_cast(begin), size); MD5_Final(buf.char_data, &ctx); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return buf.uint64_data; /// No need to flip bytes on big endian machines +#else return Poco::ByteOrder::flipBytes(static_cast(buf.uint64_data)); /// Compatibility with existing code. Cast need for old poco AND macos where UInt64 != uint64_t +#endif } static UInt64 combineHashes(UInt64 h1, UInt64 h2) @@ -1032,10 +1036,19 @@ private: } else { - if (std::is_same_v) + if constexpr (std::is_same_v) h = JavaHashImpl::apply(vec_from[i]); else - h = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); + { + FromType v = vec_from[i]; + if constexpr (std::endian::native == std::endian::big) + { + FromType tmp_v; + reverseMemcpy(&tmp_v, &v, sizeof(v)); + v = tmp_v; + } + h = apply(key, reinterpret_cast(&v), sizeof(v)); + } } if constexpr (first) @@ -1055,9 +1068,7 @@ private: size_t size = vec_to.size(); if constexpr (first) - { vec_to.assign(size, hash); - } else { for (size_t i = 0; i < size; ++i) @@ -1080,8 +1091,17 @@ private: size_t size = vec_from.size(); for (size_t i = 0; i < size; ++i) { - ToType h = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); - + ToType h; + if constexpr (std::endian::native == std::endian::little) + { + h = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); + } + else + { + char tmp_buffer[sizeof(vec_from[i])]; + reverseMemcpy(tmp_buffer, &vec_from[i], sizeof(vec_from[i])); + h = apply(key, reinterpret_cast(tmp_buffer), sizeof(vec_from[i])); + } if constexpr (first) vec_to[i] = h; else @@ -1092,13 +1112,20 @@ private: { auto value = col_from_const->template getValue(); - ToType h = apply(key, reinterpret_cast(&value), sizeof(value)); - + ToType h; + if constexpr (std::endian::native == std::endian::little) + { + h = apply(key, reinterpret_cast(&value), sizeof(value)); + } + else + { + char tmp_buffer[sizeof(value)]; + reverseMemcpy(tmp_buffer, &value, sizeof(value)); + h = apply(key, reinterpret_cast(tmp_buffer), sizeof(value)); + } size_t size = vec_to.size(); if constexpr (first) - { vec_to.assign(size, h); - } else { for (size_t i = 0; i < size; ++i) @@ -1229,7 +1256,7 @@ private: { /// NOTE: here, of course, you can do without the materialization of the column. ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(key, type, &*full_column, vec_to); + executeArray(key, type, full_column.get(), vec_to); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1241,6 +1268,10 @@ private: { WhichDataType which(from_type); + if (icolumn->size() != vec_to.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", + icolumn->getName(), icolumn->size(), vec_to.size(), getName()); + if (which.isUInt8()) executeIntType(key, icolumn, vec_to); else if (which.isUInt16()) executeIntType(key, icolumn, vec_to); else if (which.isUInt32()) executeIntType(key, icolumn, vec_to); @@ -1301,10 +1332,9 @@ private: const auto & type_map = assert_cast(*type); executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); } - else if (const auto * const_map = checkAndGetColumnConstData(column)) + else if (const auto * const_map = checkAndGetColumnConst(column)) { - const auto & type_map = assert_cast(*type); - executeForArgument(key, type_map.getNestedType().get(), const_map->getNestedColumnPtr().get(), vec_to, is_first); + executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); } else { @@ -1340,8 +1370,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - size_t rows = input_rows_count; - auto col_to = ColumnVector::create(rows); + auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -1353,7 +1382,7 @@ public: if (arguments.size() <= first_data_argument) { /// Return a fixed random-looking magic number when input is empty - vec_to.assign(rows, static_cast(0xe28dbde7fe22e41c)); + vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); } KeyType key{}; diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 547df77be6a..5df0d1831af 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -647,11 +647,12 @@ public: case ElementType::OBJECT: type = '{'; break; + case ElementType::BOOL: + type = 'b'; + break; case ElementType::NULL_VALUE: type = 0; break; - default: - return false; } ColumnVector & col_vec = assert_cast &>(dest); diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 1c30dee0482..1f25794536b 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -26,6 +26,7 @@ # pragma GCC diagnostic pop #endif + namespace DB { @@ -66,12 +67,12 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( if (!low_cardinality_type) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Incompatible type for low cardinality column: {}", + "Incompatible type for LowCardinality column: {}", column.type->getName()); if (can_be_executed_on_default_arguments) { - /// Normal case, when function can be executed on values's default. + /// Normal case, when function can be executed on values' default. column.column = low_cardinality_column->getDictionary().getNestedColumn(); indexes = low_cardinality_column->getIndexesPtr(); } @@ -280,6 +281,7 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run); bool res_is_constant = isColumnConst(*res); + auto keys = res_is_constant ? res->cloneResized(1)->convertToFullColumnIfConst() : res; diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index db8dd55474e..fe38ac49d62 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -23,9 +23,10 @@ namespace ErrorCodes namespace impl { -/// Is the [I]LIKE expression reduced to finding a substring in a string? +/// Is the [I]LIKE expression equivalent to a substring search? inline bool likePatternIsSubstring(std::string_view pattern, String & res) { + /// TODO: ignore multiple leading or trailing % if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%')) return false; @@ -45,9 +46,25 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res) case '\\': ++pos; if (pos == end) + /// pattern ends with \% --> trailing % is to be taken literally and pattern doesn't qualify for substring search return false; else - res += *pos; + { + switch (*pos) + { + /// Known LIKE escape sequences: + case '%': + case '_': + case '\\': + res += *pos; + break; + /// For all other escape sequences, the backslash loses its special meaning + default: + res += '\\'; + res += *pos; + break; + } + } break; default: res += *pos; diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index 123640e4db3..4bfd10bdbf5 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -7,10 +7,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 64362edf2c3..91f83a457be 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -75,6 +75,7 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos Pos dot_pos = nullptr; Pos colon_pos = nullptr; + bool has_sub_delims = false; bool has_at_symbol = false; bool has_terminator_after_colon = false; const auto * start_of_host = pos; @@ -97,25 +98,35 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos case '@': /// myemail@gmail.com if (has_terminator_after_colon) return std::string_view{}; if (has_at_symbol) goto done; + has_sub_delims = false; has_at_symbol = true; start_of_host = pos + 1; break; + case ';': + case '=': + case '&': + case '~': + case '%': + /// Symbols above are sub-delims in RFC3986 and should be + /// allowed for userinfo (named identification here). + /// + /// NOTE: that those symbols is allowed for reg-name (host) + /// too, but right now host parsing looks more like in + /// RFC1034 (in other words domains that are allowed to be + /// registered). + has_sub_delims = true; + continue; case ' ': /// restricted symbols in whole URL case '\t': case '<': case '>': - case '%': case '{': case '}': case '|': case '\\': case '^': - case '~': case '[': case ']': - case ';': - case '=': - case '&': if (colon_pos == nullptr) return std::string_view{}; else @@ -124,6 +135,8 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos } done: + if (has_sub_delims) + return std::string_view{}; if (!has_at_symbol) pos = colon_pos ? colon_pos : pos; return checkAndReturnHost(pos, dot_pos, start_of_host); diff --git a/src/Functions/URL/netloc.cpp b/src/Functions/URL/netloc.cpp index 5c23f7cc06d..bc34e34a40d 100644 --- a/src/Functions/URL/netloc.cpp +++ b/src/Functions/URL/netloc.cpp @@ -7,6 +7,7 @@ namespace DB { +/// NOTE: Implementation is not RFC3986 compatible struct ExtractNetloc { /// We use the same as domain function @@ -67,6 +68,7 @@ struct ExtractNetloc /// Now pos points to the first byte after scheme (if there is). bool has_identification = false; + Pos hostname_end = end; Pos question_mark_pos = end; Pos slash_pos = end; Pos start_of_host = pos; @@ -90,25 +92,39 @@ struct ExtractNetloc return std::string_view(start_of_host, pos - start_of_host); case '@': /// foo:bar@example.ru has_identification = true; + hostname_end = end; break; + case ';': + case '=': + case '&': + case '~': + case '%': + /// Symbols above are sub-delims in RFC3986 and should be + /// allowed for userinfo (named identification here). + /// + /// NOTE: that those symbols is allowed for reg-name (host) + /// too, but right now host parsing looks more like in + /// RFC1034 (in other words domains that are allowed to be + /// registered). + if (!has_identification) + { + hostname_end = pos; + break; + } + [[fallthrough]]; case ' ': /// restricted symbols in whole URL case '\t': case '<': case '>': - case '%': case '{': case '}': case '|': case '\\': case '^': - case '~': case '[': case ']': - case ';': - case '=': - case '&': return pos > start_of_host - ? std::string_view(start_of_host, std::min(std::min(pos - 1, question_mark_pos), slash_pos) - start_of_host) + ? std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host) : std::string_view(); } } @@ -116,7 +132,7 @@ struct ExtractNetloc if (has_identification) return std::string_view(start_of_host, pos - start_of_host); else - return std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host); + return std::string_view(start_of_host, std::min(std::min(std::min(pos, question_mark_pos), slash_pos), hostname_end) - start_of_host); } static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size) diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 989ec500cfb..61abc607349 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -37,7 +37,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -361,7 +361,7 @@ public: if (getOffsetsPtr(*column_array) != offsets_column && getOffsets(*column_array) != typeid_cast(*offsets_column).getData()) throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "{}s passed to {} must have equal size", argument_type_name, getName()); diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index c1137848cc5..c68c89ee0d5 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int ARGUMENT_OUT_OF_BOUND; } @@ -356,7 +356,7 @@ private: { ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0; throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arguments of function {} have different array sizes: {} and {}", getName(), offsets_x[row] - prev_offset, @@ -423,7 +423,7 @@ private: if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) { throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arguments of function {} have different array sizes: {} and {}", getName(), offsets_x[0], diff --git a/src/Functions/array/arrayEnumerateExtended.h b/src/Functions/array/arrayEnumerateExtended.h index c3d69bb6972..3f145c05b54 100644 --- a/src/Functions/array/arrayEnumerateExtended.h +++ b/src/Functions/array/arrayEnumerateExtended.h @@ -20,7 +20,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } class FunctionArrayEnumerateUniq; @@ -153,7 +153,7 @@ ColumnPtr FunctionArrayEnumerateExtended::executeImpl(const ColumnsWith offsets_column = array->getOffsetsPtr(); } else if (offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.", getName()); const auto * array_data = &array->getData(); diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 73feb3e46ea..8a348c07421 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -60,7 +60,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } class FunctionArrayEnumerateUniqRanked; @@ -194,7 +194,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( { if (*offsets_by_depth[0] != array->getOffsets()) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths and effective depths of all arrays passed to {} must be equal.", getName()); } } @@ -217,7 +217,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( { if (*offsets_by_depth[col_depth] != array->getOffsets()) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths and effective depths of all arrays passed to {} must be equal.", getName()); } } @@ -225,7 +225,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( if (col_depth < arrays_depths.depths[array_num]) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "{}: Passed array number {} depth ({}) is more than the actual array depth ({}).", getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth); } diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index d4896595941..a4b2cc037ab 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -19,7 +19,7 @@ namespace DB namespace ErrorCodes { - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -144,7 +144,7 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume if (i == 0) offsets = offsets_i; else if (*offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.", getName()); } const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 07391c963a6..790bc3ef879 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -21,7 +21,7 @@ namespace DB namespace ErrorCodes { - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -190,7 +190,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl( if (i == 0) offsets = offsets_i; else if (*offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.", getName()); } const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); diff --git a/src/Functions/array/arrayUniq.cpp b/src/Functions/array/arrayUniq.cpp index 1d1cf4e6392..81ba5b62094 100644 --- a/src/Functions/array/arrayUniq.cpp +++ b/src/Functions/array/arrayUniq.cpp @@ -18,7 +18,7 @@ namespace DB namespace ErrorCodes { - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -151,7 +151,7 @@ ColumnPtr FunctionArrayUniq::executeImpl(const ColumnsWithTypeAndName & argument if (i == 0) offsets = &offsets_i; else if (offsets_i != *offsets) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.", getName()); const auto * array_data = &array->getData(); diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index 3a50491fd4b..44c323e3fe3 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -13,7 +13,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; } @@ -81,7 +81,7 @@ public: } else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument {} of function {} have different array sizes", i + 1, getName()); } diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index dc09facb81b..f1f0fef8fd9 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -55,14 +55,19 @@ private: getName(), arguments.size()); } - for (const auto & arg : arguments) + DataTypes arg_types; + for (size_t i = 0, size = arguments.size(); i < size; ++i) { - if (!isInteger(arg)) + if (i < 2 && WhichDataType(arguments[i]).isIPv4()) + arg_types.emplace_back(std::make_shared()); + else if (isInteger(arguments[i])) + arg_types.push_back(arguments[i]); + else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arg->getName(), getName()); + arguments[i]->getName(), getName()); } - DataTypePtr common_type = getLeastSupertype(arguments); + DataTypePtr common_type = getLeastSupertype(arg_types); return std::make_shared(common_type); } diff --git a/src/Functions/checkHyperscanRegexp.cpp b/src/Functions/checkHyperscanRegexp.cpp index e6fbc3baa1a..441e35cc5db 100644 --- a/src/Functions/checkHyperscanRegexp.cpp +++ b/src/Functions/checkHyperscanRegexp.cpp @@ -48,12 +48,12 @@ bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp) re2_st::StringPiece haystack(regexp.data(), regexp.size()); re2_st::StringPiece matches[2]; size_t start_pos = 0; - while (start_pos < regexp.size()) + while (start_pos < haystack.size()) { - if (searcher_one_repeat.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2)) + if (searcher_one_repeat.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2)) { const auto & match = matches[0]; - start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length + start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length const auto & submatch = matches[1]; if (isLargerThanFifty({submatch.data(), submatch.size()})) return true; @@ -70,12 +70,12 @@ bool SlowWithHyperscanChecker::isSlowTwoRepeats(std::string_view regexp) re2_st::StringPiece haystack(regexp.data(), regexp.size()); re2_st::StringPiece matches[3]; size_t start_pos = 0; - while (start_pos < regexp.size()) + while (start_pos < haystack.size()) { - if (searcher_two_repeats.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3)) + if (searcher_two_repeats.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3)) { const auto & match = matches[0]; - start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length + start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length const auto & submatch1 = matches[1]; const auto & submatch2 = matches[2]; if (isLargerThanFifty({submatch1.data(), submatch1.size()}) diff --git a/src/Functions/likePatternToRegexp.h b/src/Functions/likePatternToRegexp.h deleted file mode 100644 index 298137d7bc1..00000000000 --- a/src/Functions/likePatternToRegexp.h +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; -} - -/// Transforms the [I]LIKE expression into regexp re2. For example, abc%def -> ^abc.*def$ -inline String likePatternToRegexp(std::string_view pattern) -{ - String res; - res.reserve(pattern.size() * 2); - - const char * pos = pattern.data(); - const char * const end = pattern.begin() + pattern.size(); - - if (pos < end && *pos == '%') - ++pos; - else - res = "^"; - - while (pos < end) - { - switch (*pos) - { - case '^': case '$': case '.': case '[': case '|': case '(': case ')': case '?': case '*': case '+': case '{': - res += '\\'; - res += *pos; - break; - case '%': - if (pos + 1 != end) - res += ".*"; - else - return res; - break; - case '_': - res += "."; - break; - case '\\': - if (pos + 1 == end) - throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern"); - /// Known escape sequences. - if (pos[1] == '%' || pos[1] == '_') - { - res += pos[1]; - ++pos; - } - else if (pos[1] == '\\') - { - res += "\\\\"; - ++pos; - } - else - { - /// Unknown escape sequence treated literally: as backslash and the following character. - res += "\\\\"; - } - break; - default: - res += *pos; - break; - } - ++pos; - } - - res += '$'; - return res; -} - -} diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 3160c5ddb43..7fd4d3aa6d5 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -332,7 +332,7 @@ public: } size_t col_key_size = sub_map_column->size(); - auto column = is_const? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column); + auto column = is_const ? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column); ColumnsWithTypeAndName new_arguments = { diff --git a/src/Functions/nested.cpp b/src/Functions/nested.cpp index 7617951784f..b22330cd881 100644 --- a/src/Functions/nested.cpp +++ b/src/Functions/nested.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } namespace @@ -118,7 +118,7 @@ public: const auto * rhs_array = assert_cast(arguments[i].column.get()); if (!lhs_array->hasEqualOffsets(*rhs_array)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument {} of function {} have different array offsets", i + 1, getName()); diff --git a/src/Functions/trap.cpp b/src/Functions/trap.cpp index 44deb901b0d..6260056ef31 100644 --- a/src/Functions/trap.cpp +++ b/src/Functions/trap.cpp @@ -27,6 +27,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int CANNOT_ALLOCATE_MEMORY; extern const int CANNOT_DLOPEN; + extern const int LOGICAL_ERROR; } @@ -174,6 +175,10 @@ public: if (!handle) throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror()); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror } + else if (mode == "logical error") + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: trap"); + } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown trap mode"); } diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 879b6feed40..b1fd200f5cd 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } namespace @@ -200,7 +200,7 @@ private: const auto & array_y = *assert_cast(col_y.get()); if (!array_x.hasEqualOffsets(array_y)) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument 3 of function {} have different array sizes", getName()); } } @@ -222,7 +222,7 @@ private: { if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument 3 of function {} have different array sizes", getName()); } prev_offset = offsets_y[row]; diff --git a/src/Functions/validateNestedArraySizes.cpp b/src/Functions/validateNestedArraySizes.cpp index 7e1dbc798d8..c422637ba7f 100644 --- a/src/Functions/validateNestedArraySizes.cpp +++ b/src/Functions/validateNestedArraySizes.cpp @@ -12,7 +12,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } /** Function validateNestedArraySizes is used to check the consistency of Nested DataType subcolumns's offsets when Update @@ -106,7 +106,7 @@ ColumnPtr FunctionValidateNestedArraySizes::executeImpl( else if (first_length != length) { throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Elements '{}' and '{}' of Nested data structure (Array columns) " "have different array sizes ({} and {} respectively) on row {}", arguments[1].name, arguments[args_idx].name, first_length, length, i); diff --git a/src/Functions/widthBucket.cpp b/src/Functions/widthBucket.cpp new file mode 100644 index 00000000000..a32fa159c2c --- /dev/null +++ b/src/Functions/widthBucket.cpp @@ -0,0 +1,290 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +class FunctionWidthBucket : public IFunction +{ + template + void throwIfInvalid( + const size_t argument_index, + const ColumnConst * col_const, + const typename ColumnVector::Container * col_vec, + const size_t expected_size) const + { + if ((nullptr == col_const) ^ (nullptr != col_vec && col_vec->size() == expected_size)) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Logical error in function {}: argument {} has unexpected type or size!", + getName(), + argument_index); + } + } + + template + const typename ColumnVector::Container * getDataIfNotNull(const ColumnVector * col_vec) const + { + if (nullptr == col_vec) + { + return nullptr; + } + return &col_vec->getData(); + } + + template + static TDataType + getValue(const ColumnConst * col_const, const typename ColumnVector::Container * col_vec, const size_t index) + { + if (nullptr != col_const) + { + return col_const->getValue(); + } + return col_vec->data()[index]; + } + + static Float64 calculateRelativeBucket(const Float64 operand, const Float64 low, const Float64 high) + { + return (operand - low) / (high - low); + } + + template + std::optional checkArguments(const Float64 operand, const Float64 low, const Float64 high, const TCountType count) const + { + if (count == 0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Last argument (count) for function {} cannot be 0.", getName()); + } + if (isNaN(operand) || isNaN(low) || isNaN(high)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "The first three arguments (operand, low, high) cannot be NaN in function {}", getName()); + } + // operand can be infinity, the following conditions will take care of it + if (!isFinite(low) || !isFinite(high)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second and third arguments (low, high) cannot be Inf function {}", getName()); + } + if (operand < low || low >= high) + { + return 0; + } + else if (operand >= high) + { + return count + 1; + } + return std::nullopt; + } + + template + TResultType NO_SANITIZE_UNDEFINED calculate(const Float64 operand, const Float64 low, const Float64 high, const TCountType count) const + { + if (const auto maybe_early_return = checkArguments(operand, low, high, count); maybe_early_return.has_value()) + { + return *maybe_early_return; + } + + const auto relative_bucket = calculateRelativeBucket(operand, low, high); + + if (isNaN(relative_bucket) || !isFinite(relative_bucket)) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, "The calculation resulted in NaN or Inf which is unexpected in function {}.", getName()); + } + return static_cast(count * relative_bucket + 1); + } + + template TCountType> + ColumnPtr executeForResultType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + using ResultType = typename NumberTraits::Construct::Type; + auto common_type = std::make_shared>(); + + std::vector casted_columns; + casted_columns.reserve(3); + for (const auto argument_index : collections::range(0, 3)) + { + casted_columns.push_back(castColumn(arguments[argument_index], common_type)); + } + + const auto * operands_vec = getDataIfNotNull(checkAndGetColumn>(casted_columns[0].get())); + const auto * lows_vec = getDataIfNotNull(checkAndGetColumn>(casted_columns[1].get())); + const auto * highs_vec = getDataIfNotNull(checkAndGetColumn>(casted_columns[2].get())); + const auto * counts_vec = getDataIfNotNull(checkAndGetColumn>(arguments[3].column.get())); + + const auto * operands_col_const = checkAndGetColumnConst>(casted_columns[0].get()); + const auto * lows_col_const = checkAndGetColumnConst>(casted_columns[1].get()); + const auto * highs_col_const = checkAndGetColumnConst>(casted_columns[2].get()); + const auto * counts_col_const = checkAndGetColumnConst>(arguments[3].column.get()); + + throwIfInvalid(0, operands_col_const, operands_vec, input_rows_count); + throwIfInvalid(1, lows_col_const, lows_vec, input_rows_count); + throwIfInvalid(2, highs_col_const, highs_vec, input_rows_count); + throwIfInvalid(4, counts_col_const, counts_vec, input_rows_count); + + const auto are_all_const_cols + = nullptr != operands_col_const && nullptr != lows_col_const && nullptr != highs_col_const && nullptr != counts_col_const; + + + if (are_all_const_cols) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Logical error in function {}: unexpected combination of argument types!", getName()); + } + + auto result_column = ColumnVector::create(); + result_column->reserve(1); + auto & result_data = result_column->getData(); + + for (const auto row_index : collections::range(0, input_rows_count)) + { + const auto operand = getValue(operands_col_const, operands_vec, row_index); + const auto low = getValue(lows_col_const, lows_vec, row_index); + const auto high = getValue(highs_col_const, highs_vec, row_index); + const auto count = getValue(counts_col_const, counts_vec, row_index); + result_data.push_back(calculate(operand, low, high, count)); + } + + return result_column; + } + +public: + static inline const char * name = "widthBucket"; + + explicit FunctionWidthBucket() = default; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 4; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (const auto argument_index : collections::range(0, 3)) + { + if (!isNativeNumber(arguments[argument_index])) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first three arguments of function {} must be a Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32 " + "or Float64.", + getName()); + } + } + if (!WhichDataType(arguments[3]).isNativeUInt()) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The last argument of function {} must be UInt8, UInt16, UInt32 or UInt64, found {}.", + getName(), + arguments[3]->getName()); + } + switch (arguments[3]->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_shared(); + case TypeIndex::UInt16: + return std::make_shared(); + case TypeIndex::UInt32: + [[fallthrough]]; + case TypeIndex::UInt64: + return std::make_shared(); + default: + break; + } + + UNREACHABLE(); + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override + { + switch (arguments[3].type->getTypeId()) + { + case TypeIndex::UInt8: + return executeForResultType(arguments, input_rows_count); + case TypeIndex::UInt16: + return executeForResultType(arguments, input_rows_count); + case TypeIndex::UInt32: + return executeForResultType(arguments, input_rows_count); + case TypeIndex::UInt64: + return executeForResultType(arguments, input_rows_count); + default: + break; + } + + UNREACHABLE(); + } + + bool useDefaultImplementationForConstants() const override { return true; } +}; + +REGISTER_FUNCTION(WidthBucket) +{ + factory.registerFunction({ + R"( +Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`. + +`operand`, `low`, `high` can be any native number type. `count` can only be unsigned native integer and its value cannot be zero. + +**Syntax** + +```sql +widthBucket(operand, low, high, count) +``` + +There is also a case insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases. + +**Example** + +Query: +[example:simple] + +Result: + +``` text +┌─widthBucket(10.15, -8.6, 23, 18)─┐ +│ 11 │ +└──────────────────────────────────┘ +``` +)", + Documentation::Examples{ + {"simple", "SELECT widthBucket(10.15, -8.6, 23, 18)"}, + }, + Documentation::Categories{"Mathematical"}, + }); + + factory.registerAlias("width_bucket", "widthBucket", FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 5bc72c5ff62..a3a3c318f5a 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -224,9 +224,9 @@ namespace detail enum class InitializeError { - RETRIABLE_ERROR, + RETRYABLE_ERROR, /// If error is not retriable, `exception` variable must be set. - NON_RETRIABLE_ERROR, + NON_RETRYABLE_ERROR, /// Allows to skip not found urls for globs SKIP_NOT_FOUND_URL, NONE, @@ -398,7 +398,7 @@ namespace detail } else if (!isRetriableError(http_status)) { - initialization_error = InitializeError::NON_RETRIABLE_ERROR; + initialization_error = InitializeError::NON_RETRYABLE_ERROR; exception = std::current_exception(); } else @@ -409,7 +409,7 @@ namespace detail } /** - * Throws if error is retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and + * Throws if error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and * saves exception into `exception` variable. In case url is not found and skip_not_found_url == true, * sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws. */ @@ -453,9 +453,9 @@ namespace detail /// Retry 200OK if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK) - initialization_error = InitializeError::RETRIABLE_ERROR; + initialization_error = InitializeError::RETRYABLE_ERROR; else - initialization_error = InitializeError::NON_RETRIABLE_ERROR; + initialization_error = InitializeError::NON_RETRYABLE_ERROR; return; } @@ -544,7 +544,7 @@ namespace detail { initialize(); - if (initialization_error == InitializeError::NON_RETRIABLE_ERROR) + if (initialization_error == InitializeError::NON_RETRYABLE_ERROR) { assert(exception); break; @@ -553,7 +553,7 @@ namespace detail { return false; } - else if (initialization_error == InitializeError::RETRIABLE_ERROR) + else if (initialization_error == InitializeError::RETRYABLE_ERROR) { LOG_ERROR( log, @@ -582,10 +582,13 @@ namespace detail } catch (const Poco::Exception & e) { - /** - * Retry request unconditionally if nothing has been read yet. - * Otherwise if it is GET method retry with range header. - */ + /// Too many open files - non-retryable. + if (e.code() == POCO_EMFILE) + throw; + + /** Retry request unconditionally if nothing has been read yet. + * Otherwise if it is GET method retry with range header. + */ bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET; if (!can_retry_request) throw; diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index d4a7d04540f..b370bee6f3f 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -321,6 +321,11 @@ ReturnType parseDateTimeBestEffortImpl( return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after hour and minute: {}", num_digits); } } + else if (checkChar(',', in)) + { + if (month && !day_of_month) + day_of_month = hour_or_day_of_month_or_month; + } else if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { if (day_of_month) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index c10c0d10bed..bfb010b6105 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1946,6 +1946,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( } auto conjunction = getConjunctionNodes(predicate, allowed_nodes); + if (conjunction.rejected.size() == 1 && WhichDataType{conjunction.rejected.front()->result_type}.isFloat()) + return nullptr; + auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs); if (!actions) return nullptr; @@ -2011,10 +2014,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( node.children.swap(new_children); *predicate = std::move(node); } - else + else if (!WhichDataType{new_children.front()->result_type}.isFloat()) { /// If type is different, cast column. /// This case is possible, cause AND can use any numeric type as argument. + /// But casting floats to UInt8 or Bool produces different results. + /// so we can't apply this optimization to them. Node node; node.type = ActionType::COLUMN; node.result_name = predicate->result_type->getName(); @@ -2036,8 +2041,20 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( else { /// Predicate is function AND, which still have more then one argument. + /// Or there is only one argument that is a float and we can't just + /// remove the AND. /// Just update children and rebuild it. predicate->children.swap(new_children); + if (WhichDataType{predicate->children.front()->result_type}.isFloat()) + { + Node node; + node.type = ActionType::COLUMN; + node.result_name = "1"; + node.column = DataTypeUInt8().createColumnConst(0, 1u); + node.result_type = std::make_shared(); + const auto * const_col = &nodes.emplace_back(std::move(node)); + predicate->children.emplace_back(const_col); + } auto arguments = prepareFunctionArguments(predicate->children); FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp index 66e0813b977..ef3e6739f8a 100644 --- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp +++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp @@ -73,24 +73,6 @@ Field zeroField(const Field & value) throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unexpected literal type in function"); } -const String & changeNameIfNeeded(const String & func_name, const String & child_name, const ASTLiteral & literal) -{ - static const std::unordered_map> matches = { - { "min", { "multiply", "divide" } }, - { "max", { "multiply", "divide" } } - }; - - static const std::unordered_map swap_to = { - { "min", "max" }, - { "max", "min" } - }; - - if (literal.value < zeroField(literal.value) && matches.contains(func_name) && matches.find(func_name)->second.contains(child_name)) - return swap_to.find(func_name)->second; - - return func_name; -} - ASTPtr tryExchangeFunctions(const ASTFunction & func) { static const std::unordered_map> supported @@ -114,19 +96,42 @@ ASTPtr tryExchangeFunctions(const ASTFunction & func) ASTPtr optimized_ast; + /** Need reverse max <-> min for: + * + * max(-1*value) -> -1*min(value) + * max(value/-2) -> min(value)/-2 + * max(1-value) -> 1-min(value) + */ + auto get_reverse_aggregate_function_name = [](const std::string & aggregate_function_name) -> std::string + { + if (aggregate_function_name == "min") + return "max"; + else if (aggregate_function_name == "max") + return "min"; + else + return aggregate_function_name; + }; + if (first_literal && !second_literal) { /// It's possible to rewrite 'sum(1/n)' with 'sum(1) * div(1/n)' but we lose accuracy. Ignored. if (child_func->name == "divide") return {}; + bool need_reverse + = (child_func->name == "multiply" && first_literal->value < zeroField(first_literal->value)) || child_func->name == "minus"; + if (need_reverse) + lower_name = get_reverse_aggregate_function_name(lower_name); - const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *first_literal); - optimized_ast = exchangeExtractFirstArgument(new_name, *child_func); + optimized_ast = exchangeExtractFirstArgument(lower_name, *child_func); } else if (second_literal) /// second or both are consts { - const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *second_literal); - optimized_ast = exchangeExtractSecondArgument(new_name, *child_func); + bool need_reverse + = (child_func->name == "multiply" || child_func->name == "divide") && second_literal->value < zeroField(second_literal->value); + if (need_reverse) + lower_name = get_reverse_aggregate_function_name(lower_name); + + optimized_ast = exchangeExtractSecondArgument(lower_name, *child_func); } if (optimized_ast) diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp index 3650b888f9e..4f42122e98f 100644 --- a/src/Interpreters/ArrayJoinAction.cpp +++ b/src/Interpreters/ArrayJoinAction.cpp @@ -14,7 +14,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int TYPE_MISMATCH; } @@ -186,7 +186,7 @@ void ArrayJoinAction::execute(Block & block) const ColumnArray & array = typeid_cast(*array_ptr); if (!is_unaligned && !array.hasEqualOffsets(*any_array)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match"); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match"); current.column = typeid_cast(*array_ptr).getDataPtr(); current.type = type->getNestedType(); diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 7157acfab09..ac8101d4ca2 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -183,7 +184,8 @@ void AsynchronousInsertQueue::scheduleDataProcessingJob(const InsertQuery & key, }); } -std::future AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) +AsynchronousInsertQueue::PushResult +AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) { query = query->clone(); const auto & settings = query_context->getSettingsRef(); @@ -203,9 +205,32 @@ std::future AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_c String bytes; { + /// Read at most 'async_insert_max_data_size' bytes of data. + /// If limit is exceeded we will fallback to synchronous insert + /// to avoid buffering of huge amount of data in memory. + auto read_buf = getReadBufferFromASTInsertQuery(query); + LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, false); + WriteBufferFromString write_buf(bytes); - copyData(*read_buf, write_buf); + copyData(limit_buf, write_buf); + + if (!read_buf->eof()) + { + write_buf.finalize(); + + /// Concat read buffer with already extracted from insert + /// query data and with the rest data from insert query. + std::vector> buffers; + buffers.emplace_back(std::make_unique(bytes)); + buffers.emplace_back(std::move(read_buf)); + + return PushResult + { + .status = PushResult::TOO_MUCH_DATA, + .insert_data_buffer = std::make_unique(std::move(buffers)), + }; + } } if (auto quota = query_context->getQuota()) @@ -265,7 +290,11 @@ std::future AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_c else shard.are_tasks_available.notify_one(); - return insert_future; + return PushResult + { + .status = PushResult::OK, + .future = std::move(insert_future), + }; } void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num) diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 4cda5078801..e6b7bff8d26 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -20,7 +20,25 @@ public: AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_); ~AsynchronousInsertQueue(); - std::future push(ASTPtr query, ContextPtr query_context); + struct PushResult + { + enum Status + { + OK, + TOO_MUCH_DATA, + }; + + Status status; + + /// Future that allows to wait until the query is flushed. + std::future future; + + /// Read buffer that contains extracted + /// from query data in case of too much data. + std::unique_ptr insert_data_buffer; + }; + + PushResult push(ASTPtr query, ContextPtr query_context); size_t getPoolSize() const { return pool_size; } private: diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index a00c3ed1326..5e53f9845aa 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp index 5d48391d56d..084bb0a1bb9 100644 --- a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp +++ b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp @@ -1,9 +1,9 @@ -#include #include #include #include #include #include +#include #include diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 0f91212e6a9..7179ce94e0b 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -54,6 +54,7 @@ namespace ErrorCodes extern const int CANNOT_ASSIGN_ALTER; extern const int CANNOT_ALLOCATE_MEMORY; extern const int MEMORY_LIMIT_EXCEEDED; + extern const int NOT_IMPLEMENTED; } constexpr const char * TASK_PROCESSED_OUT_REASON = "Task has been already processed"; @@ -456,6 +457,15 @@ bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeep try { auto query_context = task.makeQueryContext(context, zookeeper); + + chassert(!query_context->getCurrentTransaction()); + if (query_context->getSettingsRef().implicit_transaction) + { + if (query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot begin an implicit transaction inside distributed DDL query"); + query_context->setSetting("implicit_transaction", Field{0}); + } + if (!task.is_initial_query) query_scope.emplace(query_context); executeQuery(istr, ostr, !task.is_initial_query, query_context, {}); @@ -691,10 +701,9 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr & ast_ddl, const Stora if (auto * alter = ast_ddl->as()) { // Setting alters should be executed on all replicas - if (alter->isSettingsAlter()) - return false; - - if (alter->isFreezeAlter()) + if (alter->isSettingsAlter() || + alter->isFreezeAlter() || + alter->isMovePartitionToDiskOrVolumeAlter()) return false; } diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 2132c6d39f1..975e0da66ce 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -147,6 +147,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase() unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec); unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); + drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec); auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE, getContext()); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 11a855ebd79..4200373018d 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -279,7 +279,6 @@ private: bool maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir); static constexpr size_t reschedule_time_ms = 100; - static constexpr time_t drop_error_cooldown_sec = 5; mutable std::mutex databases_mutex; @@ -326,6 +325,9 @@ private: time_t unused_dir_rm_timeout_sec = default_unused_dir_rm_timeout_sec; static constexpr time_t default_unused_dir_cleanup_period_sec = 24 * 60 * 60; /// 1 day time_t unused_dir_cleanup_period_sec = default_unused_dir_cleanup_period_sec; + + static constexpr time_t default_drop_error_cooldown_sec = 5; + time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec; }; /// This class is useful when creating a table or database. diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index f8974a19f45..fabd1c38ce3 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -78,10 +78,10 @@ BlockIO InterpreterDeleteQuery::execute() } else if (table->supportsLightweightDelete()) { - if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) + if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "Lightweight delete mutate is experimental. " - "Set `allow_experimental_lightweight_delete` setting to enable it"); + "Lightweight delete mutate is disabled. " + "Set `enable_lightweight_delete` setting to enable it"); /// Build "ALTER ... UPDATE _row_exists = 0 WHERE predicate" query String alter_query = diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2569e6ddc33..b4a19ea7403 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -540,7 +540,11 @@ BlockIO InterpreterInsertQuery::execute() if (query.hasInlinedData() && !async_insert) { /// can execute without additional data - auto pipe = getSourceFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); res.pipeline.complete(std::move(pipe)); } } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 9b3f617e4b3..bb6509a9102 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -52,6 +52,8 @@ public: bool supportsTransactions() const override { return true; } + void addBuffer(std::unique_ptr buffer) { owned_buffers.push_back(std::move(buffer)); } + private: Block getSampleBlock(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; @@ -61,6 +63,8 @@ private: const bool no_destination; const bool async_insert; + std::vector> owned_buffers; + Chain buildChainImpl( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 4db61501d3d..56b81b3d224 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -118,6 +118,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) alias_node->checkSize(data.settings.max_expanded_ast_elements); ast = alias_node->clone(); ast->setAlias(node_alias); + + /// If the cloned AST was finished, this one should also be considered finished + if (data.finished_asts.contains(alias_node)) + data.finished_asts[ast] = ast; + + /// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it + /// on subsequent calls + if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end()) + existing_alias->second = ast; } } else @@ -127,6 +136,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) auto alias_name = ast->getAliasOrColumnName(); ast = alias_node->clone(); ast->setAlias(alias_name); + + /// If the cloned AST was finished, this one should also be considered finished + if (data.finished_asts.contains(alias_node)) + data.finished_asts[ast] = ast; + + /// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it + /// on subsequent calls + if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end()) + existing_alias->second = ast; } } } diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 0d2032bed78..e6e1a03f11c 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -362,11 +362,14 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim const auto time_after_previous_update = current_time - heavy_metric_previous_update_time; const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run; + Stopwatch watch; if (update_heavy_metric) { heavy_metric_previous_update_time = update_time; - - Stopwatch watch; + if (first_run) + heavy_update_interval = heavy_metric_update_period.count(); + else + heavy_update_interval = std::chrono::duration_cast(time_after_previous_update).count() / 1e6; /// Test shows that listing 100000 entries consuming around 0.15 sec. updateDetachedPartsStats(); @@ -390,7 +393,9 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim watch.elapsedSeconds()); } + new_values["AsynchronousHeavyMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics)." }; + new_values["AsynchronousHeavyMetricsUpdateInterval"] = { heavy_update_interval, "Heavy (tables related) metrics update interval" }; new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." }; new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed." }; diff --git a/src/Interpreters/ServerAsynchronousMetrics.h b/src/Interpreters/ServerAsynchronousMetrics.h index 81047e2fdf9..8243699a111 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.h +++ b/src/Interpreters/ServerAsynchronousMetrics.h @@ -21,6 +21,7 @@ private: const Duration heavy_metric_update_period; TimePoint heavy_metric_previous_update_time; + double heavy_update_interval = 0.; struct DetachedPartsStats { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ca7544df4b9..435401796a0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -590,6 +590,7 @@ static std::tuple executeQueryImpl( bool async_insert = false; auto * queue = context->getAsynchronousInsertQueue(); + auto * logger = &Poco::Logger::get("executeQuery"); if (insert_query && settings.async_insert) { @@ -605,41 +606,62 @@ static std::tuple executeQueryImpl( async_insert = true; if (!async_insert) - { - LOG_DEBUG(&Poco::Logger::get("executeQuery"), - "Setting async_insert=1, but INSERT query will be executed synchronously (reason: {})", reason); - } + LOG_DEBUG(logger, "Setting async_insert=1, but INSERT query will be executed synchronously (reason: {})", reason); } + bool quota_checked = false; + std::unique_ptr insert_data_buffer_holder; + if (async_insert) { + if (context->getCurrentTransaction() && settings.throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Async inserts inside transactions are not supported"); + if (settings.implicit_transaction && settings.throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Async inserts with 'implicit_transaction' are not supported"); + quota = context->getQuota(); if (quota) { + quota_checked = true; quota->used(QuotaType::QUERY_INSERTS, 1); quota->used(QuotaType::QUERIES, 1); quota->checkExceeded(QuotaType::ERRORS); } - auto insert_future = queue->push(ast, context); + auto result = queue->push(ast, context); - if (settings.wait_for_async_insert) + if (result.status == AsynchronousInsertQueue::PushResult::OK) { - auto timeout = settings.wait_for_async_insert_timeout.totalMilliseconds(); - auto source = std::make_shared(std::move(insert_future), timeout); - res.pipeline = QueryPipeline(Pipe(std::move(source))); + if (settings.wait_for_async_insert) + { + auto timeout = settings.wait_for_async_insert_timeout.totalMilliseconds(); + auto source = std::make_shared(std::move(result.future), timeout); + res.pipeline = QueryPipeline(Pipe(std::move(source))); + } + + const auto & table_id = insert_query->table_id; + if (!table_id.empty()) + context->setInsertionTable(table_id); } + else if (result.status == AsynchronousInsertQueue::PushResult::TOO_MUCH_DATA) + { + async_insert = false; + insert_data_buffer_holder = std::move(result.insert_data_buffer); - const auto & table_id = insert_query->table_id; - if (!table_id.empty()) - context->setInsertionTable(table_id); + if (insert_query->data) + { + /// Reset inlined data because it will be + /// available from tail read buffer. + insert_query->end = insert_query->data; + insert_query->data = nullptr; + } - if (context->getCurrentTransaction() && settings.throw_on_unsupported_query_inside_transaction) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Async inserts inside transactions are not supported"); - if (settings.implicit_transaction && settings.throw_on_unsupported_query_inside_transaction) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Async inserts with 'implicit_transaction' are not supported"); + insert_query->tail = insert_data_buffer_holder.get(); + LOG_DEBUG(logger, "Setting async_insert=1, but INSERT query will be executed synchronously because it has too much data"); + } } - else + + if (!async_insert) { /// We need to start the (implicit) transaction before getting the interpreter as this will get links to the latest snapshots if (!context->getCurrentTransaction() && settings.implicit_transaction && !ast->as()) @@ -671,7 +693,7 @@ static std::tuple executeQueryImpl( context->getSettingsRef().throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported for this type of query ({})", ast->getID()); - if (!interpreter->ignoreQuota()) + if (!interpreter->ignoreQuota() && !quota_checked) { quota = context->getQuota(); if (quota) @@ -695,12 +717,15 @@ static std::tuple executeQueryImpl( limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode); } - if (const auto * insert_interpreter = typeid_cast(&*interpreter)) + if (auto * insert_interpreter = typeid_cast(&*interpreter)) { /// Save insertion table (not table function). TODO: support remote() table function. auto table_id = insert_interpreter->getDatabaseTable(); if (!table_id.empty()) context->setInsertionTable(std::move(table_id)); + + if (insert_data_buffer_holder) + insert_interpreter->addBuffer(std::move(insert_data_buffer_holder)); } { @@ -1251,6 +1276,12 @@ void executeQuery( std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr); auto & pipeline = streams.pipeline; + QueryResultDetails result_details + { + .query_id = context->getClientInfo().current_query_id, + .timezone = DateLUT::instance().getTimeZone(), + }; + std::unique_ptr compressed_buffer; try { @@ -1309,9 +1340,8 @@ void executeQuery( out->onProgress(progress); }); - if (set_result_details) - set_result_details( - context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone()); + result_details.content_type = out->getContentType(); + result_details.format = format_name; pipeline.complete(std::move(out)); } @@ -1320,6 +1350,9 @@ void executeQuery( pipeline.setProgressCallback(context->getProgressCallback()); } + if (set_result_details) + set_result_details(result_details); + if (pipeline.initialized()) { CompletedPipelineExecutor executor(pipeline); diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 9c561d8b88c..93152cc1de6 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -11,7 +11,15 @@ namespace DB class ReadBuffer; class WriteBuffer; -using SetResultDetailsFunc = std::function; +struct QueryResultDetails +{ + String query_id; + std::optional content_type; + std::optional format; + std::optional timezone; +}; + +using SetResultDetailsFunc = std::function; /// Parse and execute a query. void executeQuery( diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 9e6326b431a..e7302b6324a 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -92,12 +92,11 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, con return columns; } -bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context) +bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context, String & error) { ParserColumnDeclarationList parser(true, true); const Settings & settings = context->getSettingsRef(); - String error; const char * start = structure.data(); const char * end = structure.data() + structure.size(); ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); @@ -106,7 +105,10 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip auto * columns_list = dynamic_cast(columns_list_raw.get()); if (!columns_list) + { + error = fmt::format("Invalid columns declaration list: \"{}\"", structure); return false; + } try { @@ -118,6 +120,7 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip } catch (...) { + error = getCurrentExceptionMessage(false); return false; } } diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index 97923bcad77..212c378d3b5 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -33,6 +33,6 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings /// Parses a common argument for table functions such as table structure given in string ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context); -bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context); +bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context, String & error); } diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index 7990c538c03..1d2f67e8e59 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -25,8 +25,16 @@ ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & CurrentThread::attachTo(thread_group); SCOPE_EXIT_SAFE({ + { + /// Release all captutred resources before detaching thread group + /// Releasing has to use proper memory tracker which has been set here before callback + + [[maybe_unused]] auto tmp = std::move(callback); + } + if (thread_group) CurrentThread::detachQueryIfNotDetached(); + }); setThreadName(thread_name.data()); diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 5d347446d37..426b63a9d28 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -533,6 +533,24 @@ bool ASTAlterQuery::isDropPartitionAlter() const return isOneCommandTypeOnly(ASTAlterCommand::DROP_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::DROP_DETACHED_PARTITION); } +bool ASTAlterQuery::isMovePartitionToDiskOrVolumeAlter() const +{ + if (command_list) + { + if (command_list->children.empty()) + return false; + for (const auto & child : command_list->children) + { + const auto & command = child->as(); + if (command.type != ASTAlterCommand::MOVE_PARTITION || + (command.move_destination_type != DataDestinationType::DISK && command.move_destination_type != DataDestinationType::VOLUME)) + return false; + } + return true; + } + return false; +} + /** Get the text that identifies this element. */ String ASTAlterQuery::getID(char delim) const diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 4a8c9c14ea9..2a48f5bbd9e 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -239,6 +239,8 @@ public: bool isDropPartitionAlter() const; + bool isMovePartitionToDiskOrVolumeAlter() const; + String getID(char) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp index 940030577d6..ba398b995be 100644 --- a/src/Parsers/ASTColumnsMatcher.cpp +++ b/src/Parsers/ASTColumnsMatcher.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes ASTPtr ASTColumnsRegexpMatcher::clone() const { auto clone = std::make_shared(*this); + clone->children.clear(); if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); } if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); } @@ -91,6 +92,7 @@ bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const ASTPtr ASTColumnsListMatcher::clone() const { auto clone = std::make_shared(*this); + clone->children.clear(); if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); } if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); } @@ -150,6 +152,7 @@ void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatSt ASTPtr ASTQualifiedColumnsRegexpMatcher::clone() const { auto clone = std::make_shared(*this); + clone->children.clear(); if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); } @@ -216,6 +219,7 @@ void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & setting ASTPtr ASTQualifiedColumnsListMatcher::clone() const { auto clone = std::make_shared(*this); + clone->children.clear(); if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index fe8ebacec15..2936c2e9ef1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -113,10 +113,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F if (group_by_with_cube) s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH CUBE" << (s.hilite ? hilite_none : ""); - if (group_by_with_grouping_sets) + if (group_by_with_grouping_sets && groupBy()) { - if (!groupBy()) /// sanity check, issue 43049 - throw Exception(ErrorCodes::LOGICAL_ERROR, "Corrupt AST"); auto nested_frame = frame; nested_frame.surround_each_list_element_with_parens = true; nested_frame.expression_list_prepend_whitespace = false; diff --git a/src/Parsers/ASTSetQuery.cpp b/src/Parsers/ASTSetQuery.cpp index 26420f4988c..0b8d76dbb89 100644 --- a/src/Parsers/ASTSetQuery.cpp +++ b/src/Parsers/ASTSetQuery.cpp @@ -34,7 +34,11 @@ void ASTSetQuery::formatImpl(const FormatSettings & format, FormatState &, Forma first = false; formatSettingName(change.name, format.ostr); - format.ostr << " = " << applyVisitor(FieldVisitorToString(), change.value); + CustomType custom; + if (!format.show_secrets && change.value.tryGet(custom) && custom.isSecret()) + format.ostr << " = " << custom.toString(false); + else + format.ostr << " = " << applyVisitor(FieldVisitorToString(), change.value); } for (const auto & setting_name : default_settings) diff --git a/src/Parsers/FieldFromAST.cpp b/src/Parsers/FieldFromAST.cpp index 7b7302696ed..3cd10c1cf80 100644 --- a/src/Parsers/FieldFromAST.cpp +++ b/src/Parsers/FieldFromAST.cpp @@ -1,10 +1,19 @@ #include +#include +#include +#include +#include +#include +#include +#include + namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } Field createFieldFromAST(ASTPtr ast) @@ -17,4 +26,51 @@ Field createFieldFromAST(ASTPtr ast) throw Exception(ErrorCodes::LOGICAL_ERROR, "Method {} not implemented for {}", method, getTypeName()); } +bool FieldFromASTImpl::isSecret() const +{ + return isDiskFunction(ast); +} + +String FieldFromASTImpl::toString(bool show_secrets) const +{ + if (!show_secrets && isDiskFunction(ast)) + { + auto hidden = ast->clone(); + const auto & disk_function = assert_cast(*hidden); + const auto * disk_function_args_expr = assert_cast(disk_function.arguments.get()); + const auto & disk_function_args = disk_function_args_expr->children; + + auto is_secret_arg = [](const std::string & arg_name) + { + return arg_name != "type"; + }; + + for (const auto & arg : disk_function_args) + { + auto * setting_function = arg->as(); + if (!setting_function || setting_function->name != "equals") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected equals function"); + + auto * function_args_expr = assert_cast(setting_function->arguments.get()); + if (!function_args_expr) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected arguments"); + + auto & function_args = function_args_expr->children; + if (function_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected non zero number of arguments"); + + auto * key_identifier = function_args[0]->as(); + if (!key_identifier) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected Identifier"); + + const std::string & key = key_identifier->name(); + if (is_secret_arg(key)) + function_args[1] = std::make_shared("[HIDDEN]"); + } + return serializeAST(*hidden); + } + + return serializeAST(*ast); +} + } diff --git a/src/Parsers/FieldFromAST.h b/src/Parsers/FieldFromAST.h index 132f7e3e705..a69c086a170 100644 --- a/src/Parsers/FieldFromAST.h +++ b/src/Parsers/FieldFromAST.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include namespace DB { @@ -13,7 +12,8 @@ struct FieldFromASTImpl : public CustomType::CustomTypeImpl explicit FieldFromASTImpl(ASTPtr ast_) : ast(ast_) {} const char * getTypeName() const override { return name; } - String toString() const override { return serializeAST(*ast); } + String toString(bool show_secrets) const override; + bool isSecret() const override; [[noreturn]] void throwNotImplemented(std::string_view method) const; diff --git a/src/Parsers/isDiskFunction.cpp b/src/Parsers/isDiskFunction.cpp new file mode 100644 index 00000000000..e60229cb3f7 --- /dev/null +++ b/src/Parsers/isDiskFunction.cpp @@ -0,0 +1,16 @@ +#include +#include + +namespace DB +{ + +bool isDiskFunction(ASTPtr ast) +{ + if (!ast) + return false; + + const auto * function = ast->as(); + return function && function->name == "disk" && function->arguments->as(); +} + +} diff --git a/src/Parsers/isDiskFunction.h b/src/Parsers/isDiskFunction.h new file mode 100644 index 00000000000..97b3c58fa17 --- /dev/null +++ b/src/Parsers/isDiskFunction.h @@ -0,0 +1,9 @@ +#pragma once +#include + +namespace DB +{ + +bool isDiskFunction(ASTPtr ast); + +} diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 2c5c8ab5e62..614e5c03bc1 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -65,6 +65,15 @@ public: return node_name_to_node.find(node_name) != node_name_to_node.end(); } + [[maybe_unused]] bool containsInputNode(const std::string & node_name) + { + const auto * node = tryGetNode(node_name); + if (node && node->type == ActionsDAG::ActionType::INPUT) + return true; + + return false; + } + [[maybe_unused]] const ActionsDAG::Node * tryGetNode(const std::string & node_name) { auto it = node_name_to_node.find(node_name); @@ -421,7 +430,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto function_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name); - if (function_node.isAggregateFunction() || function_node.isWindowFunction()) + /* Aggregate functions, window functions, and GROUP BY expressions were already analyzed in the previous steps. + * If we have already visited some expression, we don't need to revisit it or its arguments again. + * For example, the expression from the aggregation step is also present in the projection: + * SELECT foo(a, b, c) as x FROM table GROUP BY foo(a, b, c) + * In this case we should not analyze `a`, `b`, `c` again. + * Moreover, it can lead to an error if we have arrayJoin in the arguments because it will be calculated twice. + */ + bool is_input_node = function_node.isAggregateFunction() || function_node.isWindowFunction() + || actions_stack.front().containsInputNode(function_node_name); + if (is_input_node) { size_t actions_stack_size = actions_stack.size(); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 59b09f91888..f2259ca7e33 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -334,6 +334,16 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl } else if (query_node || union_node) { + if (table_expression_data.getColumnNames().empty()) + { + const auto & projection_columns = query_node ? query_node->getProjectionColumns() : union_node->computeProjectionColumns(); + NamesAndTypesList projection_columns_list(projection_columns.begin(), projection_columns.end()); + auto additional_column_to_read = ExpressionActions::getSmallestColumn(projection_columns_list); + + const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); + table_expression_data.addColumn(additional_column_to_read, column_identifier); + } + auto subquery_options = select_query_options.subquery(); Planner subquery_planner(table_expression, subquery_options, planner_context->getGlobalPlannerContext()); /// Propagate storage limits to subquery diff --git a/src/Processors/Executors/StreamingFormatExecutor.cpp b/src/Processors/Executors/StreamingFormatExecutor.cpp index 91db2c09a20..2223721439e 100644 --- a/src/Processors/Executors/StreamingFormatExecutor.cpp +++ b/src/Processors/Executors/StreamingFormatExecutor.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int UNKNOWN_EXCEPTION; } StreamingFormatExecutor::StreamingFormatExecutor( @@ -93,6 +94,18 @@ size_t StreamingFormatExecutor::execute() format->resetParser(); return on_error(result_columns, e); } + catch (std::exception & e) + { + format->resetParser(); + auto exception = Exception(Exception::CreateFromSTDTag{}, e); + return on_error(result_columns, exception); + } + catch (...) + { + format->resetParser(); + auto exception = Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknowk exception while executing StreamingFormatExecutor with format {}", format->getName()); + return on_error(result_columns, exception); + } } } diff --git a/src/Processors/Executors/StreamingFormatExecutor.h b/src/Processors/Executors/StreamingFormatExecutor.h index f948d833095..f5a1562a340 100644 --- a/src/Processors/Executors/StreamingFormatExecutor.h +++ b/src/Processors/Executors/StreamingFormatExecutor.h @@ -24,7 +24,7 @@ public: StreamingFormatExecutor( const Block & header_, InputFormatPtr format_, - ErrorCallback on_error_ = [](const MutableColumns &, Exception &) -> size_t { throw; }, + ErrorCallback on_error_ = [](const MutableColumns &, Exception & e) -> size_t { throw std::move(e); }, SimpleTransformPtr adding_defaults_transform_ = nullptr); /// Returns numbers of new read rows. diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 48cb093f0ab..c96cb373a2d 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -15,20 +16,38 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read) +void checkFinalInferredType( + DataTypePtr & type, + const String & name, + const FormatSettings & settings, + const DataTypePtr & default_type, + size_t rows_read, + const String & hints_parsing_error) { if (!checkIfTypeIsComplete(type)) { if (!default_type) - throw Exception( - ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, - "Cannot determine type for column '{}' by first {} rows " - "of data, most likely this column contains only Nulls or empty " - "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " - "If your data contains complex JSON objects, try enabling one " - "of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", - name, - rows_read); + { + if (hints_parsing_error.empty()) + throw Exception( + ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, + "Cannot determine type for column '{}' by first {} rows " + "of data, most likely this column contains only Nulls or empty " + "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " + "If your data contains complex JSON objects, try enabling one " + "of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", + name, + rows_read); + else + throw Exception( + ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, + "Cannot determine type for column '{}' by first {} rows " + "of data, most likely this column contains only Nulls or empty Arrays/Maps. " + "Column types from setting schema_inference_hints couldn't be parsed because of error: {}", + name, + rows_read, + hints_parsing_error); + } type = default_type; } @@ -46,11 +65,15 @@ IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & fo void IIRowSchemaReader::setContext(ContextPtr & context) { ColumnsDescription columns; - if (tryParseColumnsListFromString(hints_str, columns, context)) + if (tryParseColumnsListFromString(hints_str, columns, context, hints_parsing_error)) { for (const auto & [name, type] : columns.getAll()) hints[name] = type; } + else + { + LOG_WARNING(&Poco::Logger::get("IIRowSchemaReader"), "Couldn't parse schema inference hints: {}. This setting will be ignored", hints_parsing_error); + } } void IIRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) @@ -137,7 +160,14 @@ NamesAndTypesList IRowSchemaReader::readSchema() if (!new_data_types[field_index] || hints.contains(column_names[field_index])) continue; - chooseResultColumnType(*this, data_types[field_index], new_data_types[field_index], getDefaultType(field_index), std::to_string(field_index + 1), rows_read); + chooseResultColumnType( + *this, + data_types[field_index], + new_data_types[field_index], + getDefaultType(field_index), + std::to_string(field_index + 1), + rows_read, + hints_parsing_error); } } @@ -149,7 +179,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() { transformFinalTypeIfNeeded(data_types[field_index]); /// Check that we could determine the type of this column. - checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read); + checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read, hints_parsing_error); } result.emplace_back(column_names[field_index], data_types[field_index]); } @@ -246,7 +276,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() continue; auto & type = it->second; - chooseResultColumnType(*this, type, new_type, default_type, name, rows_read); + chooseResultColumnType(*this, type, new_type, default_type, name, rows_read, hints_parsing_error); } } @@ -263,7 +293,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() { transformFinalTypeIfNeeded(type); /// Check that we could determine the type of this column. - checkFinalInferredType(type, name, format_settings, default_type, rows_read); + checkFinalInferredType(type, name, format_settings, default_type, rows_read, hints_parsing_error); } result.emplace_back(name, type); } diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index edc5c6068c3..81bc94afa6c 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -65,6 +65,7 @@ protected: String hints_str; FormatSettings format_settings; std::unordered_map hints; + String hints_parsing_error; }; /// Base class for schema inference for formats that read data row by row. @@ -145,7 +146,8 @@ void chooseResultColumnType( DataTypePtr & new_type, const DataTypePtr & default_type, const String & column_name, - size_t row) + size_t row, + const String & hints_parsing_error = "") { if (!type) { @@ -166,14 +168,25 @@ void chooseResultColumnType( type = default_type; else { - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. " - "You can specify the type for this column using setting schema_inference_hints", - type->getName(), - column_name, - row, - new_type->getName()); + if (hints_parsing_error.empty()) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. " + "You can specify the type for this column using setting schema_inference_hints", + type->getName(), + column_name, + row, + new_type->getName()); + else + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. " + "Column types from setting schema_inference_hints couldn't be parsed because of error: {}", + type->getName(), + column_name, + row, + new_type->getName(), + hints_parsing_error); } } @@ -196,7 +209,13 @@ void chooseResultColumnTypes( chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row); } -void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read); +void checkFinalInferredType( + DataTypePtr & type, + const String & name, + const FormatSettings & settings, + const DataTypePtr & default_type, + size_t rows_read, + const String & hints_parsing_error); Strings splitColumnNames(const String & column_names_str); diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index ed963d8a500..cd8facb83eb 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -71,13 +71,10 @@ Chunk ArrowBlockInputFormat::generate() ++record_batch_current; - arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, (*table_result)->num_rows()); - /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. - if (format_settings.defaults_for_omitted_fields) - for (const auto & column_idx : missing_columns) - block_missing_values.setBits(column_idx, res.getNumRows()); + BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; + arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, (*table_result)->num_rows(), block_missing_values_ptr); return res; } @@ -143,8 +140,8 @@ void ArrowBlockInputFormat::prepareReader() "Arrow", format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns, + format_settings.null_as_default, format_settings.arrow.case_insensitive_column_matching); - missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); if (stream) record_batch_total = -1; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index 02648d28048..3db76777891 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -47,7 +47,6 @@ private: int record_batch_total = 0; int record_batch_current = 0; - std::vector missing_columns; BlockMissingValues block_missing_values; const FormatSettings format_settings; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ef25b5a332f..f73846f15e6 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -826,16 +827,18 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn( const std::string & format_name_, bool import_nested_, bool allow_missing_columns_, + bool null_as_default_, bool case_insensitive_matching_) : header(header_) , format_name(format_name_) , import_nested(import_nested_) , allow_missing_columns(allow_missing_columns_) + , null_as_default(null_as_default_) , case_insensitive_matching(case_insensitive_matching_) { } -void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows) +void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values) { NameToColumnPtr name_to_column_ptr; for (auto column_name : table->ColumnNames()) @@ -849,10 +852,10 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrcloneResized(num_rows); columns_list.push_back(std::move(column.column)); + if (block_missing_values) + block_missing_values->setBits(column_i, num_rows); continue; } } @@ -927,6 +932,9 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & arrow_column, header_column.name, format_name, false, dictionary_infos, true, false, skipped, header_column.type); } + if (null_as_default) + insertNullAsDefaultIfNeeded(column, header_column, column_i, block_missing_values); + try { column.column = castColumn(column, header_column.type); @@ -948,28 +956,6 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & res.setColumns(columns_list, num_rows); } -std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const -{ - std::vector missing_columns; - auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, false, &header, case_insensitive_matching); - NestedColumnExtractHelper nested_columns_extractor(block_from_arrow, case_insensitive_matching); - - for (size_t i = 0, columns = header.columns(); i < columns; ++i) - { - const auto & header_column = header.getByPosition(i); - if (!block_from_arrow.has(header_column.name, case_insensitive_matching)) - { - if (!import_nested || !nested_columns_extractor.extractColumn(header_column.name)) - { - if (!allow_missing_columns) - throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; - missing_columns.push_back(i); - } - } - } - return missing_columns; -} - } #endif diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index dd9f44eb94e..64ff99c70ac 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -26,14 +26,12 @@ public: const std::string & format_name_, bool import_nested_, bool allow_missing_columns_, + bool null_as_default_, bool case_insensitive_matching_ = false); - void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows); + void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); - void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows); - - /// Get missing columns that exists in header but not in arrow::Schema - std::vector getMissingColumns(const arrow::Schema & schema) const; + void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); /// Transform arrow schema to ClickHouse header. If hint_header is provided, /// we will skip columns in schema that are not in hint_header. @@ -58,6 +56,7 @@ private: bool import_nested; /// If false, throw exception if some columns in header not exists in arrow table. bool allow_missing_columns; + bool null_as_default; bool case_insensitive_matching; /// Map {column name : dictionary column}. diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index eacd29e0db1..c3ea1b5e23b 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -176,8 +176,9 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { auto & lc_column = assert_cast(column); auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); - dict_deserialize(*tmp_column, decoder); + auto res = dict_deserialize(*tmp_column, decoder); lc_column.insertFromFullColumn(*tmp_column, 0); + return res; }; } @@ -198,6 +199,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node UUID uuid; parseUUID(reinterpret_cast(tmp.data()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); assert_cast(column).insertValue(uuid); + return true; }; } if (target.isString() || target.isFixedString()) @@ -206,6 +208,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { decoder.decodeString(tmp); column.insertData(tmp.c_str(), tmp.length()); + return true; }; } break; @@ -215,6 +218,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node return [target](IColumn & column, avro::Decoder & decoder) { insertNumber(column, target, decoder.decodeInt()); + return true; }; } break; @@ -224,6 +228,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node return [target](IColumn & column, avro::Decoder & decoder) { insertNumber(column, target, decoder.decodeLong()); + return true; }; } break; @@ -233,6 +238,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node return [target](IColumn & column, avro::Decoder & decoder) { insertNumber(column, target, decoder.decodeFloat()); + return true; }; } break; @@ -242,6 +248,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node return [target](IColumn & column, avro::Decoder & decoder) { insertNumber(column, target, decoder.decodeDouble()); + return true; }; } break; @@ -251,6 +258,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node return [target](IColumn & column, avro::Decoder & decoder) { insertNumber(column, target, decoder.decodeBool()); + return true; }; } break; @@ -275,6 +283,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node } } offsets.push_back(offsets.back() + total); + return true; }; } break; @@ -301,24 +310,33 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { col.insertDefault(); } + return true; }; } - - /// If the Union is ['Null', Nested-Type], since the Nested-Type can not be inside - /// Nullable, so we will get Nested-Type, instead of Nullable type. - if (null_as_default || !target.isNullable()) + else if (null_as_default) { auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { int union_index = static_cast(decoder.decodeUnionIndex()); if (union_index == non_null_union_index) + { nested_deserialize(column, decoder); - else - column.insertDefault(); + return true; + } + column.insertDefault(); + return false; }; } - + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot insert Avro Union(Null, {}) into non-nullable type {}. To use default value on NULL, enable setting " + "input_format_null_as_default", + avro::toString(root_node->leafAt(non_null_union_index)->type()), + target_type->getName()); + } } break; } @@ -331,6 +349,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node return [](IColumn &, avro::Decoder & decoder) { decoder.decodeNull(); + return true; }; } else @@ -340,10 +359,26 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node ColumnNullable & col = assert_cast(column); decoder.decodeNull(); col.insertDefault(); + return true; }; } } - break; + else if (null_as_default) + { + return [](IColumn & column, avro::Decoder & decoder) + { + decoder.decodeNull(); + column.insertDefault(); + return false; + }; + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot insert Avro Null into non-nullable type {}. To use default value on NULL, enable setting " + "input_format_null_as_default", target_type->getName()); + } case avro::AVRO_ENUM: if (target.isString()) { @@ -358,6 +393,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node size_t enum_index = decoder.decodeEnum(); const auto & enum_symbol = symbols[enum_index]; column.insertData(enum_symbol.c_str(), enum_symbol.length()); + return true; }; } if (target.isEnum()) @@ -372,6 +408,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { size_t enum_index = decoder.decodeEnum(); column.insert(symbol_mapping[enum_index]); + return true; }; } break; @@ -384,6 +421,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { decoder.decodeFixed(tmp_fixed.size(), tmp_fixed); column.insertData(reinterpret_cast(tmp_fixed.data()), tmp_fixed.size()); + return true; }; } break; @@ -415,6 +453,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node auto nested_columns = column_tuple.getColumns(); for (const auto & [nested_deserializer, pos] : nested_deserializers) nested_deserializer(*nested_columns[pos], decoder); + return true; }; } break; @@ -449,6 +488,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node } } offsets.push_back(offsets.back() + total); + return true; }; } break; @@ -465,6 +505,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node ColumnNullable & col = assert_cast(column); nested_deserialize(col.getNestedColumn(), decoder); col.getNullMapData().push_back(0); + return true; }; } @@ -593,7 +634,6 @@ void AvroDeserializer::Action::deserializeNested(MutableColumns & columns, avro: ColumnArray & column_array = assert_cast(*columns[index]); arrays_offsets.push_back(&column_array.getOffsets()); nested_columns.push_back(&column_array.getData()); - ext.read_columns[index] = true; } size_t total = 0; @@ -603,7 +643,7 @@ void AvroDeserializer::Action::deserializeNested(MutableColumns & columns, avro: for (size_t i = 0; i < n; ++i) { for (size_t j = 0; j != nested_deserializers.size(); ++j) - nested_deserializers[j](*nested_columns[j], decoder); + ext.read_columns[nested_column_indexes[j]] = nested_deserializers[j](*nested_columns[j], decoder); } } @@ -738,14 +778,14 @@ AvroDeserializer::AvroDeserializer(const Block & header, avro::ValidSchema schem void AvroDeserializer::deserializeRow(MutableColumns & columns, avro::Decoder & decoder, RowReadExtension & ext) const { + size_t row = columns[0]->size() + 1; ext.read_columns.assign(columns.size(), false); row_action.execute(columns, decoder, ext); for (size_t i = 0; i < ext.read_columns.size(); ++i) { - if (!ext.read_columns[i]) - { + /// Insert default in missing columns. + if (columns[i]->size() != row) columns[i]->insertDefault(); - } } } @@ -759,7 +799,7 @@ void AvroRowInputFormat::readPrefix() { file_reader_ptr = std::make_unique(std::make_unique(*in)); deserializer_ptr = std::make_unique( - output.getHeader(), file_reader_ptr->dataSchema(), format_settings.avro.allow_missing_fields, format_settings.avro.null_as_default); + output.getHeader(), file_reader_ptr->dataSchema(), format_settings.avro.allow_missing_fields, format_settings.null_as_default); file_reader_ptr->init(); } @@ -950,7 +990,7 @@ const AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(Sc { auto schema = schema_registry->getSchema(schema_id); AvroDeserializer deserializer( - output.getHeader(), schema, format_settings.avro.allow_missing_fields, format_settings.avro.null_as_default); + output.getHeader(), schema, format_settings.avro.allow_missing_fields, format_settings.null_as_default); it = deserializer_cache.emplace(schema_id, deserializer).first; } return it->second; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index d25317302d7..25589880c14 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -52,8 +52,8 @@ public: void deserializeRow(MutableColumns & columns, avro::Decoder & decoder, RowReadExtension & ext) const; private: - using DeserializeFn = std::function; - using DeserializeNestedFn = std::function; + using DeserializeFn = std::function; + using DeserializeNestedFn = std::function; using SkipFn = std::function; DeserializeFn createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type); @@ -103,8 +103,7 @@ private: case Noop: break; case Deserialize: - deserialize_fn(*columns[target_column_idx], decoder); - ext.read_columns[target_column_idx] = true; + ext.read_columns[target_column_idx] = deserialize_fn(*columns[target_column_idx], decoder); break; case Skip: skip_fn(decoder); diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index c4ffce2bc65..5d747888b92 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -773,6 +773,14 @@ bool BSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi return true; } +void BSONEachRowRowInputFormat::resetParser() +{ + IRowInputFormat::resetParser(); + read_columns.clear(); + seen_columns.clear(); + prev_positions.clear(); +} + BSONEachRowSchemaReader::BSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & settings_) : IRowWithNamesSchemaReader(in_, settings_) { diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h index d0830ca2781..ad6d712b6dd 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h @@ -54,7 +54,7 @@ public: ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_); String getName() const override { return "BSONEachRowRowInputFormat"; } - void resetParser() override { } + void resetParser() override; private: void readPrefix() override { } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index ad8d88a2b65..de955d81651 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -102,6 +102,12 @@ void CSVRowInputFormat::setReadBuffer(ReadBuffer & in_) buf->setSubBuffer(in_); } +void CSVRowInputFormat::resetParser() +{ + RowInputFormatWithNamesAndTypes::resetParser(); + buf->reset(); +} + static void skipEndOfLine(ReadBuffer & in) { /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 86f7fe3466c..f51f674e4af 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -27,6 +27,7 @@ public: String getName() const override { return "CSVRowInputFormat"; } void setReadBuffer(ReadBuffer & in_) override; + void resetParser() override; protected: CSVRowInputFormat(const Block & header_, std::shared_ptr in_, const Params & params_, diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index 204a5077e31..a39722950e4 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -182,7 +182,7 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase( void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx) { ColumnsDescription columns; - if (tryParseColumnsListFromString(hints_str, columns, ctx)) + if (tryParseColumnsListFromString(hints_str, columns, ctx, hints_parsing_error)) { for (const auto & [name, type] : columns.getAll()) hints[name] = type; @@ -238,7 +238,7 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() rows_in_block = 0; auto column_type = readColumnAndGetDataType( column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read); - chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1); + chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1, hints_parsing_error); } ++iteration; @@ -260,7 +260,7 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() { transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); /// Check that we could determine the type of this column. - checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference); + checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference, hints_parsing_error); } result.emplace_back(name, type); } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index 3292b5649c9..2babc0734f9 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -91,6 +91,7 @@ private: const FormatSettings format_settings; String hints_str; std::unordered_map hints; + String hints_parsing_error; std::unique_ptr reader; Names column_names_from_settings; JSONInferenceInfo inference_info; diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index f337eedbb05..488f4ff9a73 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -45,11 +45,11 @@ namespace ErrorCodes extern const int UNEXPECTED_END_OF_FILE; } -MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) - : MsgPackRowInputFormat(header_, std::make_unique(in_), params_) {} +MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & settings) + : MsgPackRowInputFormat(header_, std::make_unique(in_), params_, settings) {} -MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, std::unique_ptr buf_, Params params_) - : IRowInputFormat(header_, *buf_, std::move(params_)), buf(std::move(buf_)), parser(visitor), data_types(header_.getDataTypes()) {} +MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, std::unique_ptr buf_, Params params_, const FormatSettings & settings) + : IRowInputFormat(header_, *buf_, std::move(params_)), buf(std::move(buf_)), visitor(settings.null_as_default), parser(visitor), data_types(header_.getDataTypes()) {} void MsgPackRowInputFormat::resetParser() { @@ -58,13 +58,13 @@ void MsgPackRowInputFormat::resetParser() visitor.reset(); } -void MsgPackVisitor::set_info(IColumn & column, DataTypePtr type) // NOLINT +void MsgPackVisitor::set_info(IColumn & column, DataTypePtr type, UInt8 & read) // NOLINT { while (!info_stack.empty()) { info_stack.pop(); } - info_stack.push(Info{column, type}); + info_stack.push(Info{column, type, &read}); } void MsgPackVisitor::reset() @@ -228,11 +228,11 @@ static void insertFloat64(IColumn & column, DataTypePtr type, Float64 value) // assert_cast(column).insertValue(value); } -static void insertNull(IColumn & column, DataTypePtr type) +static void insertNull(IColumn & column, DataTypePtr type, UInt8 * read, bool null_as_default) { auto insert_func = [&](IColumn & column_, DataTypePtr type_) { - insertNull(column_, type_); + insertNull(column_, type_, read, null_as_default); }; /// LowCardinality(Nullable(...)) @@ -240,7 +240,16 @@ static void insertNull(IColumn & column, DataTypePtr type) return; if (!type->isNullable()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack null into non-nullable column with type {}.", type->getName()); + { + if (!null_as_default) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack null into non-nullable column with type {}.", type->getName()); + column.insertDefault(); + /// In case of default on null column can have defined DEFAULT expression that should be used. + if (read) + *read = false; + return; + } assert_cast(column).insertDefault(); } @@ -316,7 +325,7 @@ bool MsgPackVisitor::start_array(size_t size) // NOLINT ColumnArray::Offsets & offsets = column_array.getOffsets(); IColumn & nested_column = column_array.getData(); offsets.push_back(offsets.back() + size); - info_stack.push(Info{nested_column, nested_type}); + info_stack.push(Info{nested_column, nested_type, nullptr}); return true; } @@ -340,7 +349,7 @@ bool MsgPackVisitor::start_map_key() // NOLINT { auto key_column = assert_cast(info_stack.top().column).getNestedData().getColumns()[0]; auto key_type = assert_cast(*info_stack.top().type).getKeyType(); - info_stack.push(Info{*key_column, key_type}); + info_stack.push(Info{*key_column, key_type, nullptr}); return true; } @@ -354,7 +363,7 @@ bool MsgPackVisitor::start_map_value() // NOLINT { auto value_column = assert_cast(info_stack.top().column).getNestedData().getColumns()[1]; auto value_type = assert_cast(*info_stack.top().type).getValueType(); - info_stack.push(Info{*value_column, value_type}); + info_stack.push(Info{*value_column, value_type, nullptr}); return true; } @@ -366,7 +375,7 @@ bool MsgPackVisitor::end_map_value() // NOLINT bool MsgPackVisitor::visit_nil() { - insertNull(info_stack.top().column, info_stack.top().type); + insertNull(info_stack.top().column, info_stack.top().type, info_stack.top().read, null_as_default); return true; } @@ -407,13 +416,14 @@ bool MsgPackRowInputFormat::readObject() return true; } -bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) +bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) { size_t column_index = 0; bool has_more_data = true; + ext.read_columns.resize(columns.size(), true); for (; column_index != columns.size(); ++column_index) { - visitor.set_info(*columns[column_index], data_types[column_index]); + visitor.set_info(*columns[column_index], data_types[column_index], ext.read_columns[column_index]); has_more_data = readObject(); if (!has_more_data) break; @@ -547,9 +557,9 @@ void registerInputFormatMsgPack(FormatFactory & factory) ReadBuffer & buf, const Block & sample, const RowInputFormatParams & params, - const FormatSettings &) + const FormatSettings & settings) { - return std::make_shared(sample, buf, params); + return std::make_shared(sample, buf, params, settings); }); factory.registerFileExtension("messagepack", "MsgPack"); } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index 64bb8b569e0..5eaa3719d0c 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -19,10 +19,13 @@ class ReadBuffer; class MsgPackVisitor : public msgpack::null_visitor { public: + MsgPackVisitor(bool null_as_default_) : null_as_default(null_as_default_) {} + struct Info { IColumn & column; DataTypePtr type; + UInt8 * read; }; /// These functions are called when parser meets corresponding object in parsed data @@ -47,25 +50,26 @@ public: [[noreturn]] void parse_error(size_t parsed_offset, size_t error_offset); /// Update info_stack - void set_info(IColumn & column, DataTypePtr type); + void set_info(IColumn & column, DataTypePtr type, UInt8 & read); void reset(); private: /// Stack is needed to process arrays and maps std::stack info_stack; + bool null_as_default; }; class MsgPackRowInputFormat : public IRowInputFormat { public: - MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); + MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & settings); String getName() const override { return "MagPackRowInputFormat"; } void resetParser() override; void setReadBuffer(ReadBuffer & in_) override; private: - MsgPackRowInputFormat(const Block & header_, std::unique_ptr buf_, Params params_); + MsgPackRowInputFormat(const Block & header_, std::unique_ptr buf_, Params params_, const FormatSettings & settings); bool readRow(MutableColumns & columns, RowReadExtension & ext) override; diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index 4b74cb882c6..bd1b13ce2ef 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -17,7 +17,13 @@ class NativeInputFormat final : public IInputFormat public: NativeInputFormat(ReadBuffer & buf, const Block & header_, const FormatSettings & settings) : IInputFormat(header_, buf) - , reader(std::make_unique(buf, header_, 0, settings.skip_unknown_fields)) + , reader(std::make_unique( + buf, + header_, + 0, + settings.skip_unknown_fields, + settings.null_as_default, + settings.defaults_for_omitted_fields ? &block_missing_values : nullptr)) , header(header_) {} String getName() const override { return "Native"; } @@ -30,6 +36,7 @@ public: Chunk generate() override { + block_missing_values.clear(); auto block = reader->read(); if (!block) return {}; @@ -47,9 +54,12 @@ public: IInputFormat::setReadBuffer(in_); } + const BlockMissingValues & getMissingValues() const override { return block_missing_values; } + private: std::unique_ptr reader; Block header; + BlockMissingValues block_missing_values; }; class NativeOutputFormat final : public IOutputFormat diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 2e45d817506..03f056e22b3 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -67,12 +67,10 @@ Chunk ORCBlockInputFormat::generate() ++stripe_current; Chunk res; - arrow_column_to_ch_column->arrowTableToCHChunk(res, table, num_rows); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. - if (format_settings.defaults_for_omitted_fields) - for (const auto & column_idx : missing_columns) - block_missing_values.setBits(column_idx, res.getNumRows()); + BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; + arrow_column_to_ch_column->arrowTableToCHChunk(res, table, num_rows, block_missing_values_ptr); return res; } @@ -128,8 +126,8 @@ void ORCBlockInputFormat::prepareReader() "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns, + format_settings.null_as_default, format_settings.orc.case_insensitive_column_matching); - missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); ArrowFieldIndexUtil field_util( format_settings.orc.case_insensitive_column_matching, diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index bc2abe41cc1..3d8bc781278 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -49,7 +49,6 @@ private: // indices of columns to read from ORC file std::vector include_indices; - std::vector missing_columns; BlockMissingValues block_missing_values; const FormatSettings format_settings; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index d2ec3c02eed..fca097d8ea7 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -71,7 +71,10 @@ Chunk ParquetBlockInputFormat::generate() if (*batch) { auto tmp_table = arrow::Table::FromRecordBatches({*batch}); - arrow_column_to_ch_column->arrowTableToCHChunk(res, *tmp_table, (*tmp_table)->num_rows()); + /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. + /// Otherwise fill the missing columns with zero values of its type. + BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; + arrow_column_to_ch_column->arrowTableToCHChunk(res, *tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); } else { @@ -80,12 +83,6 @@ Chunk ParquetBlockInputFormat::generate() return {}; } - /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. - /// Otherwise fill the missing columns with zero values of its type. - if (format_settings.defaults_for_omitted_fields) - for (const auto & column_idx : missing_columns) - block_missing_values.setBits(column_idx, res.getNumRows()); - return res; } @@ -133,8 +130,8 @@ void ParquetBlockInputFormat::prepareReader() "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, + format_settings.null_as_default, format_settings.parquet.case_insensitive_column_matching); - missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); ArrowFieldIndexUtil field_util( format_settings.parquet.case_insensitive_column_matching, diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index 37878a94dd9..afc46939c79 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -42,7 +42,6 @@ private: // indices of columns to read from Parquet file std::vector column_indices; std::unique_ptr arrow_column_to_ch_column; - std::vector missing_columns; BlockMissingValues block_missing_values; const FormatSettings format_settings; const std::unordered_set & skip_row_groups; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 0fce98f8a11..18c81f8fd6a 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -16,6 +16,21 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } +static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings) +{ + switch (settings.parquet.output_version) + { + case FormatSettings::ParquetVersion::V1_0: + return parquet::ParquetVersion::PARQUET_1_0; + case FormatSettings::ParquetVersion::V2_4: + return parquet::ParquetVersion::PARQUET_2_4; + case FormatSettings::ParquetVersion::V2_6: + return parquet::ParquetVersion::PARQUET_2_6; + case FormatSettings::ParquetVersion::V2_LATEST: + return parquet::ParquetVersion::PARQUET_2_LATEST; + } +} + ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_), format_settings{format_settings_} { @@ -44,6 +59,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) auto sink = std::make_shared(out); parquet::WriterProperties::Builder builder; + builder.version(getParquetVersion(format_settings)); #if USE_SNAPPY builder.compression(parquet::Compression::SNAPPY); #endif diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 40f6a2a54a7..ee60501dba5 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -13,21 +13,31 @@ namespace DB ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_) : IRowInputFormat(header_, in_, params_) - , reader(std::make_unique(in_)) - , serializer(ProtobufSerializer::create( - header_.getNames(), - header_.getDataTypes(), - missing_column_indices, - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No), - with_length_delimiter_, - /* with_envelope = */ false, - flatten_google_wrappers_, - *reader)) + , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No)) + , with_length_delimiter(with_length_delimiter_) + , flatten_google_wrappers(flatten_google_wrappers_) { } +void ProtobufRowInputFormat::createReaderAndSerializer() +{ + reader = std::make_unique(*in); + serializer = ProtobufSerializer::create( + getPort().getHeader().getNames(), + getPort().getHeader().getDataTypes(), + missing_column_indices, + *message_descriptor, + with_length_delimiter, + /* with_envelope = */ false, + flatten_google_wrappers, + *reader); +} + bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & row_read_extension) { + if (!reader) + createReaderAndSerializer(); + if (reader->eof()) return false; @@ -46,7 +56,8 @@ bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension void ProtobufRowInputFormat::setReadBuffer(ReadBuffer & in_) { - reader->setReadBuffer(in_); + if (reader) + reader->setReadBuffer(in_); IRowInputFormat::setReadBuffer(in_); } @@ -60,6 +71,13 @@ void ProtobufRowInputFormat::syncAfterError() reader->endMessage(true); } +void ProtobufRowInputFormat::resetParser() +{ + IRowInputFormat::resetParser(); + serializer.reset(); + reader.reset(); +} + void registerInputFormatProtobuf(FormatFactory & factory) { for (bool with_length_delimiter : {false, true}) diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index 2e0ed49b768..5c042f7c5ab 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -6,6 +6,7 @@ # include # include # include +# include namespace DB { @@ -39,15 +40,22 @@ public: String getName() const override { return "ProtobufRowInputFormat"; } void setReadBuffer(ReadBuffer & in_) override; + void resetParser() override; private: bool readRow(MutableColumns & columns, RowReadExtension & row_read_extension) override; bool allowSyncAfterError() const override; void syncAfterError() override; + void createReaderAndSerializer(); + std::unique_ptr reader; std::vector missing_column_indices; std::unique_ptr serializer; + + const google::protobuf::Descriptor * message_descriptor; + bool with_length_delimiter; + bool flatten_google_wrappers; }; class ProtobufSchemaReader : public IExternalSchemaReader diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 448b6e26387..d3021110b46 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -193,7 +193,10 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex header.getByPosition(i).type->insertDefaultInto(*columns[i]); /// return info about defaults set - ext.read_columns = read_columns; + if (format_settings.defaults_for_omitted_fields) + ext.read_columns = read_columns; + else + ext.read_columns.assign(num_columns, true); return true; } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 23a0f3a0bc2..af5f1f90732 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -70,6 +70,12 @@ void TabSeparatedRowInputFormat::setReadBuffer(ReadBuffer & in_) buf->setSubBuffer(in_); } +void TabSeparatedRowInputFormat::resetParser() +{ + RowInputFormatWithNamesAndTypes::resetParser(); + buf->reset(); +} + TabSeparatedFormatReader::TabSeparatedFormatReader(PeekableReadBuffer & in_, const FormatSettings & format_settings_, bool is_raw_) : FormatWithNamesAndTypesReader(in_, format_settings_), buf(&in_), is_raw(is_raw_) { diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 9edcf86b5de..0f4bff8d7d0 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -23,6 +23,7 @@ public: String getName() const override { return "TabSeparatedRowInputFormat"; } void setReadBuffer(ReadBuffer & in_) override; + void resetParser() override; private: TabSeparatedRowInputFormat(const Block & header_, std::unique_ptr in_, const Params & params_, diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp index 20d964dcb4f..41e30dee83e 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -188,7 +188,14 @@ private: return false; /// remove sorting - parent_node->children.front() = sorting_node->children.front(); + for (auto & child : parent_node->children) + { + if (child == sorting_node) + { + child = sorting_node->children.front(); + break; + } + } /// sorting removed, so need to update sorting traits for upstream steps const DataStream * input_stream = &parent_node->children.front()->step->getOutputStream(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index c70f6c6da30..2e19f21a69d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -196,7 +196,10 @@ ReadFromMergeTree::ReadFromMergeTree( /// When async reading is enabled, allow to read using more streams. /// Will add resize to output_streams_limit to reduce memory usage. output_streams_limit = std::min(requested_num_streams, settings.max_streams_for_merge_tree_reading); - requested_num_streams = std::max(requested_num_streams, settings.max_streams_for_merge_tree_reading); + /// We intentionally set `max_streams` to 1 in InterpreterSelectQuery in case of small limit. + /// Changing it here to `max_streams_for_merge_tree_reading` proven itself as a threat for performance. + if (requested_num_streams != 1) + requested_num_streams = std::max(requested_num_streams, settings.max_streams_for_merge_tree_reading); } else /// Just limit requested_num_streams otherwise. diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index bfee5ed8bf9..e0c79d50141 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -269,6 +269,9 @@ void FillingTransform::transform(Chunk & chunk) if (on_totals) return; + if (!chunk.hasRows() && !generate_suffix) + return; + Columns old_fill_columns; Columns old_interpolate_columns; Columns old_other_columns; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index a58d70a8428..870106d794f 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -387,7 +387,8 @@ Chain buildPushingToViewsChain( chains.emplace_back(std::move(out)); /// Add the view to the query access info so it can appear in system.query_log - if (!no_destination) + /// hasQueryContext - for materialized tables with background replication process query context is not added + if (!no_destination && context->hasQueryContext()) { context->getQueryContext()->addQueryAccessInfo( backQuoteIfNeed(view_id.getDatabaseName()), views_data->views.back().runtime_stats->target_name, {}, "", view_id.getFullTableName()); @@ -757,7 +758,6 @@ IProcessor::Status FinalizingViewsTransform::prepare() output.finish(); return Status::Finished; } - return Status::NeedData; } diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index ab7cfca3de2..6c7c7447070 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -64,15 +64,13 @@ InputFormatPtr getInputFormatFromASTInsertQuery( return source; } -Pipe getSourceFromASTInsertQuery( +Pipe getSourceFromInputFormat( const ASTPtr & ast, - bool with_buffers, - const Block & header, + InputFormatPtr format, ContextPtr context, const ASTPtr & input_function) { - auto source = getInputFormatFromASTInsertQuery(ast, with_buffers, header, context, input_function); - Pipe pipe(source); + Pipe pipe(format); const auto * ast_insert_query = ast->as(); if (context->getSettingsRef().input_format_defaults_for_omitted_fields && ast_insert_query->table_id && !input_function) @@ -84,7 +82,7 @@ Pipe getSourceFromASTInsertQuery( { pipe.addSimpleTransform([&](const Block & cur_header) { - return std::make_shared(cur_header, columns, *source, context); + return std::make_shared(cur_header, columns, *format, context); }); } } @@ -92,6 +90,17 @@ Pipe getSourceFromASTInsertQuery( return pipe; } +Pipe getSourceFromASTInsertQuery( + const ASTPtr & ast, + bool with_buffers, + const Block & header, + ContextPtr context, + const ASTPtr & input_function) +{ + auto format = getInputFormatFromASTInsertQuery(ast, with_buffers, header, context, input_function); + return getSourceFromInputFormat(ast, std::move(format), std::move(context), input_function); +} + std::unique_ptr getReadBufferFromASTInsertQuery(const ASTPtr & ast) { const auto * insert_query = ast->as(); diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.h b/src/Processors/Transforms/getSourceFromASTInsertQuery.h index 4a5ed952efc..dc541873972 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.h +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.h @@ -14,19 +14,27 @@ class Pipe; /// Prepares a input format, which produce data containing in INSERT query. InputFormatPtr getInputFormatFromASTInsertQuery( - const ASTPtr & ast, - bool with_buffers, - const Block & header, - ContextPtr context, - const ASTPtr & input_function); + const ASTPtr & ast, + bool with_buffers, + const Block & header, + ContextPtr context, + const ASTPtr & input_function); + +/// Prepares a pipe from input format got from ASTInsertQuery, +/// which produce data containing in INSERT query. +Pipe getSourceFromInputFormat( + const ASTPtr & ast, + InputFormatPtr format, + ContextPtr context, + const ASTPtr & input_function); /// Prepares a pipe which produce data containing in INSERT query. Pipe getSourceFromASTInsertQuery( - const ASTPtr & ast, - bool with_buffers, - const Block & header, - ContextPtr context, - const ASTPtr & input_function); + const ASTPtr & ast, + bool with_buffers, + const Block & header, + ContextPtr context, + const ASTPtr & input_function); class ReadBuffer; diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 5c9995a418d..a10d70d22e9 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -322,7 +322,7 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::read(std::unique_ptr(*connections); } - do + while (true) { if (!read_context->resumeRoutine()) return ReadResult(Block()); @@ -349,7 +349,6 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::read(std::unique_ptr #include +#include + +namespace fs = std::filesystem; #if USE_SSL #include @@ -619,7 +622,7 @@ void HTTPHandler::processQuery( if (buffer_until_eof) { const std::string tmp_path(server.context()->getTemporaryVolume()->getDisk()->getPath()); - const std::string tmp_path_template(tmp_path + "http_buffers/"); + const std::string tmp_path_template(fs::path(tmp_path) / "http_buffers/"); auto create_tmp_disk_buffer = [tmp_path_template] (const WriteBufferPtr &) { @@ -831,12 +834,20 @@ void HTTPHandler::processQuery( customizeContext(request, context); executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, - [&response, this] (const String & current_query_id, const String & content_type, const String & format, const String & timezone) + [&response, this] (const QueryResultDetails & details) { - response.setContentType(content_type_override.value_or(content_type)); - response.add("X-ClickHouse-Query-Id", current_query_id); - response.add("X-ClickHouse-Format", format); - response.add("X-ClickHouse-Timezone", timezone); + response.add("X-ClickHouse-Query-Id", details.query_id); + + if (content_type_override) + response.setContentType(*content_type_override); + else if (details.content_type) + response.setContentType(*details.content_type); + + if (details.format) + response.add("X-ClickHouse-Format", *details.format); + + if (details.timezone) + response.add("X-ClickHouse-Timezone", *details.timezone); } ); diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 3715d658730..d8ea359ce5f 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -352,11 +352,15 @@ void MySQLHandler::comQuery(ReadBuffer & payload) format_settings.mysql_wire.max_packet_size = max_packet_size; format_settings.mysql_wire.sequence_id = &sequence_id; - auto set_result_details = [&with_output](const String &, const String &, const String &format, const String &) + auto set_result_details = [&with_output](const QueryResultDetails & details) { - if (format != "MySQLWire") - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats"); - with_output = true; + if (details.format) + { + if (*details.format != "MySQLWire") + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats"); + + with_output = true; + } }; executeQuery(should_replace ? replacement : payload, *out, false, query_context, set_result_details, format_settings); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 870a34fa665..a307b472a64 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -611,6 +611,8 @@ void TCPHandler::runImpl() /// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query. query_context.reset(); + CurrentThread::setFatalErrorCallback({}); + if (is_interserver_mode) { /// We don't really have session in interserver mode, new one is created for each query. It's better to reset it now. diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 7aa7aac2ef3..cb6659e59ce 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -465,6 +465,7 @@ void StorageDistributedDirectoryMonitor::run() tryLogCurrentException(getLoggerName().data()); status.last_exception = std::current_exception(); + status.last_exception_time = std::chrono::system_clock::now(); } } else diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 7015fca0311..030d6acf6e2 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -58,6 +58,7 @@ public: struct InternalStatus { std::exception_ptr last_exception; + std::chrono::system_clock::time_point last_exception_time; size_t error_count = 0; diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index c456ab1550a..3c5253f7640 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -133,7 +133,7 @@ Chunk KafkaSource::generateImpl() { e.addMessage("while parsing Kafka message (topic: {}, partition: {}, offset: {})'", consumer->currentTopic(), consumer->currentPartition(), consumer->currentOffset()); - throw; + throw std::move(e); } }; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 50fb7dffa34..7b97273d8af 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -175,22 +175,69 @@ namespace const auto CLEANUP_TIMEOUT_MS = 3000; const auto MAX_THREAD_WORK_DURATION_MS = 60000; // once per minute leave do reschedule (we can't lock threads in pool forever) - /// Configuration prefix - const String CONFIG_PREFIX = "kafka"; + const String CONFIG_KAFKA_TAG = "kafka"; + const String CONFIG_KAFKA_TOPIC_TAG = "kafka_topic"; + const String CONFIG_NAME_TAG = "name"; - void loadFromConfig(cppkafka::Configuration & conf, const Poco::Util::AbstractConfiguration & config, const std::string & path) + /// Read server configuration into cppkafka configuration, used by global configuration and by legacy per-topic configuration + void loadFromConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(path, keys); + /// Read all tags one level below + Poco::Util::AbstractConfiguration::Keys tags; + config.keys(config_prefix, tags); - for (const auto & key : keys) + for (const auto & tag : tags) { - const String key_path = path + "." + key; - // log_level has valid underscore, rest librdkafka setting use dot.separated.format - // which is not acceptable for XML. - // See also https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md - const String key_name = (key == "log_level") ? key : boost::replace_all_copy(key, "_", "."); - conf.set(key_name, config.getString(key_path)); + if (tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + continue; /// used by new per-topic configuration, ignore + + const String setting_path = config_prefix + "." + tag; + const String setting_value = config.getString(setting_path); + + /// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML. + /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md + const String setting_name_in_kafka_config = (tag == "log_level") ? tag : boost::replace_all_copy(tag, "_", "."); + kafka_config.set(setting_name_in_kafka_config, setting_value); + } + } + + /// Read server configuration into cppkafa configuration, used by new per-topic configuration + void loadTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const String & topic) + { + /// Read all tags one level below + Poco::Util::AbstractConfiguration::Keys tags; + config.keys(config_prefix, tags); + + for (const auto & tag : tags) + { + /// Only consider tag . Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + if (!tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) + continue; + + /// Read topic name between ... + const String kafka_topic_path = config_prefix + "." + tag; + const String kafpa_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG; + + const String topic_name = config.getString(kafpa_topic_name_path); + if (topic_name == topic) + { + /// Found it! Now read the per-topic configuration into cppkafka. + Poco::Util::AbstractConfiguration::Keys inner_tags; + config.keys(kafka_topic_path, inner_tags); + for (const auto & inner_tag : inner_tags) + { + if (inner_tag == CONFIG_NAME_TAG) + continue; // ignore + + /// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML. + /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md + const String setting_path = kafka_topic_path + "." + inner_tag; + const String setting_value = config.getString(setting_path); + + const String setting_name_in_kafka_config = (inner_tag == "log_level") ? inner_tag : boost::replace_all_copy(inner_tag, "_", "."); + kafka_config.set(setting_name_in_kafka_config, setting_value); + } + } } } } @@ -509,29 +556,33 @@ size_t StorageKafka::getPollTimeoutMillisecond() const String StorageKafka::getConfigPrefix() const { if (!collection_name.empty()) - return "named_collections." + collection_name + "." + CONFIG_PREFIX; /// Add one more level to separate librdkafka configuration. - return CONFIG_PREFIX; + return "named_collections." + collection_name + "." + CONFIG_KAFKA_TAG; /// Add one more level to separate librdkafka configuration. + return CONFIG_KAFKA_TAG; } -void StorageKafka::updateConfiguration(cppkafka::Configuration & conf) +void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) { - // Update consumer configuration from the configuration + // Update consumer configuration from the configuration. Example: + // + // 250 + // 100000 + // const auto & config = getContext()->getConfigRef(); auto config_prefix = getConfigPrefix(); if (config.has(config_prefix)) - loadFromConfig(conf, config, config_prefix); + loadFromConfig(kafka_config, config, config_prefix); - #if USE_KRB5 - if (conf.has_property("sasl.kerberos.kinit.cmd")) +#if USE_KRB5 + if (kafka_config.has_property("sasl.kerberos.kinit.cmd")) LOG_WARNING(log, "sasl.kerberos.kinit.cmd configuration parameter is ignored."); - conf.set("sasl.kerberos.kinit.cmd",""); - conf.set("sasl.kerberos.min.time.before.relogin","0"); + kafka_config.set("sasl.kerberos.kinit.cmd",""); + kafka_config.set("sasl.kerberos.min.time.before.relogin","0"); - if (conf.has_property("sasl.kerberos.keytab") && conf.has_property("sasl.kerberos.principal")) + if (kafka_config.has_property("sasl.kerberos.keytab") && kafka_config.has_property("sasl.kerberos.principal")) { - String keytab = conf.get("sasl.kerberos.keytab"); - String principal = conf.get("sasl.kerberos.principal"); + String keytab = kafka_config.get("sasl.kerberos.keytab"); + String principal = kafka_config.get("sasl.kerberos.principal"); LOG_DEBUG(log, "Running KerberosInit"); try { @@ -543,21 +594,47 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf) } LOG_DEBUG(log, "Finished KerberosInit"); } - #else // USE_KRB5 - if (conf.has_property("sasl.kerberos.keytab") || conf.has_property("sasl.kerberos.principal")) - LOG_WARNING(log, "Kerberos-related parameters are ignored because ClickHouse was built without support of krb5 library."); - #endif // USE_KRB5 +#else // USE_KRB5 + if (kafka_config.has_property("sasl.kerberos.keytab") || kafka_config.has_property("sasl.kerberos.principal")) + LOG_WARNING(log, "Ignoring Kerberos-related parameters because ClickHouse was built without krb5 library support."); +#endif // USE_KRB5 - // Update consumer topic-specific configuration + // Update consumer topic-specific configuration (legacy syntax, retained for compatibility). Example with topic "football": + // + // 250 + // 100000 + // + // The legacy syntax has the problem that periods in topic names (e.g. "sports.football") are not supported because the Poco + // configuration framework hierarchy is based on periods as level separators. Besides that, per-topic tags at the same level + // as are ugly. for (const auto & topic : topics) { const auto topic_config_key = config_prefix + "_" + topic; if (config.has(topic_config_key)) - loadFromConfig(conf, config, topic_config_key); + loadFromConfig(kafka_config, config, topic_config_key); } + // Update consumer topic-specific configuration (new syntax). Example with topics "football" and "baseball": + // + // + // football + // 250 + // 5000 + // + // + // baseball + // 300 + // 2000 + // + // + // Advantages: The period restriction no longer applies (e.g. sports.football will work), everything + // Kafka-related is below . + for (const auto & topic : topics) + if (config.has(config_prefix)) + loadTopicConfig(kafka_config, config, config_prefix, topic); + // No need to add any prefix, messages can be distinguished - conf.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message) + kafka_config.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message) { auto [poco_level, client_logs_level] = parseSyslogLevel(level); LOG_IMPL(log, client_logs_level, poco_level, "[rdk:{}] {}", facility, message); @@ -573,13 +650,13 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf) int status; - status = rd_kafka_conf_interceptor_add_on_new(conf.get_handle(), + status = rd_kafka_conf_interceptor_add_on_new(kafka_config.get_handle(), "init", StorageKafkaInterceptors::rdKafkaOnNew, self); if (status != RD_KAFKA_RESP_ERR_NO_ERROR) LOG_ERROR(log, "Cannot set new interceptor due to {} error", status); // cppkafka always copy the configuration - status = rd_kafka_conf_interceptor_add_on_conf_dup(conf.get_handle(), + status = rd_kafka_conf_interceptor_add_on_conf_dup(kafka_config.get_handle(), "init", StorageKafkaInterceptors::rdKafkaOnConfDup, self); if (status != RD_KAFKA_RESP_ERR_NO_ERROR) LOG_ERROR(log, "Cannot set dup conf interceptor due to {} error", status); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 890eb5a82e6..3559129cf74 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -126,7 +126,7 @@ private: std::atomic shutdown_called = false; // Update Kafka configuration with values from CH user configuration. - void updateConfiguration(cppkafka::Configuration & conf); + void updateConfiguration(cppkafka::Configuration & kafka_config); String getConfigPrefix() const; void threadFunc(size_t idx); diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index d8769a94347..175df9b6e28 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -684,12 +684,6 @@ void DataPartStorageOnDiskBase::clearDirectory( request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true); request.emplace_back(fs::path(dir) / "txn_version.txt", true); - /// Inverted index - request.emplace_back(fs::path(dir) / "skp_idx_af.gin_dict", true); - request.emplace_back(fs::path(dir) / "skp_idx_af.gin_post", true); - request.emplace_back(fs::path(dir) / "skp_idx_af.gin_seg", true); - request.emplace_back(fs::path(dir) / "skp_idx_af.gin_sid", true); - disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove); disk->removeDirectory(dir); } diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index b671106f46a..c6efe9c9589 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -173,6 +173,7 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write writeUUIDText(part->uuid, out); String remote_fs_metadata = parse(params.get("remote_fs_metadata", "")); + std::regex re("\\s*,\\s*"); Strings capability( std::sregex_token_iterator(remote_fs_metadata.begin(), remote_fs_metadata.end(), re, -1), @@ -477,6 +478,22 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( int server_protocol_version = parse(in->getResponseCookie("server_protocol_version", "0")); + String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); + + DiskPtr preffered_disk = disk; + + if (!preffered_disk) + { + for (const auto & disk_candidate : data.getDisks()) + { + if (toString(disk_candidate->getDataSourceDescription().type) == remote_fs_metadata) + { + preffered_disk = disk_candidate; + break; + } + } + } + ReservationPtr reservation; size_t sum_files_size = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) @@ -493,31 +510,32 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( if (!disk) { - LOG_TRACE(log, "Disk for fetch is not provided, reserving space using storage balanced reservation"); + LOG_TEST(log, "Disk for fetch is not provided, reserving space using storage balanced reservation"); reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); + if (!reservation) { - LOG_TRACE(log, "Disk for fetch is not provided, reserving space using TTL rules"); + LOG_TEST(log, "Disk for fetch is not provided, reserving space using TTL rules"); reservation - = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); + = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true, preffered_disk); } } } else if (!disk) { - LOG_TRACE(log, "Making balanced reservation"); + LOG_TEST(log, "Making balanced reservation"); reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr); if (!reservation) { - LOG_TRACE(log, "Making simple reservation"); + LOG_TEST(log, "Making simple reservation"); reservation = data.reserveSpace(sum_files_size); } } } else if (!disk) { - LOG_TRACE(log, "Making reservation on the largest disk"); + LOG_TEST(log, "Making reservation on the largest disk"); /// We don't know real size of part because sender server version is too old reservation = data.makeEmptyReservationOnLargestDisk(); } @@ -525,11 +543,11 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( if (!disk) { disk = reservation->getDisk(); - LOG_INFO(log, "Disk for fetch is not provided, getting disk from reservation {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Disk for fetch is not provided, getting disk from reservation {} with type '{}'", disk->getName(), toString(disk->getDataSourceDescription().type)); } else { - LOG_INFO(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TEST(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); } UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); @@ -552,8 +570,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) readUUIDText(part_uuid, *in); - String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); - size_t projections = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) readBinary(projections, *in); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 06b4cb07f1f..85420cabb8d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -880,7 +880,7 @@ void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSetti } catch (...) { - tryLogCurrentException("DataPartStorageOnDiskFull"); + tryLogCurrentException("IMergeTreeDataPart"); } throw; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 8bbd15c6fe4..0ad91d84d29 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -79,7 +79,13 @@ IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm( result_header = header_without_const_virtual_columns; injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); - LOG_TEST(log, "PREWHERE actions: {}", (prewhere_actions ? prewhere_actions->dump() : std::string(""))); + if (prewhere_actions) + LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions->steps.size(), prewhere_actions->dumpConditions()); + + if (prewhere_info) + LOG_TEST(log, "Original PREWHERE DAG:\n{}\nPREWHERE actions:\n{}", + (prewhere_info->prewhere_actions ? prewhere_info->prewhere_actions->dumpDAG(): std::string("")), + (prewhere_actions ? prewhere_actions->dump() : std::string(""))); } bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere); diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index edb36cf6e55..4c3d4bc8aa0 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -141,7 +141,6 @@ MergeTreeReadTask::MergeTreeReadTask( size_t part_index_in_query_, const NameSet & column_name_set_, const MergeTreeReadTaskColumns & task_columns_, - bool remove_prewhere_column_, MergeTreeBlockSizePredictorPtr size_predictor_, int64_t priority_, std::future reader_, @@ -151,7 +150,6 @@ MergeTreeReadTask::MergeTreeReadTask( , part_index_in_query{part_index_in_query_} , column_name_set{column_name_set_} , task_columns{task_columns_} - , remove_prewhere_column{remove_prewhere_column_} , size_predictor{size_predictor_} , reader(std::move(reader_)) , pre_reader_for_step(std::move(pre_reader_for_step_)) diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 66127c675a1..e7dad5173ff 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -59,8 +59,6 @@ struct MergeTreeReadTask const NameSet & column_name_set; /// column names to read during PREWHERE and WHERE const MergeTreeReadTaskColumns & task_columns; - /// should PREWHERE column be returned to requesting side? - const bool remove_prewhere_column; /// Used to satistfy preferred_block_size_bytes limitation MergeTreeBlockSizePredictorPtr size_predictor; /// Used to save current range processing status @@ -87,7 +85,6 @@ struct MergeTreeReadTask size_t part_index_in_query_, const NameSet & column_name_set_, const MergeTreeReadTaskColumns & task_columns_, - bool remove_prewhere_column_, MergeTreeBlockSizePredictorPtr size_predictor_, int64_t priority_ = 0, std::future reader_ = {}, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0d4e54453d7..1da99cb4117 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -1893,11 +1894,11 @@ void MergeTreeData::stopOutdatedDataPartsLoadingTask() /// (Only files on the first level of nesting are considered). static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_path, time_t threshold) { - if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() >= threshold) + if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() > threshold) return false; for (auto it = disk->iterateDirectory(directory_path); it->isValid(); it->next()) - if (disk->getLastModified(it->path()).epochTime() >= threshold) + if (disk->getLastModified(it->path()).epochTime() > threshold) return false; return true; @@ -4603,8 +4604,18 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on disk '{}'", partition_id, disk->getName()); } - if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) - throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled"); + MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast(disk)); + switch (moves_outcome) + { + case MovePartsOutcome::MovesAreCancelled: + throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled"); + case MovePartsOutcome::NothingToMove: + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No parts to move are found in partition {}", partition_id); + case MovePartsOutcome::MoveWasPostponedBecauseOfZeroCopy: + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Move was not finished, because zero copy mode is enabled and someone other is moving the same parts right now"); + case MovePartsOutcome::PartsMoved: + break; + } } @@ -4656,8 +4667,18 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on volume '{}'", partition_id, volume->getName()); } - if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) - throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled"); + MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast(volume)); + switch (moves_outcome) + { + case MovePartsOutcome::MovesAreCancelled: + throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled"); + case MovePartsOutcome::NothingToMove: + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No parts to move are found in partition {}", partition_id); + case MovePartsOutcome::MoveWasPostponedBecauseOfZeroCopy: + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Move was not finished, because zero copy mode is enabled and someone other is moving the same parts right now"); + case MovePartsOutcome::PartsMoved: + break; + } } void MergeTreeData::movePartitionToShard(const ASTPtr & /*partition*/, bool /*move_part*/, const String & /*to*/, ContextPtr /*query_context*/) @@ -7442,7 +7463,7 @@ bool MergeTreeData::scheduleDataMovingJob(BackgroundJobsAssignee & assignee) assignee.scheduleMoveTask(std::make_shared( [this, moving_tagger] () mutable { - return moveParts(moving_tagger); + return moveParts(moving_tagger) == MovePartsOutcome::PartsMoved; }, moves_assignee_trigger, getStorageID())); return true; } @@ -7457,14 +7478,14 @@ bool MergeTreeData::areBackgroundMovesNeeded() const return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1; } -bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) +MovePartsOutcome MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) { if (parts_mover.moves_blocker.isCancelled()) - return false; + return MovePartsOutcome::MovesAreCancelled; auto moving_tagger = checkPartsForMove(parts, space); if (moving_tagger->parts_to_move.empty()) - return false; + return MovePartsOutcome::NothingToMove; return moveParts(moving_tagger); } @@ -7521,13 +7542,13 @@ MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::checkPartsForMove(co return std::make_shared(std::move(parts_to_move), *this); } -bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger) +MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger) { LOG_INFO(log, "Got {} parts to move.", moving_tagger->parts_to_move.size()); const auto settings = getSettings(); - bool result = true; + MovePartsOutcome result{MovePartsOutcome::PartsMoved}; for (const auto & moving_part : moving_tagger->parts_to_move) { Stopwatch stopwatch; @@ -7583,7 +7604,7 @@ bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagge { /// Move will be retried but with backoff. LOG_DEBUG(log, "Move of part {} postponed, because zero copy mode enabled and someone other moving this part right now", moving_part.part->name); - result = false; + result = MovePartsOutcome::MoveWasPostponedBecauseOfZeroCopy; continue; } } @@ -7763,6 +7784,25 @@ void MergeTreeData::removeQueryIdNoLock(const String & query_id) const query_id_set.erase(query_id); } +std::shared_ptr MergeTreeData::getQueryIdHolder(const String & query_id, UInt64 max_concurrent_queries) const +{ + auto lock = std::lock_guard(query_id_set_mutex); + if (insertQueryIdOrThrowNoLock(query_id, max_concurrent_queries)) + { + try + { + return std::make_shared(query_id, *this); + } + catch (...) + { + /// If we fail to construct the holder, remove query_id explicitly to avoid leak. + removeQueryIdNoLock(query_id); + throw; + } + } + return nullptr; +} + ReservationPtr MergeTreeData::balancedReservation( const StorageMetadataPtr & metadata_snapshot, size_t part_size, diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index faee69b137f..4a1aafe20b6 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -958,7 +958,7 @@ public: /// section from config.xml. CompressionCodecPtr getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const; - std::lock_guard getQueryIdSetLock() const { return std::lock_guard(query_id_set_mutex); } + std::shared_ptr getQueryIdHolder(const String & query_id, UInt64 max_concurrent_queries) const; /// Record current query id where querying the table. Throw if there are already `max_queries` queries accessing the same table. /// Returns false if the `query_id` already exists in the running set, otherwise return true. @@ -1315,7 +1315,7 @@ protected: /// MergeTree because they store mutations in different way. virtual MutationCommands getFirstAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; /// Moves part to specified space, used in ALTER ... MOVE ... queries - bool movePartsToSpace(const DataPartsVector & parts, SpacePtr space); + MovePartsOutcome movePartsToSpace(const DataPartsVector & parts, SpacePtr space); /// Makes backup entries to backup the parts of this table. BackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const ContextPtr & local_context); @@ -1456,7 +1456,7 @@ private: using CurrentlyMovingPartsTaggerPtr = std::shared_ptr; /// Move selected parts to corresponding disks - bool moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger); + MovePartsOutcome moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger); /// Select parts for move and disks for them. Used in background moving processes. CurrentlyMovingPartsTaggerPtr selectPartsForMove(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 719a60b2f31..8f4d066baa3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -75,6 +75,10 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r { const String & name = it.first; + /// Exclude files written by inverted index from check. No correct checksums are available for them currently. + if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid")) + continue; + auto jt = rhs.files.find(name); if (jt == rhs.files.end()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 1dec7c2cd7c..b0101bb962c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -208,26 +208,26 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); - for (const auto & index_helper : skip_indices) + for (const auto & skip_index : skip_indices) { - String stream_name = index_helper->getFileName(); + String stream_name = skip_index->getFileName(); skip_indices_streams.emplace_back( std::make_unique( stream_name, data_part->getDataPartStoragePtr(), - stream_name, index_helper->getSerializedFileExtension(), + stream_name, skip_index->getSerializedFileExtension(), stream_name, marks_file_extension, default_codec, settings.max_compress_block_size, marks_compression_codec, settings.marks_compress_block_size, settings.query_write_settings)); GinIndexStorePtr store = nullptr; - if (dynamic_cast(&*index_helper) != nullptr) + if (typeid_cast(&*skip_index) != nullptr) { store = std::make_shared(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } - skip_indices_aggregators.push_back(index_helper->createIndexAggregatorForPart(store)); + skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store)); skip_index_accumulated_marks.push_back(0); } } @@ -284,7 +284,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing; GinIndexStorePtr store; - if (dynamic_cast(&*index_helper) != nullptr) + if (typeid_cast(&*index_helper) != nullptr) { String stream_name = index_helper->getFileName(); auto it = gin_index_stores.find(stream_name); @@ -388,6 +388,18 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data auto & stream = *skip_indices_streams[i]; if (!skip_indices_aggregators[i]->empty()) skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed_hashing); + + /// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown + /// files. Note that the provided actual checksums are bogus. The problem is that at this point the file writes happened already and + /// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files. + if (typeid_cast(&*skip_indices[i]) != nullptr) + { + String filename_without_extension = skip_indices[i]->getFileName(); + checksums.files[filename_without_extension + ".gin_dict"] = MergeTreeDataPartChecksums::Checksum(); + checksums.files[filename_without_extension + ".gin_post"] = MergeTreeDataPartChecksums::Checksum(); + checksums.files[filename_without_extension + ".gin_seg"] = MergeTreeDataPartChecksums::Checksum(); + checksums.files[filename_without_extension + ".gin_sid"] = MergeTreeDataPartChecksums::Checksum(); + } } for (auto & stream : skip_indices_streams) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 4fe470efcce..db358e1b6a9 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1198,7 +1198,6 @@ std::shared_ptr MergeTreeDataSelectExecutor::checkLimits( const MergeTreeData & data, const ReadFromMergeTree::AnalysisResult & result, const ContextPtr & context) - TSA_NO_THREAD_SAFETY_ANALYSIS // disabled because TSA is confused by guaranteed copy elision in data.getQueryIdSetLock() { const auto & settings = context->getSettingsRef(); const auto data_settings = data.getSettings(); @@ -1222,22 +1221,7 @@ std::shared_ptr MergeTreeDataSelectExecutor::checkLimits( { auto query_id = context->getCurrentQueryId(); if (!query_id.empty()) - { - auto lock = data.getQueryIdSetLock(); - if (data.insertQueryIdOrThrowNoLock(query_id, data_settings->max_concurrent_queries)) - { - try - { - return std::make_shared(query_id, data); - } - catch (...) - { - /// If we fail to construct the holder, remove query_id explicitly to avoid leak. - data.removeQueryIdNoLock(query_id); - throw; - } - } - } + return data.getQueryIdHolder(query_id, data_settings->max_concurrent_queries); } return nullptr; } diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp index bd7aa34ec0e..813f144ee98 100644 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp @@ -56,7 +56,6 @@ try task = std::make_unique( data_part, mark_ranges_for_task, part_index_in_query, column_name_set, task_columns, - prewhere_info && prewhere_info->remove_prewhere_column, std::move(size_predictor)); return true; diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h index dfb4bb954d7..1cee98bcba9 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.h +++ b/src/Storages/MergeTree/MergeTreePartsMover.h @@ -11,6 +11,13 @@ namespace DB { +enum class MovePartsOutcome +{ + PartsMoved, + NothingToMove, + MovesAreCancelled, + MoveWasPostponedBecauseOfZeroCopy, +}; /// Active part from storage and destination reservation where it has to be moved struct MergeTreeMoveEntry diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index d5d55277149..d76b8522f42 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -509,7 +509,7 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr auto read_task = std::make_unique( part.data_part, ranges_to_get_from_part, part.part_index_in_query, - part.column_name_set, part.task_columns, prewhere_info && prewhere_info->remove_prewhere_column, + part.column_name_set, part.task_columns, std::move(curr_task_size_predictor)); read_task->priority = priority; diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index bad158cd7a7..98cfe28c563 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 44f6cf9f70d..e3b87d48ce6 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1076,7 +1076,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result.checkInternalConsistency(); - if (!read_result.can_return_prewhere_column_without_filtering) + if (!read_result.can_return_prewhere_column_without_filtering && last_reader_in_chain) { if (!read_result.filterWasApplied()) { @@ -1380,17 +1380,14 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r current_step_filter = result.columns[prewhere_column_pos]; } + /// In case when we are returning prewhere column the caller expects it to serve as a final filter: + /// it must contain 0s not only from the current step but also from all the previous steps. + /// One way to achieve this is to apply the final_filter if we know that the final_filter was not applied at + /// several previous steps but was accumulated instead. + result.can_return_prewhere_column_without_filtering = result.filterWasApplied(); + if (prewhere_info->remove_column) result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - { - /// In case when we are not removing prewhere column the caller expects it to serve as a final filter: - /// it must contain 0s not only from the current step but also from all the previous steps. - /// One way to achieve this is to apply the final_filter if we know that the final _filter was not applied at - /// several previous steps but was accumulated instead. - result.can_return_prewhere_column_without_filtering = - (!result.final_filter.present() || result.final_filter.countBytesInFilter() == result.num_rows); - } FilterWithCachedCount current_filter(current_step_filter); @@ -1430,4 +1427,14 @@ std::string PrewhereExprInfo::dump() const return s.str(); } +std::string PrewhereExprInfo::dumpConditions() const +{ + WriteBufferFromOwnString s; + + for (size_t i = 0; i < steps.size(); ++i) + s << (i == 0 ? "\"" : ", \"") << steps[i].column_name << "\""; + + return s.str(); +} + } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 039a499e9c1..5ffd464cfe2 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -35,6 +35,8 @@ struct PrewhereExprInfo std::vector steps; std::string dump() const; + + std::string dumpConditions() const; }; class FilterWithCachedCount diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 023afa5cc93..54a040724fc 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -208,7 +208,7 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread) return std::make_unique( part.data_part, ranges_to_get_from_part, part.part_index_in_query, per_part.column_name_set, per_part.task_columns, - prewhere_info && prewhere_info->remove_prewhere_column, std::move(curr_task_size_predictor)); + std::move(curr_task_size_predictor)); } Block MergeTreeReadPool::getHeader() const @@ -459,7 +459,6 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t thread) part.part_index_in_query, per_part.column_name_set, per_part.task_columns, - prewhere_info && prewhere_info->remove_prewhere_column, std::move(curr_task_size_predictor)); } diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 367818c7af1..da2d0b0ae4a 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -49,8 +49,7 @@ bool MergeTreeReverseSelectAlgorithm::getNewTaskOrdinaryReading() task = std::make_unique( data_part, mark_ranges_for_task, part_index_in_query, column_name_set, - task_columns, prewhere_info && prewhere_info->remove_prewhere_column, - std::move(size_predictor)); + task_columns, std::move(size_predictor)); return true; @@ -88,8 +87,7 @@ bool MergeTreeReverseSelectAlgorithm::getNewTaskParallelReplicas() task = std::make_unique( data_part, mark_ranges_for_task, part_index_in_query, column_name_set, - task_columns, prewhere_info && prewhere_info->remove_prewhere_column, - std::move(size_predictor)); + task_columns, std::move(size_predictor)); return true; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index e5af0c772ba..e951b8f54cf 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 933ad8e913a..6d280f3aaec 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,7 +43,7 @@ struct Settings; M(UInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.", 0) \ M(UInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).", 0) \ M(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \ - M(UInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ + M(UInt64, max_replicated_merges_in_queue, 1000, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ M(UInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ M(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index b42900d239d..533875d80cd 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -197,10 +197,6 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction if (!prewhere_info || !prewhere_info->prewhere_actions) return true; - Poco::Logger * log = &Poco::Logger::get("tryBuildPrewhereSteps"); - - LOG_TRACE(log, "Original PREWHERE DAG:\n{}", prewhere_info->prewhere_actions->dumpDAG()); - /// 1. List all condition nodes that are combined with AND into PREWHERE condition const auto & condition_root = prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name); const bool is_conjunction = (condition_root.type == ActionsDAG::ActionType::FUNCTION && condition_root.function_base->getName() == "and"); @@ -339,8 +335,6 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction prewhere.steps.back().need_filter = prewhere_info->need_filter; } - LOG_TRACE(log, "Resulting PREWHERE:\n{}", prewhere.dump()); - return true; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 31fd99f0aa1..035cbdac55e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -985,7 +985,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( } else if (Coordination::isUserError(multi_code)) { - String failed_op_path = zkutil::KeeperMultiException(multi_code, ops, responses).getPathForFirstFailedOp(); + String failed_op_path = ops[zkutil::getFailedOpIndex(multi_code, responses)]->getPath(); auto contains = [](const auto & block_ids, const String & path) { @@ -1002,7 +1002,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( if (multi_code == Coordination::Error::ZNODEEXISTS && deduplicate_block && contains(block_id_path, failed_op_path)) { - /// Block with the same id have just appeared in table (or other replica), rollback thee insertion. + /// Block with the same id have just appeared in table (or other replica), rollback the insertion. LOG_INFO(log, "Block with ID {} already exists (it was just appeared). Renaming part {} back to {}. Will retry write.", toString(block_id), part->name, temporary_part_relative_path); diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 2ec83d99eeb..de31258b2f9 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -157,25 +157,29 @@ IMergeTreeDataPart::Checksums checkDataPart( } NameSet projections_on_disk; - const auto & checksum_files_txt = checksums_txt.files; + const auto & checksums_txt_files = checksums_txt.files; for (auto it = data_part_storage.iterate(); it->isValid(); it->next()) { auto file_name = it->name(); /// We will check projections later. - if (data_part_storage.isDirectory(file_name) && endsWith(file_name, ".proj")) + if (data_part_storage.isDirectory(file_name) && file_name.ends_with(".proj")) { projections_on_disk.insert(file_name); continue; } + /// Exclude files written by inverted index from check. No correct checksums are available for them currently. + if (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid")) + continue; + auto checksum_it = checksums_data.files.find(file_name); /// Skip files that we already calculated. Also skip metadata files that are not checksummed. if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name)) { - auto txt_checksum_it = checksum_files_txt.find(file_name); - if (txt_checksum_it == checksum_files_txt.end() || txt_checksum_it->second.uncompressed_size == 0) + auto txt_checksum_it = checksums_txt_files.find(file_name); + if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0) { /// The file is not compressed. checksum_file(file_name); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b1ac2e2ba0f..0ca29e2826a 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -44,6 +44,27 @@ #include +namespace +{ + +using namespace DB; +bool columnIsPhysical(ColumnDefaultKind kind) +{ + return kind == ColumnDefaultKind::Default || kind == ColumnDefaultKind::Materialized; +} +bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs) +{ + if (lhs == rhs) + return true; + + if (columnIsPhysical(lhs) == columnIsPhysical(rhs)) + return true; + + return false; +} + +} + namespace DB { @@ -172,11 +193,13 @@ std::optional StorageMerge::supportedPrewhereColumns() const NameSet supported_columns; - std::unordered_map>> column_type_default; + std::unordered_map> column_info; for (const auto & name_type : columns.getAll()) { - column_type_default.emplace(name_type.name, std::make_pair( - name_type.type.get(), columns.getDefault(name_type.name))); + const auto & column_default = columns.getDefault(name_type.name).value_or(ColumnDefault{}); + column_info.emplace(name_type.name, std::make_pair( + name_type.type.get(), + column_default.kind)); supported_columns.emplace(name_type.name); } @@ -191,11 +214,10 @@ std::optional StorageMerge::supportedPrewhereColumns() const const auto & table_columns = table_metadata_ptr->getColumns(); for (const auto & column : table_columns.getAll()) { - const auto & root_type_default = column_type_default[column.name]; - const IDataType * root_type = root_type_default.first; - const std::optional & src_default = root_type_default.second; + const auto & column_default = table_columns.getDefault(column.name).value_or(ColumnDefault{}); + const auto & [root_type, src_default_kind] = column_info[column.name]; if ((root_type && !root_type->equals(*column.type)) || - src_default != table_columns.getDefault(column.name)) + !columnDefaultKindHasSameType(src_default_kind, column_default.kind)) { supported_columns.erase(column.name); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0de44e62899..429339386b4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -905,17 +905,16 @@ void StorageReplicatedMergeTree::drop() /// in this case, has_metadata_in_zookeeper = false, and we also permit to drop the table. bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper; + zkutil::ZooKeeperPtr zookeeper; if (maybe_has_metadata_in_zookeeper) { /// Table can be shut down, restarting thread is not active /// and calling StorageReplicatedMergeTree::getZooKeeper()/getAuxiliaryZooKeeper() won't suffice. - zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown(); + zookeeper = getZooKeeperIfTableShutDown(); /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. if (!zookeeper) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Can't drop readonly replicated table (need to drop data in ZooKeeper as well)"); - - dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings()); } /// Wait for loading of all outdated parts because @@ -929,10 +928,17 @@ void StorageReplicatedMergeTree::drop() } dropAllData(); + + if (maybe_has_metadata_in_zookeeper) + { + /// Session could expire, get it again + zookeeper = getZooKeeperIfTableShutDown(); + dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings(), &has_metadata_in_zookeeper); + } } void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - Poco::Logger * logger, MergeTreeSettingsPtr table_settings) + Poco::Logger * logger, MergeTreeSettingsPtr table_settings, std::optional * has_metadata_out) { if (zookeeper->expired()) throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired."); @@ -990,12 +996,16 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con Coordination::errorMessage(code), remote_replica_path); /// And finally remove everything else recursively - zookeeper->tryRemoveRecursive(remote_replica_path); - } + /// It may left some garbage if replica_path subtree is concurrently modified + zookeeper->tryRemoveChildrenRecursive(remote_replica_path); - /// It may left some garbage if replica_path subtree are concurrently modified - if (zookeeper->exists(remote_replica_path)) - LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path); + /// Update has_metadata_in_zookeeper to avoid retries. Otherwise we can accidentally remove metadata of a new table on retries + if (has_metadata_out) + *has_metadata_out = false; + + if (zookeeper->tryRemove(remote_replica_path) != Coordination::Error::ZOK) + LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path); + } /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line. Strings replicas; @@ -8183,6 +8193,12 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co auto shared_id = getTableSharedID(); if (shared_id == toString(UUIDHelpers::Nil)) { + if (zookeeper->exists(zookeeper_path)) + { + LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, " + "but table path exist and other replicas may exist. It may leave some garbage on S3", part.name); + return std::make_pair(false, NameSet{}); + } LOG_TRACE(log, "Part {} blobs can be removed, because table {} completely dropped", part.name, getStorageID().getNameForLogs()); return std::make_pair(true, NameSet{}); } @@ -8208,9 +8224,18 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co return std::make_pair(true, NameSet{}); } - /// If table was completely dropped (no meta in zookeeper) we can safely remove parts if (has_metadata_in_zookeeper.has_value() && !has_metadata_in_zookeeper) + { + if (zookeeper->exists(zookeeper_path)) + { + LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, " + "but table path exist and other replicas may exist. It may leave some garbage on S3", part.name); + return std::make_pair(false, NameSet{}); + } + + /// If table was completely dropped (no meta in zookeeper) we can safely remove parts return std::make_pair(true, NameSet{}); + } /// We remove parts during table shutdown. If exception happen, restarting thread will be already turned /// off and nobody will reconnect our zookeeper connection. In this case we use zookeeper connection from diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 5a8f06d0a63..46c78e9064a 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -229,7 +229,7 @@ public: /** Remove a specific replica from zookeeper. */ static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr); + Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional * has_metadata_out = nullptr); /// Removes table from ZooKeeper after the last replica was dropped static bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 6ca6a9db046..ca19687918c 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -9,9 +9,11 @@ const char * auto_contributors[] { "3ldar-nasyrov", "546", "7", + "7vikpeculiar", "821008736@qq.com", "94rain", "ANDREI STAROVEROV", + "AVMusorin", "Aaron Katz", "Adam Rutkowski", "Adri Fernandez", @@ -83,6 +85,7 @@ const char * auto_contributors[] { "Alexey Ilyukhov", "Alexey Ivanov", "Alexey Milovidov", + "Alexey Perevyshin", "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", @@ -261,6 +264,7 @@ const char * auto_contributors[] { "Denys Golotiuk", "Derek Chia", "Derek Perkins", + "Diego Nieto", "Diego Nieto (lesandie)", "DimaAmega", "Ding Xiang Fei", @@ -294,6 +298,7 @@ const char * auto_contributors[] { "DuckSoft", "Egor O'Sten", "Egor Savin", + "Eirik", "Ekaterina", "Eldar Zaitov", "Elena", @@ -322,6 +327,7 @@ const char * auto_contributors[] { "Evgeny Markov", "Ewout", "FArthur-cmd", + "FFFFFFFHHHHHHH", "Fabian Stäber", "Fabiano Francesconi", "Fadi Hadzh", @@ -432,6 +438,7 @@ const char * auto_contributors[] { "JackyWoo", "Jacob Hayes", "Jacob Herrington", + "Jake Bamrah", "Jake Liu", "Jakub Kuklis", "James Maidment", @@ -462,6 +469,7 @@ const char * auto_contributors[] { "Julian Gilyadov", "Julian Zhou", "Julio Jimenez", + "Junfu Wu", "Jus", "Justin Hilliard", "Kang Liu", @@ -472,12 +480,14 @@ const char * auto_contributors[] { "Kerry Clendinning", "Kevin Chiang", "Kevin Michel", + "Kevin Zhang", "KinderRiven", "Kiran", "Kirill Danshin", "Kirill Ershov", "Kirill Malev", "Kirill Shvakov", + "KitKatKKK", "Koblikov Mihail", "KochetovNicolai", "Konstantin Bogdanov", @@ -495,6 +505,7 @@ const char * auto_contributors[] { "Kruglov Pavel", "Krzysztof Góralski", "Kseniia Sumarokova", + "Kunal Gurnani", "Kuz Le", "Ky Li", "LAL2211", @@ -748,6 +759,7 @@ const char * auto_contributors[] { "Roman", "Roman Bug", "Roman Chyrva", + "Roman Heinrich", "Roman Lipovsky", "Roman Nikolaev", "Roman Nikonov", @@ -767,6 +779,7 @@ const char * auto_contributors[] { "Saad Ur Rahman", "Sabyanin Maxim", "Sachin", + "SadiHassan", "Safronov Michail", "SaltTan", "Salvatore Mesoraca", @@ -832,6 +845,7 @@ const char * auto_contributors[] { "SuperBot", "SuperDJY", "Suzy Wang", + "SuzyWangIBMer", "Sébastien", "Sébastien Launay", "TABLUM.IO", @@ -986,6 +1000,7 @@ const char * auto_contributors[] { "aaapetrenko", "abdrakhmanov", "abel-wang", + "abidino", "abyss7", "achimbab", "achulkov2", @@ -1023,6 +1038,7 @@ const char * auto_contributors[] { "artpaul", "asiana21", "atereh", + "attack204", "avasiliev", "avogar", "avsharapov", @@ -1120,6 +1136,7 @@ const char * auto_contributors[] { "feng lv", "fenglv", "fessmage", + "fhbai", "fibersel", "filimonov", "filipe", @@ -1176,6 +1193,7 @@ const char * auto_contributors[] { "imgbot[bot]", "ip", "it1804", + "ivan-klass", "ivan-kush", "ivanzhukov", "ivoleg", @@ -1202,6 +1220,7 @@ const char * auto_contributors[] { "kevin wan", "kgurjev", "khamadiev", + "kigerzhang", "kirillikoff", "kmeaw", "koloshmet", @@ -1243,6 +1262,7 @@ const char * auto_contributors[] { "litao91", "liu-bov", "liumaojing", + "liuneng", "liuneng1994", "liuyangkuan", "liuyimin", @@ -1271,6 +1291,7 @@ const char * auto_contributors[] { "martincholuj", "mastertheknife", "mateng0915", + "mateng915", "maxim", "maxim-babenko", "maxkuzn", @@ -1358,10 +1379,14 @@ const char * auto_contributors[] { "ritaank", "rnbondarenko", "robert", + "robot-ch-test-poll", "robot-ch-test-poll1", + "robot-ch-test-poll2", + "robot-ch-test-poll3", "robot-ch-test-poll4", "robot-clickhouse", "robot-clickhouse-ci-1", + "robot-clickhouse-ci-2", "robot-metrika-test", "rodrigargar", "roman", @@ -1371,6 +1396,7 @@ const char * auto_contributors[] { "ruct", "ryzuo", "s-kat", + "sanjam", "santaux", "santrancisco", "satanson", @@ -1474,6 +1500,7 @@ const char * auto_contributors[] { "ylchou", "yonesko", "youenn lebras", + "youennL-cs", "young scott", "yuanyimeng", "yuchuansun", @@ -1486,6 +1513,7 @@ const char * auto_contributors[] { "zhang2014", "zhanghuajie", "zhanglistar", + "zhangnew", "zhangshengyu", "zhangxiao018", "zhangxiao871", @@ -1499,6 +1527,7 @@ const char * auto_contributors[] { "zhoubintao", "zhukai", "zimv", + "zk_kiger", "zkun", "zlx19950903", "zombee0", @@ -1523,6 +1552,7 @@ const char * auto_contributors[] { "万康", "何李夫", "凌涛", + "刘陶峰", "吴健", "小蝌蚪", "小路", diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 494f9c9c31f..66e610ca653 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -8,10 +8,251 @@ #include #include #include +#include +#include + +#include namespace DB { +namespace +{ + +void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size) +{ + /// Files or directories of detached part may not exist. Only count the size of existing files. + if (disk->isFile(from)) + { + total_size += disk->getFileSize(from); + } + else + { + for (auto it = disk->iterateDirectory(from); it->isValid(); it->next()) + calculateTotalSizeOnDiskImpl(disk, fs::path(from) / it->name(), total_size); + } +} + +UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from) +{ + UInt64 total_size = 0; + try + { + calculateTotalSizeOnDiskImpl(disk, from, total_size); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + return total_size; +} + +class SourceState +{ + std::mutex mutex; + StoragesInfoStream stream; + +public: + explicit SourceState(StoragesInfoStream && stream_) + : stream(std::move(stream_)) + {} + + StoragesInfo next() + { + std::lock_guard lock(mutex); + return stream.next(); + } +}; + + +struct WorkerState +{ + struct Task + { + DiskPtr disk; + String path; + std::atomic * counter = nullptr; + }; + + std::vector tasks; + std::atomic next_task = {0}; +}; + +class DetachedPartsSource : public ISource +{ +public: + DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_, + bool has_bytes_on_disk_column_) + : ISource(std::move(header_)) + , state(state_) + , columns_mask(std::move(columns_mask_)) + , block_size(block_size_) + , has_bytes_on_disk_column(has_bytes_on_disk_column_) + {} + + String getName() const override { return "DataPartsSource"; } + +protected: + static Chunk nullWhenNoRows(MutableColumns && new_columns) + { + chassert(!new_columns.empty()); + const auto rows = new_columns[0]->size(); + + if (!rows) + return {}; + + return {std::move(new_columns), rows}; + } + + Chunk generate() override + { + MutableColumns new_columns = getPort().getHeader().cloneEmptyColumns(); + chassert(!new_columns.empty()); + + while (new_columns[0]->size() < block_size) + { + if (detached_parts.empty()) + getMoreParts(); + + if (detached_parts.empty()) + return nullWhenNoRows(std::move(new_columns)); + + generateRows(new_columns, block_size - new_columns[0]->size()); + } + + return nullWhenNoRows(std::move(new_columns)); + } + +private: + std::shared_ptr state; + const std::vector columns_mask; + const UInt64 block_size; + const bool has_bytes_on_disk_column; + const size_t support_threads = 35; + + StoragesInfo current_info; + DetachedPartsInfo detached_parts; + + void getMoreParts() + { + chassert(detached_parts.empty()); + + while (detached_parts.empty()) + { + current_info = state->next(); + if (!current_info) + return; + + detached_parts = current_info.data->getDetachedParts(); + } + } + + void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) + { + if (!has_bytes_on_disk_column) + return; + + WorkerState worker_state; + + for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) + { + auto & part = detached_parts[p_id]; + auto part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / part.dir_name; + auto relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path; + worker_state.tasks.push_back({part.disk, relative_path, &parts_sizes.at(p_id - begin)}); + } + + std::vector> futures; + SCOPE_EXIT_SAFE({ + /// Cancel all workers + worker_state.next_task.store(worker_state.tasks.size()); + /// Exceptions are not propagated + for (auto & future : futures) + if (future.valid()) + future.wait(); + futures.clear(); + }); + + auto max_thread_to_run = std::max(size_t(1), std::min(support_threads, worker_state.tasks.size() / 10)); + futures.reserve(max_thread_to_run); + + for (size_t i = 0; i < max_thread_to_run; ++i) + { + if (worker_state.next_task.load() >= worker_state.tasks.size()) + break; + + auto worker = [&worker_state] () + { + for (auto id = worker_state.next_task++; id < worker_state.tasks.size(); id = worker_state.next_task++) + { + auto & task = worker_state.tasks.at(id); + size_t size = calculateTotalSizeOnDisk(task.disk, task.path); + task.counter->store(size); + } + }; + + futures.push_back( + scheduleFromThreadPool( + std::move(worker), + IOThreadPool::get(), + "DP_BytesOnDisk")); + } + + /// Exceptions are propagated + for (auto & future : futures) + future.get(); + } + + void generateRows(MutableColumns & new_columns, size_t max_rows) + { + chassert(current_info); + + auto rows = std::min(max_rows, detached_parts.size()); + auto begin = detached_parts.size() - rows; + + std::vector> parts_sizes(rows); + calculatePartSizeOnDisk(begin, parts_sizes); + + for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) + { + auto & p = detached_parts.at(p_id); + + size_t src_index = 0; + size_t res_index = 0; + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(current_info.database); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(current_info.table); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.partition_id : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.dir_name); + if (columns_mask[src_index++]) + { + chassert(has_bytes_on_disk_column); + size_t bytes_on_disk = parts_sizes.at(p_id - begin).load(); + new_columns[res_index++]->insert(bytes_on_disk); + } + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.disk->getName()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert((fs::path(current_info.data->getFullPathOnDisk(p.disk)) / MergeTreeData::DETACHED_DIR_NAME / p.dir_name).string()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.level : Field()); + } + + detached_parts.resize(begin); + } +}; + +} + StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_id_) : IStorage(table_id_) { @@ -31,33 +272,6 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i }}); setInMemoryMetadata(storage_metadata); } -static void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size) -{ - /// Files or directories of detached part may not exist. Only count the size of existing files. - if (disk->isFile(from)) - { - total_size += disk->getFileSize(from); - } - else - { - for (auto it = disk->iterateDirectory(from); it->isValid(); it->next()) - calculateTotalSizeOnDiskImpl(disk, fs::path(from) / it->name(), total_size); - } -} - -static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from) -{ - UInt64 total_size = 0; - try - { - calculateTotalSizeOnDiskImpl(disk, from, total_size); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - return total_size; -} Pipe StorageSystemDetachedParts::read( const Names & column_names, @@ -65,66 +279,39 @@ Pipe StorageSystemDetachedParts::read( SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const size_t /*num_streams*/) + const size_t max_block_size, + const size_t num_streams) { storage_snapshot->check(column_names); - - StoragesInfoStream stream(query_info, context); - - /// Create the result. - Block block = storage_snapshot->metadata->getSampleBlock(); + Block sample_block = storage_snapshot->metadata->getSampleBlock(); NameSet names_set(column_names.begin(), column_names.end()); - std::vector columns_mask(block.columns()); - Block header; - for (size_t i = 0; i < block.columns(); ++i) + Block header; + std::vector columns_mask(sample_block.columns()); + + for (size_t i = 0; i < columns_mask.size(); ++i) { - if (names_set.contains(block.getByPosition(i).name)) + if (names_set.contains(sample_block.getByPosition(i).name)) { columns_mask[i] = 1; - header.insert(block.getByPosition(i)); + header.insert(sample_block.getByPosition(i)); } } - MutableColumns new_columns = header.cloneEmptyColumns(); - while (StoragesInfo info = stream.next()) + bool has_bytes_on_disk_column = names_set.contains("bytes_on_disk"); + + auto state = std::make_shared(StoragesInfoStream(query_info, context)); + + Pipe pipe; + + for (size_t i = 0; i < num_streams; ++i) { - const auto parts = info.data->getDetachedParts(); - for (const auto & p : parts) - { - size_t src_index = 0, res_index = 0; - String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(info.database); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(info.table); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.partition_id : Field()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.dir_name); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path)); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.disk->getName()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field()); - if (columns_mask[src_index++]) - new_columns[res_index++]->insert(p.valid_name ? p.level : Field()); - } + auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size, has_bytes_on_disk_column); + pipe.addSource(std::move(source)); } - UInt64 num_rows = new_columns.at(0)->size(); - Chunk chunk(std::move(new_columns), num_rows); - - return Pipe(std::make_shared(std::move(header), std::move(chunk))); + return pipe; } } diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index 5297c4eb93c..34cff7df65d 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -101,6 +101,7 @@ NamesAndTypesList StorageSystemDistributionQueue::getNamesAndTypes() { "broken_data_files", std::make_shared() }, { "broken_data_compressed_bytes", std::make_shared() }, { "last_exception", std::make_shared() }, + { "last_exception_time", std::make_shared() }, }; } @@ -190,6 +191,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, Cont res_columns[col_num++]->insert(getExceptionMessage(status.last_exception, false)); else res_columns[col_num++]->insertDefault(); + res_columns[col_num++]->insert(static_cast(std::chrono::system_clock::to_time_t(status.last_exception_time))); } } } diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index aa095f48179..2851b3ac55e 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -30,6 +31,7 @@ StorageSystemNamedCollections::StorageSystemNamedCollections(const StorageID & t void StorageSystemNamedCollections::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_NAMED_COLLECTIONS); + const auto & access = context->getAccess(); NamedCollectionUtils::loadIfNot(); @@ -49,7 +51,10 @@ void StorageSystemNamedCollections::fillData(MutableColumns & res_columns, Conte for (const auto & key : collection->getKeys()) { key_column.insertData(key.data(), key.size()); - value_column.insert(collection->get(key)); + if (access->isGranted(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS)) + value_column.insert(collection->get(key)); + else + value_column.insert("[HIDDEN]"); size++; } diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp new file mode 100644 index 00000000000..ad52c6896ac --- /dev/null +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include + +namespace DB +{ +NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes() +{ + return { + {"name", std::make_shared()}, + {"value", std::make_shared()}, + {"default", std::make_shared()}, + {"changed", std::make_shared()}, + {"description", std::make_shared()}, + {"type", std::make_shared()}, + }; +} + +void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + const auto & config = context->getConfigRef(); + ServerSettings settings; + settings.loadSettingsFromConfig(config); + + for (const auto & setting : settings.all()) + { + const auto & setting_name = setting.getName(); + res_columns[0]->insert(setting_name); + res_columns[1]->insert(setting.getValueString()); + res_columns[2]->insert(setting.getDefaultValueString()); + res_columns[3]->insert(setting.isValueChanged()); + res_columns[4]->insert(setting.getDescription()); + res_columns[5]->insert(setting.getTypeName()); + } +} + +} diff --git a/src/Storages/System/StorageSystemServerSettings.h b/src/Storages/System/StorageSystemServerSettings.h new file mode 100644 index 00000000000..b3aa8055853 --- /dev/null +++ b/src/Storages/System/StorageSystemServerSettings.h @@ -0,0 +1,27 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + + +/** implements system table "settings", which allows to get information about the current settings. + */ +class StorageSystemServerSettings final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemServerSettings"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index bf3a4842187..e13f6c308c4 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -19,6 +19,7 @@ NamesAndTypesList StorageSystemSettings::getNamesAndTypes() {"max", std::make_shared(std::make_shared())}, {"readonly", std::make_shared()}, {"type", std::make_shared()}, + {"default", std::make_shared()}, {"alias_for", std::make_shared()}, }; } @@ -53,6 +54,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co res_columns[5]->insert(max); res_columns[6]->insert(writability == SettingConstraintWritability::CONST); res_columns[7]->insert(setting.getTypeName()); + res_columns[8]->insert(setting.getDefaultValueString()); }; const auto & settings_to_aliases = Settings::Traits::settingsToAliases(); @@ -62,7 +64,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co res_columns[0]->insert(setting_name); fill_data_for_setting(setting_name, setting); - res_columns[8]->insert(""); + res_columns[9]->insert(""); if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end()) { @@ -70,7 +72,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co { res_columns[0]->insert(alias); fill_data_for_setting(alias, setting); - res_columns[8]->insert(setting_name); + res_columns[9]->insert(setting_name); } } } diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 07db151069f..61329ab834b 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -105,6 +106,7 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database) attach(context, system_database, "functions"); attach(context, system_database, "events"); attach(context, system_database, "settings"); + attach(context, system_database, "server_settings"); attach(context, system_database, "settings_changes"); attach>(context, system_database, "merge_tree_settings"); attach>(context, system_database, "replicated_merge_tree_settings"); diff --git a/src/TableFunctions/TableFunctionNull.cpp b/src/TableFunctions/TableFunctionNull.cpp index d2c091a7b5f..d25b9e15aa7 100644 --- a/src/TableFunctions/TableFunctionNull.cpp +++ b/src/TableFunctions/TableFunctionNull.cpp @@ -34,16 +34,21 @@ void TableFunctionNull::parseArguments(const ASTPtr & ast_function, ContextPtr c ColumnsDescription TableFunctionNull::getActualTableStructure(ContextPtr context) const { - return parseColumnsListFromString(structure, context); + if (structure != "auto") + return parseColumnsListFromString(structure, context); + return default_structure; } StoragePtr TableFunctionNull::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const { ColumnsDescription columns; if (structure != "auto") - columns = getActualTableStructure(context); + columns = parseColumnsListFromString(structure, context); else if (!structure_hint.empty()) columns = structure_hint; + else + columns = default_structure; + auto res = std::make_shared(StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription(), String{}); res->startup(); return res; diff --git a/src/TableFunctions/TableFunctionNull.h b/src/TableFunctions/TableFunctionNull.h index 329315e00c9..4fece9e6da9 100644 --- a/src/TableFunctions/TableFunctionNull.h +++ b/src/TableFunctions/TableFunctionNull.h @@ -1,7 +1,8 @@ #pragma once -#include #include +#include +#include namespace DB @@ -20,6 +21,7 @@ public: bool needStructureHint() const override { return structure == "auto"; } void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + private: StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; const char * getStorageTypeName() const override { return "Null"; } @@ -29,6 +31,7 @@ private: String structure = "auto"; ColumnsDescription structure_hint; -}; + const ColumnsDescription default_structure{NamesAndTypesList{{"dummy", std::make_shared()}}}; +}; } diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 545427f30c9..cf0e20c624c 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -92,7 +92,8 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr const auto & literal = args[0]->as(); String value; - if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context)) + String error; + if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context, error)) { has_structure_in_arguments = true; return; diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 932964ddcd6..3c2fa05016f 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -9,7 +9,7 @@ from github import Github from build_download_helper import get_build_name_for_check, read_build_urls from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, @@ -145,11 +145,13 @@ if __name__ == "__main__": with open( os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8" ) as desc_f: - description = desc_f.readline().rstrip("\n")[:140] + description = desc_f.readline().rstrip("\n") except: status = "failure" description = "Task failed: $?=" + str(retcode) + description = format_description(description) + test_result = TestResult(description, "OK") if "fail" in status: test_result.status = "FAIL" diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index f37b2656be3..ecc36b1c4e3 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -9,6 +9,10 @@ import time from shutil import rmtree from typing import List, Tuple +from ccache_utils import get_ccache_if_not_exists, upload_ccache +from ci_config import CI_CONFIG, BuildConfig +from commit_status_helper import get_commit_filtered_statuses, get_commit +from docker_pull_helper import get_image_with_version from env_helper import ( CACHES_PATH, GITHUB_JOB, @@ -18,18 +22,17 @@ from env_helper import ( S3_DOWNLOAD, TEMP_PATH, ) -from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from github_helper import GitHub from pr_info import PRInfo +from s3_helper import S3Helper +from tee_popen import TeePopen from version_helper import ( ClickHouseVersion, Git, get_version_from_repo, update_version_local, ) -from ccache_utils import get_ccache_if_not_exists, upload_ccache -from ci_config import CI_CONFIG, BuildConfig -from docker_pull_helper import get_image_with_version -from tee_popen import TeePopen IMAGE_NAME = "clickhouse/binary-builder" BUILD_LOG_NAME = "build_log.log" @@ -122,8 +125,7 @@ def check_for_success_run( logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "") logging.info("Checking for artifacts in %s", logged_prefix) try: - # TODO: theoretically, it would miss performance artifact for pr==0, - # but luckily we rerun only really failed tasks now, so we're safe + # Performance artifacts are now part of regular build, so we're safe build_results = s3_helper.list_prefix(s3_prefix) except Exception as ex: logging.info("Got exception while listing %s: %s\nRerun", logged_prefix, ex) @@ -231,6 +233,29 @@ def upload_master_static_binaries( print(f"::notice ::Binary static URL: {url}") +def mark_failed_reports_pending(build_name: str, sha: str) -> None: + try: + gh = GitHub(get_best_robot_token()) + commit = get_commit(gh, sha) + statuses = get_commit_filtered_statuses(commit) + report_status = [ + name + for name, builds in CI_CONFIG["builds_report_config"].items() + if build_name in builds + ][0] + for status in statuses: + if status.context == report_status and status.state in ["failure", "error"]: + logging.info( + "Commit already have failed status for '%s', setting it to 'pending'", + report_status, + ) + commit.create_status( + "pending", status.url, "Set to pending on rerun", report_status + ) + except: # we do not care about any exception here + logging.info("Failed to get or mark the reports status as pending, continue") + + def main(): logging.basicConfig(level=logging.INFO) @@ -260,6 +285,9 @@ def main(): # put them as github actions artifact (result) check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config) + # If it's a latter running, we need to mark possible failed status + mark_failed_reports_pending(build_name, pr_info.sha) + docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME) image_version = docker_image.version diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 147b49d910e..585cfd52e87 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -52,7 +52,7 @@ class Labels: class ReleaseBranch: - CHERRYPICK_DESCRIPTION = """This pull-request is a first step of an automated \ + CHERRYPICK_DESCRIPTION = f"""This pull-request is a first step of an automated \ backporting. It contains changes like after calling a local command `git cherry-pick`. If you intend to continue backporting this changes, then resolve all conflicts if any. @@ -60,13 +60,16 @@ Otherwise, if you do not want to backport them, then just close this pull-reques The check results does not matter at this step - you can safely ignore them. Also this pull-request will be merged automatically as it reaches the mergeable state, \ - but you always can merge it manually. +**do not merge it manually**. + +If it stuck, check the original PR for `{Labels.BACKPORTS_CREATED}` and delete it if \ +necessary. """ BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \ backporting. Treat it as a standard pull-request: look at the checks and resolve conflicts. Merge it only if you intend to backport changes to the target branch, otherwise just \ - close it. +close it. """ REMOTE = "" @@ -508,7 +511,7 @@ def main(): logging.getLogger("git_helper").setLevel(logging.DEBUG) token = args.token or get_best_robot_token() - gh = GitHub(token, create_cache_dir=False, per_page=100) + gh = GitHub(token, create_cache_dir=False) bp = Backport(gh, args.repo, args.dry_run) # https://github.com/python/mypy/issues/3004 bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 05742acb314..08cd2d466d0 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -289,6 +289,18 @@ CI_CONFIG = { "Stress test (debug)": { "required_build": "package_debug", }, + "Upgrade check (asan)": { + "required_build": "package_asan", + }, + "Upgrade check (tsan)": { + "required_build": "package_tsan", + }, + "Upgrade check (msan)": { + "required_build": "package_msan", + }, + "Upgrade check (debug)": { + "required_build": "package_debug", + }, "Integration tests (asan)": { "required_build": "package_asan", }, diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 785250c3904..9b2b9e4bcd9 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -3,18 +3,20 @@ import csv import os import time -from typing import List +from typing import List, Literal import logging -from ci_config import CI_CONFIG, REQUIRED_CHECKS -from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL from github import Github from github.Commit import Commit from github.CommitStatus import CommitStatus + +from ci_config import CI_CONFIG, REQUIRED_CHECKS +from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL RETRY = 5 CommitStatuses = List[CommitStatus] +MERGEABLE_NAME = "Mergeable Check" def override_status(status: str, check_name: str, invert: bool = False) -> str: @@ -102,59 +104,69 @@ def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None: pull_request.add_to_labels(label) -def fail_mergeable_check(commit: Commit, description: str) -> None: - commit.create_status( - context="Mergeable Check", - description=description, - state="failure", - target_url=GITHUB_RUN_URL, - ) +def format_description(description: str) -> str: + if len(description) > 140: + description = description[:137] + "..." + return description -def reset_mergeable_check(commit: Commit, description: str = "") -> None: +def set_mergeable_check( + commit: Commit, + description: str = "", + state: Literal["success", "failure"] = "success", +) -> None: commit.create_status( - context="Mergeable Check", + context=MERGEABLE_NAME, description=description, - state="success", + state=state, target_url=GITHUB_RUN_URL, ) def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None: - if SKIP_MERGEABLE_CHECK_LABEL in pr_info.labels: + not_run = ( + pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"}) + or check_name not in REQUIRED_CHECKS + or pr_info.release_pr + or pr_info.number == 0 + ) + if not_run: + # Let's avoid unnecessary work return logging.info("Update Mergeable Check by %s", check_name) commit = get_commit(gh, pr_info.sha) - checks = { - check.context: check.state - for check in filter( - lambda check: (check.context in REQUIRED_CHECKS), - # get_statuses() returns generator, which cannot be reversed - we need comprehension - # pylint: disable=unnecessary-comprehension - reversed([status for status in commit.get_statuses()]), - ) - } + statuses = get_commit_filtered_statuses(commit) + + required_checks = [ + status for status in statuses if status.context in REQUIRED_CHECKS + ] + + mergeable_status = None + for status in statuses: + if status.context == MERGEABLE_NAME: + mergeable_status = status + break success = [] fail = [] - for name, state in checks.items(): - if state == "success": - success.append(name) + for status in required_checks: + if status.state == "success": + success.append(status.context) else: - fail.append(name) + fail.append(status.context) if fail: description = "failed: " + ", ".join(fail) if success: description += "; succeeded: " + ", ".join(success) - if len(description) > 140: - description = description[:137] + "..." - fail_mergeable_check(commit, description) + description = format_description(description) + if mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description, "failure") return description = ", ".join(success) - if len(description) > 140: - description = description[:137] + "..." - reset_mergeable_check(commit, description) + description = format_description(description) + if mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index d5d1b1a1085..192d216614e 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo @@ -456,8 +456,7 @@ def main(): else: description = "Nothing to update" - if len(description) >= 140: - description = description[:136] + "..." + description = format_description(description) with open(changed_json, "w", encoding="utf-8") as images_file: json.dump(result_images, images_file) diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 9a77a91647e..0484ea8f641 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -10,7 +10,7 @@ from typing import List, Dict, Tuple from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from env_helper import RUNNER_TEMP from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo @@ -218,8 +218,7 @@ def main(): else: description = "Nothing to update" - if len(description) >= 140: - description = description[:136] + "..." + description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) post_commit_status(gh, pr_info.sha, NAME, description, status, url) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index fbe934367b4..c6854c5aa78 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -15,7 +15,7 @@ from github import Github from build_check import get_release_or_pr from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from docker_images_check import DockerImage from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD from get_robot_token import get_best_robot_token, get_parameter_from_ssm @@ -369,8 +369,7 @@ def main(): description = f"Processed tags: {', '.join(tags)}" - if len(description) >= 140: - description = description[:136] + "..." + description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) post_commit_status(gh, pr_info.sha, NAME, description, status, url) diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index 1b8861b92a6..834c8247cb8 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -5,12 +5,13 @@ from datetime import date, datetime, timedelta from pathlib import Path from os import path as p from time import sleep -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union import github # explicit reimport # pylint: disable=useless-import-alias +from github.AuthenticatedUser import AuthenticatedUser from github.GithubException import ( RateLimitExceededException as RateLimitExceededException, ) @@ -35,6 +36,8 @@ class GitHub(github.Github): self._cache_path = Path(CACHE_PATH) if create_cache_dir: self.cache_path = self.cache_path + if not kwargs.get("per_page"): + kwargs["per_page"] = 100 # And set Path super().__init__(*args, **kwargs) self._retries = 0 @@ -155,7 +158,7 @@ class GitHub(github.Github): def get_user_cached( self, login: str, obj_updated_at: Optional[datetime] = None - ) -> NamedUser: + ) -> Union[AuthenticatedUser, NamedUser]: cache_file = self.cache_path / f"user-{login}.pickle" if cache_file.is_file(): diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 48d0da195ce..54245670b26 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -18,7 +18,11 @@ from clickhouse_helper import ( mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from commit_status_helper import post_commit_status, update_mergeable_check +from commit_status_helper import ( + format_description, + post_commit_status, + update_mergeable_check, +) from compress_files import compress_fast from docker_pull_helper import get_image_with_version, DockerImage from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH @@ -341,8 +345,7 @@ def main(): ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, args.check_name, test_results) - if len(description) >= 140: - description = description[:136] + "..." + description = format_description(description) post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url) diff --git a/tests/ci/mark_release_ready.py b/tests/ci/mark_release_ready.py index 0ef134b9280..b103dd053bb 100755 --- a/tests/ci/mark_release_ready.py +++ b/tests/ci/mark_release_ready.py @@ -43,7 +43,7 @@ def main(): description = "the release can be created from the commit" args.token = args.token or get_best_robot_token() - gh = GitHub(args.token, create_cache_dir=False, per_page=100) + gh = GitHub(args.token, create_cache_dir=False) # Get the rate limits for a quick fail gh.get_rate_limit() commit = get_commit(gh, args.commit) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 4a21bfcdd70..fedac48f24d 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -217,7 +217,7 @@ def main(): args = parse_args() logging.info("Going to process PR #%s in repo %s", args.pr, args.repo) token = args.token or get_best_robot_token() - gh = GitHub(token, per_page=100) + gh = GitHub(token) repo = gh.get_repo(args.repo) # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 diff --git a/tests/ci/release.py b/tests/ci/release.py index a4fe4046572..e221bd3d008 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -199,6 +199,10 @@ class Release: raise ValueError( "The relese type must be 'major' for minor versions>=12" ) + if self._version.minor < 12 and self.release_type == "major": + raise ValueError( + "The relese type must be 'minor' for minor versions<12" + ) with self._checkout(self.release_commit, True): # Checkout to the commit, it will provide the correct current version @@ -265,7 +269,13 @@ class Release: f"for {self.release_type} release" ) - def _commit_cmake_contributors(self, version: ClickHouseVersion) -> None: + def _update_cmake_contributors( + self, version: ClickHouseVersion, reset_tweak: bool = True + ) -> None: + if reset_tweak: + desc = version.description + version = version.reset_tweak() + version.with_description(desc) update_cmake_version(version) update_contributors(raise_error=True) if self.dry_run: @@ -274,9 +284,15 @@ class Release: self.run(f"git diff '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'"), ) self.run(f"git checkout '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'") + + def _commit_cmake_contributors( + self, version: ClickHouseVersion, reset_tweak: bool = True + ) -> None: + if reset_tweak: + version = version.reset_tweak() self.run( - f"git commit -m 'Update version to {version.string}' " - f"'{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'", + f"git commit '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}' " + f"-m 'Update autogenerated version to {version.string} and contributors'", dry_run=self.dry_run, ) @@ -321,27 +337,12 @@ class Release: with self._create_gh_release(False): self.version = self.version.update(self.release_type) self.version.with_description(version_type) - update_cmake_version(self.version) - update_contributors(raise_error=True) - if self.dry_run: - logging.info( - "Dry running, resetting the following changes in the repo:\n%s", - self.run( - f"git diff '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'" - ), - ) - self.run(f"git checkout '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'") - + self._update_cmake_contributors(self.version) # Checkouting the commit of the branch and not the branch itself, # then we are able to skip rollback with self._checkout(f"{self.release_branch}^0", False): current_commit = self.run("git rev-parse HEAD") - self.run( - f"git commit -m " - f"'Update version to {self.version.string}' " - f"'{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'", - dry_run=self.dry_run, - ) + self._commit_cmake_contributors(self.version) with self._push( "HEAD", with_rollback_on_fail=False, remote_ref=self.release_branch ): @@ -406,6 +407,7 @@ class Release: if version_type == VersionType.LTS: pr_labels += " --label release-lts" new_version.with_description(version_type) + self._update_cmake_contributors(new_version) self._commit_cmake_contributors(new_version) with self._push(self.release_branch): with self._create_gh_label( @@ -434,6 +436,7 @@ class Release: self.read_version() self.version = self.version.update(self.release_type) self.version.with_description(VersionType.TESTING) + self._update_cmake_contributors(self.version) self._commit_cmake_contributors(self.version) with self._push(helper_branch): body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 7119f443719..c6810173f7a 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -7,10 +7,11 @@ from typing import Tuple from github import Github from commit_status_helper import ( + format_description, get_commit, post_labels, remove_labels, - reset_mergeable_check, + set_mergeable_check, ) from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token @@ -157,7 +158,7 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]: + second_category + "'" ) - return result_status[:140], category + return result_status, category elif re.match( r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] @@ -199,6 +200,7 @@ if __name__ == "__main__": pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True) can_run, description, labels_state = should_run_checks_for_pr(pr_info) + description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) @@ -231,7 +233,7 @@ if __name__ == "__main__": if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) - reset_mergeable_check(commit, "skipped") + set_mergeable_check(commit, "skipped") if description_error: print( @@ -249,7 +251,7 @@ if __name__ == "__main__": ) commit.create_status( context=NAME, - description=description_error[:139], + description=format_description(description_error), state="failure", target_url=url, ) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 73802740975..1a6c4d14616 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -10,7 +10,7 @@ from github import Github from build_download_helper import get_build_name_for_check, read_build_urls from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, @@ -171,11 +171,13 @@ def main(): with open( os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8" ) as desc_f: - description = desc_f.readline().rstrip("\n")[:140] + description = desc_f.readline().rstrip("\n") except: # status = "failure" description = "Task failed: $?=" + str(retcode) + description = format_description(description) + report_url = upload_results( s3_helper, pr_info.number, diff --git a/docker/test/stress/stress b/tests/ci/stress.py similarity index 91% rename from docker/test/stress/stress rename to tests/ci/stress.py index c142da1be9e..4f723dba101 100755 --- a/docker/test/stress/stress +++ b/tests/ci/stress.py @@ -8,13 +8,13 @@ import logging import time -def get_options(i, backward_compatibility_check): +def get_options(i, upgrade_check): options = [] client_options = [] - if 0 < i: + if i > 0: options.append("--order=random") - if i % 3 == 2 and not backward_compatibility_check: + if i % 3 == 2 and not upgrade_check: options.append( '''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) ) @@ -38,16 +38,16 @@ def get_options(i, backward_compatibility_check): client_options.append("join_algorithm='full_sorting_merge'") if join_alg_num % 4 == 3: client_options.append("join_algorithm='auto'") - client_options.append('max_rows_in_join=1000') + client_options.append("max_rows_in_join=1000") if i % 5 == 1: client_options.append("memory_tracker_fault_probability=0.001") - if i % 2 == 1 and not backward_compatibility_check: + if i % 2 == 1 and not upgrade_check: client_options.append("group_by_use_nulls=1") # 12 % 3 == 0, so it's Atomic database - if i == 12 and not backward_compatibility_check: + if i == 12 and not upgrade_check: client_options.append("implicit_transaction=1") client_options.append("throw_on_unsupported_query_inside_transaction=0") @@ -63,11 +63,9 @@ def run_func_test( num_processes, skip_tests_option, global_time_limit, - backward_compatibility_check, + upgrade_check, ): - backward_compatibility_check_option = ( - "--backward-compatibility-check" if backward_compatibility_check else "" - ) + upgrade_check_option = "--upgrade-check" if upgrade_check else "" global_time_limit_option = "" if global_time_limit: global_time_limit_option = "--global_time_limit={}".format(global_time_limit) @@ -77,14 +75,14 @@ def run_func_test( for i in range(num_processes) ] pipes = [] - for i in range(0, len(output_paths)): - f = open(output_paths[i], "w") + for i, path in enumerate(output_paths): + f = open(path, "w") full_command = "{} {} {} {} {}".format( cmd, - get_options(i, backward_compatibility_check), + get_options(i, upgrade_check), global_time_limit_option, skip_tests_option, - backward_compatibility_check_option, + upgrade_check_option, ) logging.info("Run func tests '%s'", full_command) p = Popen(full_command, shell=True, stdout=f, stderr=f) @@ -176,7 +174,7 @@ def prepare_for_hung_check(drop_databases): for db in databases: if db == "system": continue - command = make_query_command(f'DETACH DATABASE {db}') + command = make_query_command(f"DETACH DATABASE {db}") # we don't wait for drop Popen(command, shell=True) break @@ -211,7 +209,7 @@ def prepare_for_hung_check(drop_databases): # Even if all clickhouse-test processes are finished, there are probably some sh scripts, # which still run some new queries. Let's ignore them. try: - query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """ + query = """clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" """ output = ( check_output(query, shell=True, stderr=STDOUT, timeout=30) .decode("utf-8") @@ -235,7 +233,7 @@ if __name__ == "__main__": parser.add_argument("--output-folder") parser.add_argument("--global-time-limit", type=int, default=1800) parser.add_argument("--num-parallel", type=int, default=cpu_count()) - parser.add_argument("--backward-compatibility-check", action="store_true") + parser.add_argument("--upgrade-check", action="store_true") parser.add_argument("--hung-check", action="store_true", default=False) # make sense only for hung check parser.add_argument("--drop-databases", action="store_true", default=False) @@ -250,7 +248,7 @@ if __name__ == "__main__": args.num_parallel, args.skip_func_tests, args.global_time_limit, - args.backward_compatibility_check, + args.upgrade_check, ) logging.info("Will wait functests to finish") @@ -303,11 +301,12 @@ if __name__ == "__main__": ] ) hung_check_log = os.path.join(args.output_folder, "hung_check.log") - tee = Popen(['/usr/bin/tee', hung_check_log], stdin=PIPE) + tee = Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT) - tee.stdin.close() + if tee.stdin is not None: + tee.stdin.close() if res != 0 and have_long_running_queries: - logging.info("Hung check failed with exit code {}".format(res)) + logging.info("Hung check failed with exit code %d", res) else: hung_check_status = "No queries hung\tOK\t\\N\t\n" with open( @@ -316,5 +315,4 @@ if __name__ == "__main__": results.write(hung_check_status) os.remove(hung_check_log) - logging.info("Stress test finished") diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index fb38969cb23..7596a81ebc9 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -36,7 +36,6 @@ def get_run_command( "docker run --cap-add=SYS_PTRACE " # a static link, don't use S3_URL or S3_DOWNLOAD "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " - f"-e DISABLE_BC_CHECK={os.environ.get('DISABLE_BC_CHECK', '0')} " # For dmesg and sysctl "--privileged " f"--volume={build_path}:/package_folder " @@ -109,7 +108,7 @@ def process_results( return state, description, test_results, additional_files -def main(): +def run_stress_test(docker_image_name): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -132,7 +131,7 @@ def main(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - docker_image = get_image_with_version(reports_path, "clickhouse/stress-test") + docker_image = get_image_with_version(reports_path, docker_image_name) packages_path = os.path.join(temp_path, "packages") if not os.path.exists(packages_path): @@ -199,4 +198,4 @@ def main(): if __name__ == "__main__": - main() + run_stress_test("clickhouse/stress-test") diff --git a/tests/ci/stress_tests.lib b/tests/ci/stress_tests.lib new file mode 100644 index 00000000000..75195baaeeb --- /dev/null +++ b/tests/ci/stress_tests.lib @@ -0,0 +1,309 @@ +#!/bin/bash + +# core.COMM.PID-TID +sysctl kernel.core_pattern='core.%e.%p-%P' + +OK="\tOK\t\\N\t" +FAIL="\tFAIL\t\\N\t" + +FAILURE_CONTEXT_LINES=100 +FAILURE_CONTEXT_MAX_LINE_WIDTH=300 + +function escaped() +{ + # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. + # Also limit lines width just in case (too long lines are not really useful usually) + clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH) + from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'" +} + +function head_escaped() +{ + head -n $FAILURE_CONTEXT_LINES $1 | escaped +} + +function unts() +{ + grep -Po "[0-9][0-9]:[0-9][0-9] \K.*" +} + +function trim_server_logs() +{ + head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped +} + +function install_packages() +{ + dpkg -i $1/clickhouse-common-static_*.deb + dpkg -i $1/clickhouse-common-static-dbg_*.deb + dpkg -i $1/clickhouse-server_*.deb + dpkg -i $1/clickhouse-client_*.deb +} + +function configure() +{ + # install test configs + export USE_DATABASE_ORDINARY=1 + export EXPORT_S3_STORAGE_POLICIES=1 + /usr/share/clickhouse-test/config/install.sh + + # avoid too slow startup + sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "s|100000|10000|" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp + sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml + + # for clickhouse-server (via service) + echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment + # for clickhouse-client + export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' + + # since we run clickhouse from root + sudo chown root: /var/lib/clickhouse + + # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM). + echo "1" \ + > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml + + local total_mem + total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB + total_mem=$(( total_mem*1024 )) # bytes + + # Set maximum memory usage as half of total memory (less chance of OOM). + # + # But not via max_server_memory_usage but via max_memory_usage_for_user, + # so that we can override this setting and execute service queries, like: + # - hung check + # - show/drop database + # - ... + # + # So max_memory_usage_for_user will be a soft limit, and + # max_server_memory_usage will be hard limit, and queries that should be + # executed regardless memory limits will use max_memory_usage_for_user=0, + # instead of relying on max_untracked_memory + + max_server_memory_usage_to_ram_ratio=0.5 + echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}" + cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml < + ${max_server_memory_usage_to_ram_ratio} + +EOL + + local max_users_mem + max_users_mem=$((total_mem*30/100)) # 30% + echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G" + cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < + + + 10G + ${max_users_mem} + + + +EOL + + cat > /etc/clickhouse-server/config.d/core.xml < + + + 107374182400 + + + $PWD + +EOL + + # Analyzer is not yet ready for testing + cat > /etc/clickhouse-server/users.d/no_analyzer.xml < + + + + + + + + + + +EOL + +} + +function stop() +{ + local max_tries="${1:-90}" + local check_hang="${2:-true}" + local pid + # Preserve the pid, since the server can hung after the PID will be deleted. + pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" + + clickhouse stop --max-tries "$max_tries" --do-not-kill && return + + if [ $check_hang == true ] + then + # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. + echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv + kill -TERM "$(pidof gdb)" ||: + sleep 5 + echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log + timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log + clickhouse stop --force + fi +} + +function start() +{ + counter=0 + until clickhouse-client --query "SELECT 1" + do + if [ "$counter" -gt ${1:-120} ] + then + echo "Cannot start clickhouse-server" + rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: + echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv + cat /var/log/clickhouse-server/stdout.log + tail -n100 /var/log/clickhouse-server/stderr.log + tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n100 + break + fi + # use root to match with current uid + clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log + sleep 0.5 + counter=$((counter + 1)) + done + + # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog + # and clickhouse-server can do fork-exec, for example, to run some bridge. + # Do not set nostop noprint for all signals, because some it may cause gdb to hang, + # explicitly ignore non-fatal signals that are used by server. + # Number of SIGRTMIN can be determined only in runtime. + RTMIN=$(kill -l SIGRTMIN) + echo " +set follow-fork-mode parent +handle SIGHUP nostop noprint pass +handle SIGINT nostop noprint pass +handle SIGQUIT nostop noprint pass +handle SIGPIPE nostop noprint pass +handle SIGTERM nostop noprint pass +handle SIGUSR1 nostop noprint pass +handle SIGUSR2 nostop noprint pass +handle SIG$RTMIN nostop noprint pass +info signals +continue +backtrace full +thread apply all backtrace full +info registers +disassemble /s +up +disassemble /s +up +disassemble /s +p \"done\" +detach +quit +" > script.gdb + + # FIXME Hung check may work incorrectly because of attached gdb + # 1. False positives are possible + # 2. We cannot attach another gdb to get stacktraces if some queries hung + gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log & + sleep 5 + # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) + time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: +} + +function check_server_start() +{ + clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ + || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \ + && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \ + >> /test_output/test_results.tsv) + + # Remove file application_errors.txt if it's empty + [ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt +} + +function check_logs_for_critical_errors() +{ + # Sanitizer asserts + rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp + rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ + && echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ + || echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv + rm -f /test_output/tmp + + # OOM + rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ + && echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \ + || echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv + + # Logical errors + rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \ + && echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \ + || echo -e "No logical errors$OK" >> /test_output/test_results.tsv + # Remove file logical_errors.txt if it's empty + [ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt + + # No such key errors + rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \ + && echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \ + || echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv + + # Remove file no_such_key_errors.txt if it's empty + [ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt + + # Crash + rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ + && echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ + || echo -e "Not crashed$OK" >> /test_output/test_results.tsv + + # It also checks for crash without stacktrace (printed by watchdog) + rg -Fa " " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \ + && echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \ + || echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv + + # Remove file fatal_messages.txt if it's empty + [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt + + rg -Fa "########################################" /test_output/* > /dev/null \ + && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv + + function get_gdb_log_context() + { + rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped + } + + rg -Fa " received signal " /test_output/gdb.log > /dev/null \ + && echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv + + dmesg -T > /test_output/dmesg.log + + # OOM in dmesg -- those are real + grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \ + && echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \ + || echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv +} + +function collect_query_and_trace_logs() +{ + for table in query_log trace_log + do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + done +} + +function collect_core_dumps() +{ + find . -type f -maxdepth 1 -name 'core.*' | while read core; do + zstd --threads=0 $core + mv $core.zst /test_output/ + done +} diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9350785b33b..89878990c2c 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -148,7 +148,7 @@ def main(): if args.push: checkout_head(pr_info) - gh = GitHub(get_best_robot_token(), per_page=100, create_cache_dir=False) + gh = GitHub(get_best_robot_token(), create_cache_dir=False) atexit.register(update_mergeable_check, gh, pr_info, NAME) diff --git a/tests/ci/upgrade_check.py b/tests/ci/upgrade_check.py new file mode 100644 index 00000000000..83b6f9e299f --- /dev/null +++ b/tests/ci/upgrade_check.py @@ -0,0 +1,4 @@ +import stress_check + +if __name__ == "__main__": + stress_check.run_stress_test("clickhouse/upgrade-check") diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 69cfba64be3..372974f4eda 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -58,6 +58,7 @@ class ClickHouseVersion: elif self._git is not None: self._tweak = self._git.tweak self._describe = "" + self._description = "" def update(self, part: Literal["major", "minor", "patch"]) -> "ClickHouseVersion": """If part is valid, returns a new version""" @@ -88,6 +89,13 @@ class ClickHouseVersion: self.major, self.minor, self.patch + 1, self.revision, self._git ) + def reset_tweak(self) -> "ClickHouseVersion": + if self._git is not None: + self._git.update() + return ClickHouseVersion( + self.major, self.minor, self.patch, self.revision, self._git, "1" + ) + @property def major(self) -> int: return self._major @@ -118,6 +126,10 @@ class ClickHouseVersion: def describe(self): return self._describe + @property + def description(self) -> str: + return self._description + @property def string(self): return ".".join( @@ -142,6 +154,7 @@ class ClickHouseVersion: def with_description(self, version_type): if version_type not in VersionType.VALID: raise ValueError(f"version type {version_type} not in {VersionType.VALID}") + self._description = version_type self._describe = f"v{self.string}-{version_type}" def __eq__(self, other: Any) -> bool: diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 64f11b41777..c8b11bc6e37 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -66,7 +66,8 @@ terminate-and-exit() { INSTANCE_ID=$(ec2metadata --instance-id) # We execute it with at to not have it as an orphan process # GH Runners kill all remain processes - echo "sleep 10; aws ec2 terminate-instances --instance-ids $INSTANCE_ID" | at now + echo "sleep 10; aws ec2 terminate-instances --instance-ids $INSTANCE_ID" | at now || \ + aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" # workaround for complete out of space exit 0 } diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index a4a5a013c36..decf6ce0393 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -123,6 +123,7 @@ TRUSTED_CONTRIBUTORS = { "tonickkozlov", # Cloudflare "tylerhannan", # ClickHouse Employee "myrrc", # Mike Kot, DoubleCloud + "thevar1able", # ClickHouse Employee ] } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e47ba8e10ba..68bca93c55c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -252,7 +252,7 @@ def get_processlist_with_stacktraces(args): -- NOTE: view() here to do JOIN on shards, instead of initiator FROM clusterAllReplicas('test_cluster_database_replicated', view( SELECT - groupArray((s.thread_id, arrayStringConcat(arrayMap( + arrayStringConcat(groupArray('Thread ID ' || toString(s.thread_id) || '\n' || arrayStringConcat(arrayMap( x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), s.trace), '\n') AS stacktrace )) AS stacktraces, @@ -273,7 +273,7 @@ def get_processlist_with_stacktraces(args): args, """ SELECT - groupArray((s.thread_id, arrayStringConcat(arrayMap( + arrayStringConcat(groupArray('Thread ID ' || toString(s.thread_id) || '\n' || arrayStringConcat(arrayMap( x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), s.trace), '\n') AS stacktrace )) AS stacktraces, @@ -450,7 +450,7 @@ class FailureReason(enum.Enum): REPLICATED_DB = "replicated-database" S3_STORAGE = "s3-storage" BUILD = "not running for current build" - BACKWARD_INCOMPATIBLE = "test is backward incompatible" + NO_UPGRADE_CHECK = "not running for upgrade check" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" # UNKNOWN reasons @@ -773,35 +773,6 @@ class TestCase: else "" ) - # Check if test contains tag "no-backward-compatibility-check" and we should skip it - def check_backward_incompatible_tag(self) -> bool: - for tag in self.tags: - if tag.startswith("no-backward-compatibility-check"): - split = tag.split(":") - - # If version is not specified in tag, always skip this test. - if len(split) == 1: - return True - version_from_tag = split[1] - - # Check if extracted string from tag is a real ClickHouse version, if not - always skip test. - if re.match(VERSION_PATTERN, version_from_tag) is None: - return True - - server_version = str( - clickhouse_execute(args, "SELECT version()").decode() - ) - # If server version is less or equal from the version specified in tag, we should skip this test. - version_from_tag_split = list(map(int, version_from_tag.split("."))) - server_version_split = list(map(int, server_version.split("."))) - if ( - server_version_split[: len(version_from_tag_split)] - <= version_from_tag_split - ): - return True - - return False - # should skip test, should increment skipped_total, skip reason def should_skip_test(self, suite) -> Optional[FailureReason]: tags = self.tags @@ -852,10 +823,10 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB - elif ( - args.backward_compatibility_check and self.check_backward_incompatible_tag() - ): - return FailureReason.BACKWARD_INCOMPATIBLE + # TODO: remove checking "no-upgrade-check" after 23.1 + elif args.upgrade_check and ( + "no-upgrade-check" in tags or "no-upgrade-check" in tags): + return FailureReason.NO_UPGRADE_CHECK elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE @@ -2175,6 +2146,7 @@ def main(args): print(json.dumps(processlist, indent=4)) print(get_transactions_list(args)) + print_stacktraces() exit_code.value = 1 else: print(colored("\nNo queries hung.", args, "green", attrs=["bold"])) @@ -2460,9 +2432,9 @@ def parse_args(): ) group.add_argument( - "--backward-compatibility-check", + "--upgrade-check", action="store_true", - help="Run tests for further backward compatibility testing by ignoring all" + help="Run tests for further server upgrade testing by ignoring all" "drop queries in tests for collecting data from new version of server", ) parser.add_argument( @@ -2613,7 +2585,7 @@ if __name__ == "__main__": else: args.client_database = "default" - if args.backward_compatibility_check: + if args.upgrade_check: args.client += " --fake-drop" if args.client_option or args.secure: diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index cffd325e968..2066dedfa56 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -9,7 +9,8 @@ 10000 100000 10000 - false + + true 240000 1000000000000000 diff --git a/tests/config/users.d/access_management.xml b/tests/config/users.d/access_management.xml index 8f4d82805be..f7963cdb7f2 100644 --- a/tests/config/users.d/access_management.xml +++ b/tests/config/users.d/access_management.xml @@ -3,6 +3,7 @@ 1 1 + 1 diff --git a/tests/config/users.d/prefetch_settings.xml b/tests/config/users.d/prefetch_settings.xml index 33ac0a4eb01..d2b6d8a9386 100644 --- a/tests/config/users.d/prefetch_settings.xml +++ b/tests/config/users.d/prefetch_settings.xml @@ -1,7 +1,7 @@ - 1 + 0 0 1Gi diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 879eb43ba7b..e0470f6ee17 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3264,7 +3264,7 @@ class ClickHouseInstance: sleep_time=0.5, check_callback=lambda x: True, ): - logging.debug(f"Executing query {sql} on {self.name}") + # logging.debug(f"Executing query {sql} on {self.name}") result = None for i in range(retry_count): try: @@ -3283,7 +3283,7 @@ class ClickHouseInstance: return result time.sleep(sleep_time) except Exception as ex: - logging.debug("Retry {} got exception {}".format(i + 1, ex)) + # logging.debug("Retry {} got exception {}".format(i + 1, ex)) time.sleep(sleep_time) if result is not None: @@ -4405,6 +4405,17 @@ class ClickHouseInstance: objects = objects + self.get_s3_data_objects(path) return objects + def create_format_schema(self, file_name, content): + self.exec_in_container( + [ + "bash", + "-c", + "echo '{}' > {}".format( + content, "/var/lib/clickhouse/format_schemas/" + file_name + ), + ] + ) + class ClickHouseKiller(object): def __init__(self, clickhouse_node): diff --git a/tests/integration/test_access_control_on_cluster/configs/users.d/users.xml b/tests/integration/test_access_control_on_cluster/configs/users.d/users.xml index fb5e2028d6e..8556e73c82f 100644 --- a/tests/integration/test_access_control_on_cluster/configs/users.d/users.xml +++ b/tests/integration/test_access_control_on_cluster/configs/users.d/users.xml @@ -5,6 +5,7 @@ default default 1 + 1 diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index a903821071b..3f67fe8e5f7 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -648,24 +648,15 @@ def test_async_backups_to_same_destination(interface): "", ) - ids_succeeded = ( - instance.query( - f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'" - ) - .rstrip("\n") - .split("\n") - ) + ids_succeeded = instance.query( + f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'" + ).splitlines() - ids_failed = ( - instance.query( - f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'" - ) - .rstrip("\n") - .split("\n") - ) + ids_failed = instance.query( + f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'" + ).splitlines() assert len(ids_succeeded) == 1 - assert len(ids_failed) <= 1 assert set(ids_succeeded + ids_failed) == set(ids) # Check that the first backup is all right. diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 43e7682ec1d..43c8adda65a 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -82,8 +82,12 @@ def drop_after_test(): try: yield finally: - node0.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY") - node0.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' NO DELAY") + node0.query( + "DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY", + settings={ + "distributed_ddl_task_timeout": 360, + }, + ) backup_id_counter = 0 @@ -138,7 +142,12 @@ def test_concurrent_backups_on_same_node(): # This restore part is added to confirm creating an internal backup & restore work # even when a concurrent backup is stopped - nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + nodes[0].query( + f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + settings={ + "distributed_ddl_task_timeout": 360, + }, + ) nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") nodes[0].query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") @@ -158,9 +167,14 @@ def test_concurrent_backups_on_different_nodes(): f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP' AND id = '{id}'", "CREATING_BACKUP", ) - assert "Concurrent backups not supported" in nodes[2].query_and_get_error( + assert "Concurrent backups not supported" in nodes[0].query_and_get_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) + assert_eq_with_retry( + nodes[1], + f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'", + "BACKUP_CREATED", + ) def test_concurrent_restores_on_same_node(): @@ -185,11 +199,20 @@ def test_concurrent_restores_on_same_node(): "BACKUP_CREATED", ) - nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") - nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") + nodes[0].query( + f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + settings={ + "distributed_ddl_task_timeout": 360, + }, + ) + restore_id = ( + nodes[0] + .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") + .split("\t")[0] + ) assert_eq_with_retry( nodes[0], - f"SELECT status FROM system.backups WHERE status == 'RESTORING'", + f"SELECT status FROM system.backups WHERE status == 'RESTORING' AND id == '{restore_id}'", "RESTORING", ) assert "Concurrent restores not supported" in nodes[0].query_and_get_error( @@ -219,8 +242,17 @@ def test_concurrent_restores_on_different_node(): "BACKUP_CREATED", ) - nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") - nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") + nodes[0].query( + f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + settings={ + "distributed_ddl_task_timeout": 360, + }, + ) + restore_id = ( + nodes[0] + .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") + .split("\t")[0] + ) assert_eq_with_retry( nodes[0], f"SELECT status FROM system.backups WHERE status == 'RESTORING'", @@ -229,3 +261,9 @@ def test_concurrent_restores_on_different_node(): assert "Concurrent restores not supported" in nodes[1].query_and_get_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) + + assert_eq_with_retry( + nodes[0], + f"SELECT status FROM system.backups WHERE status == 'RESTORED' AND id == '{restore_id}'", + "RESTORED", + ) diff --git a/tests/integration/test_backup_with_other_granularity/test.py b/tests/integration/test_backup_with_other_granularity/test.py index d30c45c3691..2a82fc71951 100644 --- a/tests/integration/test_backup_with_other_granularity/test.py +++ b/tests/integration/test_backup_with_other_granularity/test.py @@ -54,7 +54,8 @@ def test_backup_from_old_version(started_cluster): node1.query("ALTER TABLE source_table FREEZE PARTITION tuple();") - node1.restart_with_latest_version(fix_metadata=True) + # We don't want to wait old outdated version to finish properly, just terminate it + node1.restart_with_latest_version(fix_metadata=True, signal=9) node1.query( "CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table1', '1') ORDER BY tuple()" @@ -107,7 +108,8 @@ def test_backup_from_old_version_setting(started_cluster): node2.query("ALTER TABLE source_table FREEZE PARTITION tuple();") - node2.restart_with_latest_version(fix_metadata=True) + # We don't want to wait old outdated version to finish properly, just terminate it + node2.restart_with_latest_version(fix_metadata=True, signal=9) node2.query( "CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table2', '1') ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 1" @@ -163,7 +165,10 @@ def test_backup_from_old_version_config(started_cluster): "1", ) - node3.restart_with_latest_version(callback_onstop=callback, fix_metadata=True) + # We don't want to wait old outdated version to finish properly, just terminate it + node3.restart_with_latest_version( + callback_onstop=callback, fix_metadata=True, signal=9 + ) node3.query( "CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table3', '1') ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 1" diff --git a/tests/integration/test_cluster_copier/test.py b/tests/integration/test_cluster_copier/test.py index 0aadcadc064..b261f7e3a39 100644 --- a/tests/integration/test_cluster_copier/test.py +++ b/tests/integration/test_cluster_copier/test.py @@ -565,13 +565,20 @@ def test_copy_with_recovering(started_cluster, use_sample_offset): str(COPYING_FAIL_PROBABILITY), "--experimental-use-sample-offset", "1", + "--max-table-tries", + "10", ], ) else: execute_task( started_cluster, Task1(started_cluster), - ["--copy-fault-probability", str(COPYING_FAIL_PROBABILITY)], + [ + "--copy-fault-probability", + str(COPYING_FAIL_PROBABILITY), + "--max-table-tries", + "10", + ], ) @@ -606,7 +613,12 @@ def test_copy_month_to_week_partition_with_recovering(started_cluster): execute_task( started_cluster, Task2(started_cluster, "test2"), - ["--copy-fault-probability", str(COPYING_FAIL_PROBABILITY)], + [ + "--copy-fault-probability", + str(COPYING_FAIL_PROBABILITY), + "--max-table-tries", + "10", + ], ) diff --git a/tests/integration/test_concurrent_queries_restriction_by_query_kind/test.py b/tests/integration/test_concurrent_queries_restriction_by_query_kind/test.py index 777c35f8b50..830090a1c0d 100644 --- a/tests/integration/test_concurrent_queries_restriction_by_query_kind/test.py +++ b/tests/integration/test_concurrent_queries_restriction_by_query_kind/test.py @@ -9,8 +9,16 @@ cluster = ClickHouseCluster(__file__) node_insert = cluster.add_instance( "node_insert", main_configs=["configs/concurrent_insert_restriction.xml"] ) -node_select = cluster.add_instance( - "node_select", main_configs=["configs/concurrent_select_restriction.xml"] +node_select1 = cluster.add_instance( + "node_select1", main_configs=["configs/concurrent_select_restriction.xml"] +) + +node_select2 = cluster.add_instance( + "node_select2", main_configs=["configs/concurrent_select_restriction.xml"] +) + +node_select3 = cluster.add_instance( + "node_select3", main_configs=["configs/concurrent_select_restriction.xml"] ) @@ -18,7 +26,13 @@ node_select = cluster.add_instance( def started_cluster(): try: cluster.start() - node_select.query( + node_select1.query( + "create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()" + ) + node_select2.query( + "create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()" + ) + node_select3.query( "create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()" ) node_insert.query( @@ -79,7 +93,7 @@ def common_pattern(node, query_kind, restricted_sql, normal_sql, limit, wait_tim def test_select(started_cluster): common_pattern( - node_select, + node_select1, "select", "select sleep(3)", "insert into test_concurrent_insert values (0)", @@ -89,7 +103,7 @@ def test_select(started_cluster): # subquery is not counted execute_with_background( - node_select, + node_select2, "select sleep(3)", "insert into test_concurrent_insert select sleep(3)", 2, @@ -98,7 +112,7 @@ def test_select(started_cluster): # intersect and except are counted common_pattern( - node_select, + node_select3, "select", "select sleep(1) INTERSECT select sleep(1) EXCEPT select sleep(1)", "insert into test_concurrent_insert values (0)", diff --git a/tests/integration/test_config_xml_full/test.py b/tests/integration/test_config_xml_full/test.py index ada3dc3f027..5f0679e5ef8 100644 --- a/tests/integration/test_config_xml_full/test.py +++ b/tests/integration/test_config_xml_full/test.py @@ -58,5 +58,19 @@ def test_xml_full_conf(): == "64999\n" ) + assert ( + node.query( + "select value from system.server_settings where name = 'max_connections'" + ) + == "4096\n" + ) + + assert ( + node.query( + "select changed from system.server_settings where name = 'max_connections'" + ) + == "1\n" + ) + finally: cluster.shutdown() diff --git a/tests/integration/test_config_xml_yaml_mix/test.py b/tests/integration/test_config_xml_yaml_mix/test.py index 373c42b2dea..9896d700ac1 100644 --- a/tests/integration/test_config_xml_yaml_mix/test.py +++ b/tests/integration/test_config_xml_yaml_mix/test.py @@ -64,6 +64,12 @@ def test_extra_yaml_mix(): ) == "64999\n" ) + assert ( + node.query( + "select value from system.server_settings where name = 'mark_cache_size'" + ) + == "8956\n" + ) finally: cluster.shutdown() diff --git a/tests/integration/test_create_query_constraints/configs/users.xml b/tests/integration/test_create_query_constraints/configs/users.xml index fb5e2028d6e..8556e73c82f 100644 --- a/tests/integration/test_create_query_constraints/configs/users.xml +++ b/tests/integration/test_create_query_constraints/configs/users.xml @@ -5,6 +5,7 @@ default default 1 + 1 diff --git a/tests/integration/test_disk_configuration/test.py b/tests/integration/test_disk_configuration/test.py index 60d75e4dac1..34f8bea219f 100644 --- a/tests/integration/test_disk_configuration/test.py +++ b/tests/integration/test_disk_configuration/test.py @@ -262,7 +262,7 @@ def test_merge_tree_custom_disk_setting(start_cluster): ) expected = """ - SETTINGS disk = disk(type = s3, endpoint = \\'http://minio1:9001/root/data2/\\', access_key_id = \\'minio\\', secret_access_key = \\'minio123\\'), index_granularity = 8192 + SETTINGS disk = disk(type = s3, endpoint = \\'[HIDDEN]\\', access_key_id = \\'[HIDDEN]\\', secret_access_key = \\'[HIDDEN]\\'), index_granularity = 8192 """ assert expected.strip() in node1.query(f"SHOW CREATE TABLE {TABLE_NAME}_4").strip() diff --git a/tests/integration/test_global_overcommit_tracker/configs/users.xml b/tests/integration/test_global_overcommit_tracker/configs/users.xml index fb5e2028d6e..8556e73c82f 100644 --- a/tests/integration/test_global_overcommit_tracker/configs/users.xml +++ b/tests/integration/test_global_overcommit_tracker/configs/users.xml @@ -5,6 +5,7 @@ default default 1 + 1 diff --git a/tests/integration/test_grant_and_revoke/configs/users.d/users.xml b/tests/integration/test_grant_and_revoke/configs/users.d/users.xml index fb5e2028d6e..8556e73c82f 100644 --- a/tests/integration/test_grant_and_revoke/configs/users.d/users.xml +++ b/tests/integration/test_grant_and_revoke/configs/users.d/users.xml @@ -5,6 +5,7 @@ default default 1 + 1 diff --git a/tests/queries/0_stateless/02666_progress_when_no_rows_from_prewhere.reference b/tests/integration/test_kafka_bad_messages/__init__.py similarity index 100% rename from tests/queries/0_stateless/02666_progress_when_no_rows_from_prewhere.reference rename to tests/integration/test_kafka_bad_messages/__init__.py diff --git a/tests/integration/test_kafka_bad_messages/configs/kafka.xml b/tests/integration/test_kafka_bad_messages/configs/kafka.xml new file mode 100644 index 00000000000..f05f81f59b7 --- /dev/null +++ b/tests/integration/test_kafka_bad_messages/configs/kafka.xml @@ -0,0 +1,20 @@ + + + earliest + + cgrp,consumer,topic,protocol + + + + + 300 + + 6000 + + diff --git a/tests/integration/test_kafka_bad_messages/test.py b/tests/integration/test_kafka_bad_messages/test.py new file mode 100644 index 00000000000..da3cf36c853 --- /dev/null +++ b/tests/integration/test_kafka_bad_messages/test.py @@ -0,0 +1,285 @@ +import time +import logging +import pytest + +from helpers.cluster import ClickHouseCluster +from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer, BrokerConnection +from kafka.admin import NewTopic + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/kafka.xml"], + with_kafka=True, +) + + +def kafka_create_topic( + admin_client, + topic_name, + num_partitions=1, + replication_factor=1, + max_retries=50, + config=None, +): + logging.debug( + f"Kafka create topic={topic_name}, num_partitions={num_partitions}, replication_factor={replication_factor}" + ) + topics_list = [ + NewTopic( + name=topic_name, + num_partitions=num_partitions, + replication_factor=replication_factor, + topic_configs=config, + ) + ] + retries = 0 + while True: + try: + admin_client.create_topics(new_topics=topics_list, validate_only=False) + logging.debug("Admin client succeed") + return + except Exception as e: + retries += 1 + time.sleep(0.5) + if retries < max_retries: + logging.warning(f"Failed to create topic {e}") + else: + raise + + +def kafka_delete_topic(admin_client, topic, max_retries=50): + result = admin_client.delete_topics([topic]) + for topic, e in result.topic_error_codes: + if e == 0: + logging.debug(f"Topic {topic} deleted") + else: + logging.error(f"Failed to delete topic {topic}: {e}") + + retries = 0 + while True: + topics_listed = admin_client.list_topics() + logging.debug(f"TOPICS LISTED: {topics_listed}") + if topic not in topics_listed: + return + else: + retries += 1 + time.sleep(0.5) + if retries > max_retries: + raise Exception(f"Failed to delete topics {topic}, {result}") + + +def get_kafka_producer(port, serializer, retries): + errors = [] + for _ in range(retries): + try: + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(port), + value_serializer=serializer, + ) + logging.debug("Kafka Connection establised: localhost:{}".format(port)) + return producer + except Exception as e: + errors += [str(e)] + time.sleep(1) + + raise Exception("Connection not establised, {}".format(errors)) + + +def producer_serializer(x): + return x.encode() if isinstance(x, str) else x + + +def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): + logging.debug( + "kafka_produce server:{}:{} topic:{}".format( + "localhost", kafka_cluster.kafka_port, topic + ) + ) + producer = get_kafka_producer( + kafka_cluster.kafka_port, producer_serializer, retries + ) + for message in messages: + producer.send(topic=topic, value=message, timestamp_ms=timestamp) + producer.flush() + + +@pytest.fixture(scope="module") +def kafka_cluster(): + try: + cluster.start() + kafka_id = instance.cluster.kafka_docker_id + print(("kafka_id is {}".format(kafka_id))) + yield cluster + finally: + cluster.shutdown() + + +def test_bad_messages_parsing(kafka_cluster): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + for format_name in [ + "TSV", + "TSKV", + "CSV", + "Values", + "JSON", + "JSONEachRow", + "JSONCompactEachRow", + "JSONObjectEachRow", + "Avro", + "RowBinary", + "JSONColumns", + "JSONColumnsWithMetadata", + "Native", + "Arrow", + "ArrowStream", + "Parquet", + "ORC", + "JSONCompactColumns", + "BSONEachRow", + "MySQLDump", + ]: + print(format_name) + + kafka_create_topic(admin_client, f"{format_name}_err") + + instance.query( + f""" + DROP TABLE IF EXISTS view; + DROP TABLE IF EXISTS kafka; + + CREATE TABLE kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}_err', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_handle_error_mode='stream'; + + CREATE MATERIALIZED VIEW view Engine=Log AS + SELECT _error FROM kafka WHERE length(_error) != 0 ; + """ + ) + + messages = ["qwertyuiop", "asdfghjkl", "zxcvbnm"] + kafka_produce(kafka_cluster, f"{format_name}_err", messages) + + attempt = 0 + rows = 0 + while attempt < 500: + rows = int(instance.query("SELECT count() FROM view")) + if rows == len(messages): + break + attempt += 1 + + assert rows == len(messages) + + kafka_delete_topic(admin_client, f"{format_name}_err") + + protobuf_schema = """ +syntax = "proto3"; + +message Message { + uint64 key = 1; + uint64 value = 2; +}; +""" + + instance.create_format_schema("schema_test_errors.proto", protobuf_schema) + + for format_name in ["Protobuf", "ProtobufSingle", "ProtobufList"]: + instance.query( + f""" + DROP TABLE IF EXISTS view; + DROP TABLE IF EXISTS kafka; + + CREATE TABLE kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}_err', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_handle_error_mode='stream', + kafka_schema='schema_test_errors:Message'; + + CREATE MATERIALIZED VIEW view Engine=Log AS + SELECT _error FROM kafka WHERE length(_error) != 0 ; + """ + ) + + print(format_name) + + kafka_create_topic(admin_client, f"{format_name}_err") + + messages = ["qwertyuiop", "poiuytrewq", "zxcvbnm"] + kafka_produce(kafka_cluster, f"{format_name}_err", messages) + + attempt = 0 + rows = 0 + while attempt < 500: + rows = int(instance.query("SELECT count() FROM view")) + if rows == len(messages): + break + attempt += 1 + + assert rows == len(messages) + + kafka_delete_topic(admin_client, f"{format_name}_err") + + capn_proto_schema = """ +@0xd9dd7b35452d1c4f; + +struct Message +{ + key @0 : UInt64; + value @1 : UInt64; +} +""" + + instance.create_format_schema("schema_test_errors.capnp", capn_proto_schema) + instance.query( + f""" + DROP TABLE IF EXISTS view; + DROP TABLE IF EXISTS kafka; + + CREATE TABLE kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'CapnProto_err', + kafka_group_name = 'CapnProto', + kafka_format = 'CapnProto', + kafka_handle_error_mode='stream', + kafka_schema='schema_test_errors:Message'; + + CREATE MATERIALIZED VIEW view Engine=Log AS + SELECT _error FROM kafka WHERE length(_error) != 0 ; + """ + ) + + print("CapnProto") + + kafka_create_topic(admin_client, "CapnProto_err") + + messages = ["qwertyuiop", "asdfghjkl", "zxcvbnm"] + kafka_produce(kafka_cluster, "CapnProto_err", messages) + + attempt = 0 + rows = 0 + while attempt < 500: + rows = int(instance.query("SELECT count() FROM view")) + if rows == len(messages): + break + attempt += 1 + + assert rows == len(messages) + + kafka_delete_topic(admin_client, "CapnProto_err") + + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 6c1733fc72f..d1b7b64b56f 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -96,8 +96,11 @@ def test_read_after_cache_is_wiped(cluster): node = cluster.instances[NODE_NAME] create_table(node, TABLE_NAME) - values = "('2021-11-13',3,'hello'),('2021-11-14',4,'heyo')" - + # We insert into different partitions, so do it separately to avoid + # test flakyness when retrying the query in case of retriable exception. + values = "('2021-11-13',3,'hello')" + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {values}") + values = "('2021-11-14',4,'heyo')" azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {values}") # Wipe cache @@ -108,8 +111,10 @@ def test_read_after_cache_is_wiped(cluster): # After cache is populated again, only .bin files should be accessed from Blob Storage. assert ( - azure_query(node, f"SELECT * FROM {TABLE_NAME} order by dt, id FORMAT Values") - == values + azure_query( + node, f"SELECT * FROM {TABLE_NAME} order by dt, id FORMAT Values" + ).strip() + == "('2021-11-13',3,'hello'),('2021-11-14',4,'heyo')" ) diff --git a/tests/integration/test_move_partition_to_disk_on_cluster/__init__.py b/tests/integration/test_move_partition_to_disk_on_cluster/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_move_partition_to_disk_on_cluster/configs/config.d/cluster.xml b/tests/integration/test_move_partition_to_disk_on_cluster/configs/config.d/cluster.xml new file mode 100644 index 00000000000..2316050b629 --- /dev/null +++ b/tests/integration/test_move_partition_to_disk_on_cluster/configs/config.d/cluster.xml @@ -0,0 +1,17 @@ + + + + + true + + node1 + 9000 + + + node2 + 9000 + + + + + \ No newline at end of file diff --git a/tests/integration/test_move_partition_to_disk_on_cluster/configs/config.d/storage_configuration.xml b/tests/integration/test_move_partition_to_disk_on_cluster/configs/config.d/storage_configuration.xml new file mode 100644 index 00000000000..3289186c175 --- /dev/null +++ b/tests/integration/test_move_partition_to_disk_on_cluster/configs/config.d/storage_configuration.xml @@ -0,0 +1,28 @@ + + + + + + /jbod1/ + + + /external/ + + + + + + +
+ jbod1 +
+ + external + +
+
+
+ +
+ +
diff --git a/tests/integration/test_move_partition_to_disk_on_cluster/test.py b/tests/integration/test_move_partition_to_disk_on_cluster/test.py new file mode 100644 index 00000000000..90753fc8ce3 --- /dev/null +++ b/tests/integration/test_move_partition_to_disk_on_cluster/test.py @@ -0,0 +1,92 @@ +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", + main_configs=[ + "configs/config.d/storage_configuration.xml", + "configs/config.d/cluster.xml", + ], + with_zookeeper=True, + stay_alive=True, + tmpfs=["/jbod1:size=10M", "/external:size=10M"], + macros={"shard": 0, "replica": 1}, +) + +node2 = cluster.add_instance( + "node2", + main_configs=[ + "configs/config.d/storage_configuration.xml", + "configs/config.d/cluster.xml", + ], + with_zookeeper=True, + stay_alive=True, + tmpfs=["/jbod1:size=10M", "/external:size=10M"], + macros={"shard": 0, "replica": 2}, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_move_partition_to_disk_on_cluster(start_cluster): + for node in [node1, node2]: + node.query( + sql="CREATE TABLE test_local_table" + "(x UInt64) " + "ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_local_table', '{replica}') " + "ORDER BY tuple()" + "SETTINGS storage_policy = 'jbod_with_external';", + ) + + node1.query("INSERT INTO test_local_table VALUES (0)") + node1.query("SYSTEM SYNC REPLICA test_local_table", timeout=30) + + try: + node1.query( + sql="ALTER TABLE test_local_table ON CLUSTER 'test_cluster' MOVE PARTITION tuple() TO DISK 'jbod1';", + ) + except QueryRuntimeException: + pass + + for node in [node1, node2]: + assert ( + node.query( + "SELECT partition_id, disk_name FROM system.parts WHERE table = 'test_local_table' FORMAT Values" + ) + == "('all','jbod1')" + ) + + node1.query( + sql="ALTER TABLE test_local_table ON CLUSTER 'test_cluster' MOVE PARTITION tuple() TO DISK 'external';", + ) + + for node in [node1, node2]: + assert ( + node.query( + "SELECT partition_id, disk_name FROM system.parts WHERE table = 'test_local_table' FORMAT Values" + ) + == "('all','external')" + ) + + node1.query( + sql="ALTER TABLE test_local_table ON CLUSTER 'test_cluster' MOVE PARTITION tuple() TO VOLUME 'main';", + ) + + for node in [node1, node2]: + assert ( + node.query( + "SELECT partition_id, disk_name FROM system.parts WHERE table = 'test_local_table' FORMAT Values" + ) + == "('all','jbod1')" + ) diff --git a/tests/integration/test_named_collections/configs/users.d/users.xml b/tests/integration/test_named_collections/configs/users.d/users.xml index fb5e2028d6e..8556e73c82f 100644 --- a/tests/integration/test_named_collections/configs/users.d/users.xml +++ b/tests/integration/test_named_collections/configs/users.d/users.xml @@ -5,6 +5,7 @@ default default 1 + 1 diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index 3b102f1aa70..ba62880e9de 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -102,7 +102,32 @@ def test_access(cluster): ["bash", "-c", f"cat /etc/clickhouse-server/users.d/users.xml"] ) node.restart_clickhouse() - assert int(node.query("select count() from system.named_collections")) > 0 + assert ( + node.query("select collection['key1'] from system.named_collections").strip() + == "value1" + ) + replace_in_users_config( + node, "show_named_collections_secrets>1", "show_named_collections_secrets>0" + ) + assert "show_named_collections_secrets>0" in node.exec_in_container( + ["bash", "-c", f"cat /etc/clickhouse-server/users.d/users.xml"] + ) + node.restart_clickhouse() + assert ( + node.query("select collection['key1'] from system.named_collections").strip() + == "[HIDDEN]" + ) + replace_in_users_config( + node, "show_named_collections_secrets>0", "show_named_collections_secrets>1" + ) + assert "show_named_collections_secrets>1" in node.exec_in_container( + ["bash", "-c", f"cat /etc/clickhouse-server/users.d/users.xml"] + ) + node.restart_clickhouse() + assert ( + node.query("select collection['key1'] from system.named_collections").strip() + == "value1" + ) def test_config_reload(cluster): diff --git a/tests/integration/test_overcommit_tracker/configs/users.d/users.xml b/tests/integration/test_overcommit_tracker/configs/users.d/users.xml index fb5e2028d6e..8556e73c82f 100644 --- a/tests/integration/test_overcommit_tracker/configs/users.d/users.xml +++ b/tests/integration/test_overcommit_tracker/configs/users.d/users.xml @@ -5,6 +5,7 @@ default default 1 + 1 diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 68c7cb96b71..33796336550 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -628,6 +628,27 @@ def test_table_override(started_cluster): assert_eq_with_retry(instance, query, expected) +def test_materialized_view(started_cluster): + cursor = pg_manager.get_db_cursor() + cursor.execute(f"DROP TABLE IF EXISTS test_table") + cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)") + cursor.execute(f"INSERT INTO test_table SELECT 1, 2") + instance.query("DROP DATABASE IF EXISTS test_database") + instance.query( + "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'" + ) + check_tables_are_synchronized(instance, "test_table") + instance.query("DROP TABLE IF EXISTS mv") + instance.query( + "CREATE MATERIALIZED VIEW mv ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT * FROM test_database.test_table" + ) + assert "1\t2" == instance.query("SELECT * FROM mv").strip() + cursor.execute(f"INSERT INTO test_table SELECT 3, 4") + check_tables_are_synchronized(instance, "test_table") + assert "1\t2\n3\t4" == instance.query("SELECT * FROM mv ORDER BY 1, 2").strip() + pg_manager.drop_materialized_db() + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 8160a6b47a7..ead9a762b1b 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -232,11 +232,7 @@ def test_delete_from_table(started_cluster, engine): dummy_node.query("INSERT INTO TABLE {} VALUES(1, 'bbbb');".format(name)) dummy_node.query("INSERT INTO TABLE {} VALUES(2, 'bbbb');".format(name)) - main_node.query( - "SET allow_experimental_lightweight_delete=1; DELETE FROM {} WHERE id=2;".format( - name - ) - ) + main_node.query("DELETE FROM {} WHERE id=2;".format(name)) expected = "1\taaaa\n1\tbbbb" diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml index bd59694f65a..15239041478 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml @@ -53,4 +53,6 @@ 0 + 3 + 0 diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py index 60a1b9b9746..f0bc12e3125 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py @@ -1,9 +1,11 @@ import logging import random import string +import time import pytest from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager logging.getLogger().setLevel(logging.INFO) logging.getLogger().addHandler(logging.StreamHandler()) @@ -127,3 +129,75 @@ def test_insert_select_replicated(cluster, min_rows_for_wide_part, files_per_par assert len( list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) ) == (3 * FILES_OVERHEAD) + (files_per_part * 3) + + +def test_drop_table(cluster): + node = list(cluster.instances.values())[0] + node2 = list(cluster.instances.values())[1] + node.query( + "create table test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '1') order by n partition by n % 99 settings storage_policy='s3'" + ) + node2.query( + "create table test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '2') order by n partition by n % 99 settings storage_policy='s3'" + ) + node.query("insert into test_drop_table select * from numbers(1000)") + node2.query("system sync replica test_drop_table") + + with PartitionManager() as pm: + pm._add_rule( + { + "probability": 0.01, + "destination": node.ip_address, + "source_port": 2181, + "action": "REJECT --reject-with tcp-reset", + } + ) + pm._add_rule( + { + "probability": 0.01, + "source": node.ip_address, + "destination_port": 2181, + "action": "REJECT --reject-with tcp-reset", + } + ) + + # Will drop in background with retries + node.query("drop table test_drop_table") + + # It should not be possible to create a replica with the same path until the previous one is completely dropped + for i in range(0, 100): + node.query_and_get_answer_with_error( + "create table if not exists test_drop_table (n int) " + "engine=ReplicatedMergeTree('/test/drop_table', '1') " + "order by n partition by n % 99 settings storage_policy='s3'" + ) + time.sleep(0.2) + + # Wait for drop to actually finish + node.wait_for_log_line( + "Removing metadata /var/lib/clickhouse/metadata_dropped/default.test_drop_table", + timeout=60, + look_behind_lines=1000000, + ) + + # It could leave some leftovers, remove them + replicas = node.query_with_retry( + "select name from system.zookeeper where path='/test/drop_table/replicas'" + ) + if "1" in replicas and "test_drop_table" not in node.query("show tables"): + node2.query("system drop replica '1' from table test_drop_table") + + # Just in case table was not created due to connection errors + node.query( + "create table if not exists test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '1') " + "order by n partition by n % 99 settings storage_policy='s3'" + ) + node.query_with_retry( + "system sync replica test_drop_table", + settings={"receive_timeout": 10}, + retry_count=5, + ) + node2.query("drop table test_drop_table") + assert "1000\t499500\n" == node.query( + "select count(n), sum(n) from test_drop_table" + ) diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py index 29177b6a67b..5fbe426074f 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py @@ -2,6 +2,12 @@ import time import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/45887 + +pytestmark = pytest.mark.skip + from helpers.cluster import ClickHouseCluster diff --git a/tests/integration/test_settings_constraints_distributed/configs/users.d/users.xml b/tests/integration/test_settings_constraints_distributed/configs/users.d/users.xml index fb5e2028d6e..8556e73c82f 100644 --- a/tests/integration/test_settings_constraints_distributed/configs/users.d/users.xml +++ b/tests/integration/test_settings_constraints_distributed/configs/users.d/users.xml @@ -5,6 +5,7 @@ default default 1 + 1 diff --git a/tests/integration/test_storage_kafka/configs/kafka.xml b/tests/integration/test_storage_kafka/configs/kafka.xml index f05f81f59b7..062c98a2ac7 100644 --- a/tests/integration/test_storage_kafka/configs/kafka.xml +++ b/tests/integration/test_storage_kafka/configs/kafka.xml @@ -9,12 +9,14 @@ XXX: for now this messages will appears in stderr. --> cgrp,consumer,topic,protocol + + + consumer_hang + + 300 + + 6000 +
- - - 300 - - 6000 - diff --git a/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml b/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml index 68eea654f1c..61558181fe6 100644 --- a/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml +++ b/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml @@ -15,12 +15,13 @@ kafkauser/instance@TEST.CLICKHOUSE.TECH security false + + consumer_hang + + 300 + + 6000 +
- - - 300 - - 6000 - diff --git a/tests/integration/test_zero_copy_fetch/__init__.py b/tests/integration/test_zero_copy_fetch/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_zero_copy_fetch/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_zero_copy_fetch/configs/storage_conf.xml b/tests/integration/test_zero_copy_fetch/configs/storage_conf.xml new file mode 100644 index 00000000000..257ae0a355c --- /dev/null +++ b/tests/integration/test_zero_copy_fetch/configs/storage_conf.xml @@ -0,0 +1,30 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + + + + + default + +
+ s3 + False + True +
+
+
+
+
+ + + true + +
diff --git a/tests/integration/test_zero_copy_fetch/test.py b/tests/integration/test_zero_copy_fetch/test.py new file mode 100644 index 00000000000..f13eac5e9d1 --- /dev/null +++ b/tests/integration/test_zero_copy_fetch/test.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 + +import logging +import random +import string +import time + +import pytest +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + + cluster.add_instance( + "node1", + main_configs=["configs/storage_conf.xml"], + with_minio=True, + with_zookeeper=True, + ) + cluster.add_instance( + "node2", + main_configs=["configs/storage_conf.xml"], + with_minio=True, + with_zookeeper=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_fetch_correct_volume(started_cluster): + node1 = cluster.instances["node1"] + node2 = cluster.instances["node2"] + + node1.query( + """ +CREATE TABLE test1 (EventDate Date, CounterID UInt32) +ENGINE = ReplicatedMergeTree('/clickhouse-tables/test1', 'r1') +PARTITION BY toMonday(EventDate) +ORDER BY (CounterID, EventDate) +SETTINGS index_granularity = 8192, storage_policy = 's3'""" + ) + + node1.query( + "INSERT INTO test1 SELECT toDate('2023-01-01') + toIntervalDay(number), number + 1000 from system.numbers limit 20" + ) + + def get_part_to_disk(query_result): + part_to_disk = {} + for row in query_result.strip().split("\n"): + print(row) + disk, part = row.split("\t") + part_to_disk[part] = disk + return part_to_disk + + part_to_disk = get_part_to_disk( + node1.query( + "SELECT disk_name, name FROM system.parts where table = 'test1' and active" + ) + ) + for disk in part_to_disk.values(): + assert disk == "default" + + node1.query("ALTER TABLE test1 MOVE PARTITION '2022-12-26' TO DISK 's3'") + node1.query("ALTER TABLE test1 MOVE PARTITION '2023-01-02' TO DISK 's3'") + node1.query("ALTER TABLE test1 MOVE PARTITION '2023-01-09' TO DISK 's3'") + + part_to_disk = get_part_to_disk( + node1.query( + "SELECT disk_name, name FROM system.parts where table = 'test1' and active" + ) + ) + assert part_to_disk["20221226_0_0_0"] == "s3" + assert part_to_disk["20230102_0_0_0"] == "s3" + assert part_to_disk["20230109_0_0_0"] == "s3" + assert part_to_disk["20230116_0_0_0"] == "default" + + node2.query( + """ +CREATE TABLE test1 (EventDate Date, CounterID UInt32) +ENGINE = ReplicatedMergeTree('/clickhouse-tables/test1', 'r2') +PARTITION BY toMonday(EventDate) +ORDER BY (CounterID, EventDate) +SETTINGS index_granularity = 8192, storage_policy = 's3'""" + ) + + node2.query("SYSTEM SYNC REPLICA test1") + + part_to_disk = get_part_to_disk( + node2.query( + "SELECT disk_name, name FROM system.parts where table = 'test1' and active" + ) + ) + assert part_to_disk["20221226_0_0_0"] == "s3" + assert part_to_disk["20230102_0_0_0"] == "s3" + assert part_to_disk["20230109_0_0_0"] == "s3" + assert part_to_disk["20230116_0_0_0"] == "default" diff --git a/tests/performance/aggregation_by_partitions.xml b/tests/performance/aggregation_by_partitions.xml index dbeaf3ce6aa..403f94d8ac8 100644 --- a/tests/performance/aggregation_by_partitions.xml +++ b/tests/performance/aggregation_by_partitions.xml @@ -1,9 +1,8 @@ - - - - + 1 + 1 + 256 0 256 diff --git a/tests/performance/lightweight_delete.xml b/tests/performance/lightweight_delete.xml index b29684f177f..e9f42140534 100644 --- a/tests/performance/lightweight_delete.xml +++ b/tests/performance/lightweight_delete.xml @@ -19,7 +19,6 @@ 1 1 - 1 diff --git a/tests/queries/0_stateless/00061_merge_tree_alter.sql b/tests/queries/0_stateless/00061_merge_tree_alter.sql index ee5694518d9..2e46b1e16d6 100644 --- a/tests/queries/0_stateless/00061_merge_tree_alter.sql +++ b/tests/queries/0_stateless/00061_merge_tree_alter.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check DROP TABLE IF EXISTS alter_00061; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/00187_like_regexp_prefix.reference b/tests/queries/0_stateless/00187_like_regexp_prefix.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/00187_like_regexp_prefix.reference +++ b/tests/queries/0_stateless/00187_like_regexp_prefix.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/00187_like_regexp_prefix.sql b/tests/queries/0_stateless/00187_like_regexp_prefix.sql index 7fec21b68ad..430b86e8b39 100644 --- a/tests/queries/0_stateless/00187_like_regexp_prefix.sql +++ b/tests/queries/0_stateless/00187_like_regexp_prefix.sql @@ -1 +1,3 @@ SELECT materialize('prepre_f') LIKE '%pre_f%'; + +SELECT materialize('prepre_f') LIKE '%%%pre_f%'; diff --git a/tests/queries/0_stateless/00398_url_functions.reference b/tests/queries/0_stateless/00398_url_functions.reference index 39d740e55cd..6913b3f4fb7 100644 --- a/tests/queries/0_stateless/00398_url_functions.reference +++ b/tests/queries/0_stateless/00398_url_functions.reference @@ -20,7 +20,10 @@ www.example.com 127.0.0.1 www.example.com www.example.com -www.example.com + + + +www.example4.com example.com example.com example.com @@ -38,7 +41,10 @@ www.example.com 127.0.0.1 www.example.com www.example.com -www.example.com + +example2.com +example3.com +www.example4.com example.com example.com example.com @@ -53,6 +59,10 @@ paul:zozo@example.ru www.example.com www.example.com example.com +foo:foo%@foo.com +foo:foo%41bar@foo.com +foo:foo%41%42bar@foo.com +foo:foo%41bar@foo ====DOMAIN==== com diff --git a/tests/queries/0_stateless/00398_url_functions.sql.j2 b/tests/queries/0_stateless/00398_url_functions.sql.j2 index dd7da2ce6ad..95ac536f18b 100644 --- a/tests/queries/0_stateless/00398_url_functions.sql.j2 +++ b/tests/queries/0_stateless/00398_url_functions.sql.j2 @@ -23,7 +23,10 @@ SELECT domain{{ suffix }}('http://www.example.com?q=4') AS Host; SELECT domain{{ suffix }}('http://127.0.0.1:443/') AS Host; SELECT domain{{ suffix }}('//www.example.com') AS Host; SELECT domain{{ suffix }}('//paul@www.example.com') AS Host; -SELECT domain{{ suffix }}('www.example.com') as Host; +SELECT domain{{ suffix }}('//foo:bar%41%40@e-%41-example1.com') AS Host; +SELECT domain{{ suffix }}('//foo:bar%41%40@example2.com') AS Host; +SELECT domain{{ suffix }}('//foo%41%40:bar@example3.com') AS Host; +SELECT domain{{ suffix }}('www.example4.com') as Host; SELECT domain{{ suffix }}('example.com') as Host; SELECT domainWithoutWWW{{ suffix }}('//paul@www.example.com') AS Host; SELECT domainWithoutWWW{{ suffix }}('http://paul@www.example.com:80/') AS Host; @@ -41,6 +44,10 @@ SELECT netloc('svn+ssh://paul:zozo@example.ru/?q=hello%20world') AS Netloc; SELECT netloc('//www.example.com') AS Netloc; SELECT netloc('www.example.com') as Netloc; SELECT netloc('example.com') as Netloc; +SELECT netloc('http://foo:foo%@foo.com') as Netloc; +SELECT netloc('http://foo:foo%41bar@foo.com') as Netloc; +SELECT netloc('http://foo:foo%41%42bar@foo.com') as Netloc; +SELECT netloc('http://foo:foo%41bar@foo%41.com') as Netloc; SELECT '====DOMAIN===='; SELECT topLevelDomain('http://paul@www.example.com:80/') AS Domain; diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index 1aa02864815..8b07d9abe35 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-s3-storage, no-backward-compatibility-check +# Tags: zookeeper, no-parallel, no-s3-storage, no-upgrade-check # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql index e8d923389e5..9c02ac795ed 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql +++ b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql @@ -1,4 +1,4 @@ --- Tags: long, zookeeper, no-replicated-database, no-backward-compatibility-check +-- Tags: long, zookeeper, no-replicated-database, no-upgrade-check -- Tag no-replicated-database: Fails due to additional replicas or shards SET send_logs_level = 'fatal'; diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference index 2652a82ab54..16f228a5569 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference @@ -1,205 +1,33 @@ -SELECT a -FROM t1 -CROSS JOIN t2 -SELECT a -FROM t1 -ALL INNER JOIN t2 ON a = t2.a -WHERE a = t2.a -SELECT a -FROM t1 -ALL INNER JOIN t2 ON b = t2.b -WHERE b = t2.b -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - a AS `--t1.a`, - t2.a AS `--t2.a` - FROM t1 - ALL INNER JOIN t2 ON `--t1.a` = `--t2.a` -) AS `--.s` -ALL INNER JOIN t3 ON `--t1.a` = a -WHERE (`--t1.a` = `--t2.a`) AND (`--t1.a` = a) -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - b AS `--t1.b`, - a AS `--t1.a`, - t2.b AS `--t2.b` - FROM t1 - ALL INNER JOIN t2 ON `--t1.b` = `--t2.b` -) AS `--.s` -ALL INNER JOIN t3 ON `--t1.b` = b -WHERE (`--t1.b` = `--t2.b`) AND (`--t1.b` = b) -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - `--t1.a`, - `--t2.a`, - a AS `--t3.a` - FROM - ( - SELECT - a AS `--t1.a`, - t2.a AS `--t2.a` - FROM t1 - ALL INNER JOIN t2 ON `--t1.a` = `--t2.a` - ) AS `--.s` - ALL INNER JOIN t3 ON `--t1.a` = `--t3.a` -) AS `--.s` -ALL INNER JOIN t4 ON `--t1.a` = a -WHERE (`--t1.a` = `--t2.a`) AND (`--t1.a` = `--t3.a`) AND (`--t1.a` = a) -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - `--t1.b`, - `--t1.a`, - `--t2.b`, - b AS `--t3.b` - FROM - ( - SELECT - b AS `--t1.b`, - a AS `--t1.a`, - t2.b AS `--t2.b` - FROM t1 - ALL INNER JOIN t2 ON `--t1.b` = `--t2.b` - ) AS `--.s` - ALL INNER JOIN t3 ON `--t1.b` = `--t3.b` -) AS `--.s` -ALL INNER JOIN t4 ON `--t1.b` = b -WHERE (`--t1.b` = `--t2.b`) AND (`--t1.b` = `--t3.b`) AND (`--t1.b` = b) -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - `--t1.a`, - `--t2.a`, - a AS `--t3.a` - FROM - ( - SELECT - a AS `--t1.a`, - t2.a AS `--t2.a` - FROM t1 - ALL INNER JOIN t2 ON `--t2.a` = `--t1.a` - ) AS `--.s` - ALL INNER JOIN t3 ON `--t2.a` = `--t3.a` -) AS `--.s` -ALL INNER JOIN t4 ON `--t2.a` = a -WHERE (`--t2.a` = `--t1.a`) AND (`--t2.a` = `--t3.a`) AND (`--t2.a` = a) -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - `--t1.a`, - `--t2.a`, - a AS `--t3.a` - FROM - ( - SELECT - a AS `--t1.a`, - t2.a AS `--t2.a` - FROM t1 - CROSS JOIN t2 - ) AS `--.s` - ALL INNER JOIN t3 ON (`--t3.a` = `--t1.a`) AND (`--t3.a` = `--t2.a`) -) AS `--.s` -ALL INNER JOIN t4 ON `--t3.a` = a -WHERE (`--t3.a` = `--t1.a`) AND (`--t3.a` = `--t2.a`) AND (`--t3.a` = a) -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - `--t1.a`, - `--t2.a`, - a AS `--t3.a` - FROM - ( - SELECT - a AS `--t1.a`, - t2.a AS `--t2.a` - FROM t1 - CROSS JOIN t2 - ) AS `--.s` - CROSS JOIN t3 -) AS `--.s` -ALL INNER JOIN t4 ON (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) -WHERE (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - `--t1.a`, - `--t2.a`, - a AS `--t3.a` - FROM - ( - SELECT - a AS `--t1.a`, - t2.a AS `--t2.a` - FROM t1 - ALL INNER JOIN t2 ON `--t1.a` = `--t2.a` - ) AS `--.s` - ALL INNER JOIN t3 ON `--t2.a` = `--t3.a` -) AS `--.s` -ALL INNER JOIN t4 ON `--t3.a` = a -WHERE (`--t1.a` = `--t2.a`) AND (`--t2.a` = `--t3.a`) AND (`--t3.a` = a) -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT `--t1.a` - FROM - ( - SELECT a AS `--t1.a` - FROM t1 - CROSS JOIN t2 - ) AS `--.s` - CROSS JOIN t3 -) AS `--.s` -CROSS JOIN t4 -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT `--t1.a` - FROM - ( - SELECT a AS `--t1.a` - FROM t1 - CROSS JOIN t2 - ) AS `--.s` - CROSS JOIN t3 -) AS `--.s` -CROSS JOIN t4 -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT a AS `--t1.a` - FROM t1 - CROSS JOIN t2 -) AS `--.s` -CROSS JOIN t3 -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT a AS `--t1.a` - FROM t1 - ALL INNER JOIN t2 USING (a) -) AS `--.s` -CROSS JOIN t3 -SELECT `--t1.a` AS `t1.a` -FROM -( - SELECT - a AS `--t1.a`, - t2.a AS `--t2.a` - FROM t1 - ALL INNER JOIN t2 ON `--t1.a` = `--t2.a` -) AS `--.s` -CROSS JOIN t3 +0 1 +0 1 +0 2 +0 2 +0 3 +0 3 +0 3 +1 2 +2 1 +0 3 +3 0 +3 0 +2 0 +1 1 +1 1 +0 1 +0 1 +0 2 +0 2 +0 3 +0 3 +0 3 +1 2 +2 1 +0 3 +3 0 +3 0 +2 0 +1 1 +1 1 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql index eb803450ff7..db8b27c4d4d 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql @@ -12,31 +12,107 @@ CREATE TABLE t2 (a UInt32, b Nullable(Int32)) ENGINE = Memory; CREATE TABLE t3 (a UInt32, b Nullable(Int32)) ENGINE = Memory; CREATE TABLE t4 (a UInt32, b Nullable(Int32)) ENGINE = Memory; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2 WHERE t1.a = t2.a; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2 WHERE t1.b = t2.b; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b; +SET allow_experimental_analyzer = 0; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t2.a = t1.a AND t2.a = t3.a AND t2.a = t4.a; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t3.a = t1.a AND t3.a = t2.a AND t3.a = t4.a; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t4.a = t1.a AND t4.a = t2.a AND t4.a = t3.a; -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a; +--- EXPLAIN SYNTAX (old AST based optimization) +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2 WHERE t1.a = t2.a); -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4; -EXPLAIN SYNTAX SELECT t1.a FROM t1 CROSS JOIN t2 CROSS JOIN t3 CROSS JOIN t4; +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2 WHERE t1.b = t2.b); -EXPLAIN SYNTAX SELECT t1.a FROM t1, t2 CROSS JOIN t3; -EXPLAIN SYNTAX SELECT t1.a FROM t1 JOIN t2 USING a CROSS JOIN t3; -EXPLAIN SYNTAX SELECT t1.a FROM t1 JOIN t2 ON t1.a = t2.a CROSS JOIN t3; +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t2.a = t1.a AND t2.a = t3.a AND t2.a = t4.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t3.a = t1.a AND t3.a = t2.a AND t3.a = t4.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t4.a = t1.a AND t4.a = t2.a AND t4.a = t3.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2, t3, t4); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1 CROSS JOIN t2 CROSS JOIN t3 CROSS JOIN t4); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1, t2 CROSS JOIN t3); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1 JOIN t2 USING a CROSS JOIN t3); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN SYNTAX SELECT t1.a FROM t1 JOIN t2 ON t1.a = t2.a CROSS JOIN t3); + +--- EXPLAIN QUERY TREE +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.a = t2.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.b = t2.b); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t2.a = t1.a AND t2.a = t3.a AND t2.a = t4.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t3.a = t1.a AND t3.a = t2.a AND t3.a = t4.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t4.a = t1.a AND t4.a = t2.a AND t4.a = t3.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1 CROSS JOIN t2 CROSS JOIN t3 CROSS JOIN t4); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 CROSS JOIN t3); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 USING a CROSS JOIN t3); + +SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( + EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 ON t1.a = t2.a CROSS JOIN t3); INSERT INTO t1 values (1,1), (2,2), (3,3), (4,4); INSERT INTO t2 values (1,1), (1, Null); INSERT INTO t3 values (1,1), (1, Null); INSERT INTO t4 values (1,1), (1, Null); +SET allow_experimental_analyzer = 1; + SELECT 'SELECT * FROM t1, t2'; SELECT * FROM t1, t2 ORDER BY t1.a, t2.b; diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 7f5d5fabf12..72e0df5bfda 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -19,6 +19,7 @@ a --JSONType-- Object Array +Bool --JSONExtract-- -100 200 @@ -172,6 +173,7 @@ a --JSONType-- Object Array +Bool --JSONExtract-- -100 200 diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index dfae54dfd16..3314a5b762d 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -28,6 +28,7 @@ SELECT JSONKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2); SELECT '--JSONType--'; SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}'); SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b'); +SELECT JSONType('{"a": true}', 'a'); SELECT '--JSONExtract--'; SELECT JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1); @@ -196,6 +197,7 @@ SELECT JSONKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2); SELECT '--JSONType--'; SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}'); SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b'); +SELECT JSONType('{"a": true}', 'a'); SELECT '--JSONExtract--'; SELECT JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1); diff --git a/tests/queries/0_stateless/00942_dataparts_500.sh b/tests/queries/0_stateless/00942_dataparts_500.sh index 7e1a7f15810..a6c3fcd4303 100755 --- a/tests/queries/0_stateless/00942_dataparts_500.sh +++ b/tests/queries/0_stateless/00942_dataparts_500.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check # Test fix for issue #5066 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index 5ccef802c0c..f143c97bdf4 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-backward-compatibility-check +# Tags: race, zookeeper, no-parallel, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index 1f316b4b389..aec27792603 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest, no-backward-compatibility-check +# Tags: zookeeper, no-parallel, no-fasttest, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index bbe3a5a51c0..8c9efb75e96 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-backward-compatibility-check +# Tags: race, zookeeper, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect index 8d23b3bef60..2bb6ba8726b 100755 --- a/tests/queries/0_stateless/01176_mysql_client_interactive.expect +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -12,9 +12,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT" diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.expect b/tests/queries/0_stateless/01179_insert_values_semicolon.expect index 35713a90297..cb22fb8265c 100755 --- a/tests/queries/0_stateless/01179_insert_values_semicolon.expect +++ b/tests/queries/0_stateless/01179_insert_values_semicolon.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -11,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -22,7 +21,7 @@ expect ":) " send -- "DROP TABLE IF EXISTS test_01179\r" expect "Ok." -send -- "CREATE TABLE test_01179 (date DateTime) ENGINE=Memory()\r" +send -- "CREATE TABLE test_01179 (date DateTime64(3)) ENGINE=Memory()\r" expect "Ok." send -- "INSERT INTO test_01179 values ('2020-01-01')\r" @@ -31,14 +30,14 @@ expect "Ok." send -- "INSERT INTO test_01179 values ('2020-01-01'); \r" expect "Ok." -send -- "INSERT INTO test_01179 values ('2020-01-01'); (1) \r" +send -- "INSERT INTO test_01179 values ('2020-01-01 0'); (1) \r" expect "Cannot read data after semicolon" send -- "SELECT date, count() FROM test_01179 GROUP BY date FORMAT TSV\r" -expect "2020-01-01 00:00:00\t3" +expect "2020-01-01 00:00:00.000\t2" send -- "DROP TABLE test_01179\r" expect "Ok." -send -- "\4" +send -- "exit\r" expect eof diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.expect b/tests/queries/0_stateless/01180_client_syntax_errors.expect index c1fd0f93510..508ebc3433b 100755 --- a/tests/queries/0_stateless/01180_client_syntax_errors.expect +++ b/tests/queries/0_stateless/01180_client_syntax_errors.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -31,6 +31,7 @@ expect "Syntax error: failed at position 93 ('UInt64'):*" send -- "select (1, 2\r" expect "Syntax error: failed at position 8 ('('):" expect "Unmatched parentheses: (" +expect ":) " send -- "\4" expect eof diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index ed9bc8af61b..8074e84f0ed 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-backward-compatibility-check +-- Tags: no-parallel, no-upgrade-check DROP DATABASE IF EXISTS test_01191; CREATE DATABASE test_01191 ENGINE=Atomic; diff --git a/tests/queries/0_stateless/01221_system_settings.reference b/tests/queries/0_stateless/01221_system_settings.reference index 41f96bdceb5..399b3778b66 100644 --- a/tests/queries/0_stateless/01221_system_settings.reference +++ b/tests/queries/0_stateless/01221_system_settings.reference @@ -1,4 +1,4 @@ -send_timeout 300 0 Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. If you set this setting on client, the \'receive_timeout\' for the socket will be also set on the corresponding connection end on the server. \N \N 0 Seconds +send_timeout 300 0 Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. If you set this setting on client, the \'receive_timeout\' for the socket will be also set on the corresponding connection end on the server. \N \N 0 Seconds 300 storage_policy default 0 Name of storage disk policy \N \N 0 String 1 1 diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference index b50519b9b3a..ea04f155f24 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference @@ -22,7 +22,7 @@ SELECT min(n) + 1, 1 + min(n), min(n) - 1, - 1 - min(n) + 1 - max(n) FROM ( SELECT number AS n @@ -42,7 +42,7 @@ SELECT max(n) + 1, 1 + max(n), max(n) - 1, - 1 - max(n) + 1 - min(n) FROM ( SELECT number AS n @@ -82,7 +82,7 @@ SELECT min(n) + -1, -1 + min(n), min(n) - -1, - -1 - min(n) + -1 - max(n) FROM ( SELECT number AS n @@ -102,7 +102,7 @@ SELECT max(n) + -1, -1 + max(n), max(n) - -1, - -1 - max(n) + -1 - min(n) FROM ( SELECT number AS n @@ -142,7 +142,7 @@ SELECT min(abs(2)) + 1, min(abs(2) + n), min(n - abs(2)), - 1 - min(abs(2)) + 1 - max(abs(2)) FROM ( SELECT number AS n @@ -162,7 +162,7 @@ SELECT max(abs(2)) + 1, max(abs(2) + n), max(n - abs(2)), - 1 - max(abs(2)) + 1 - min(abs(2)) FROM ( SELECT number AS n @@ -202,7 +202,7 @@ SELECT min(abs(n)) + 1, min(abs(n) + n), min(n - abs(n)), - 1 - min(abs(n)) + 1 - max(abs(n)) FROM ( SELECT number AS n @@ -222,7 +222,7 @@ SELECT max(abs(n)) + 1, max(abs(n) + n), max(n - abs(n)), - 1 - max(abs(n)) + 1 - min(abs(n)) FROM ( SELECT number AS n @@ -262,7 +262,7 @@ SELECT min(n * n) + 1, 1 + min(n * n), min(n * n) - 1, - 1 - min(n * n) + 1 - max(n * n) FROM ( SELECT number AS n @@ -282,7 +282,7 @@ SELECT max(n * n) + 1, 1 + max(n * n), max(n * n) - 1, - 1 - max(n * n) + 1 - min(n * n) FROM ( SELECT number AS n @@ -382,7 +382,7 @@ SELECT (min(n) + -1) + -1, (-1 + min(n)) + -1, (min(n) - -1) + -1, - (-1 - min(n)) + -1 + (-1 - max(n)) + -1 FROM ( SELECT number AS n @@ -402,7 +402,7 @@ SELECT (max(n) + -1) + -1, (-1 + max(n)) + -1, (max(n) - -1) + -1, - (-1 - max(n)) + -1 + (-1 - min(n)) + -1 FROM ( SELECT number AS n @@ -430,7 +430,7 @@ FROM SELECT number AS n FROM numbers(10) ) -SELECT (((min(n) + 1) + (1 + min(n))) + (min(n) - 1)) + (1 - min(n)) +SELECT (((min(n) + 1) + (1 + min(n))) + (min(n) - 1)) + (1 - max(n)) FROM ( SELECT number AS n @@ -442,7 +442,7 @@ FROM SELECT number AS n FROM numbers(10) ) -SELECT (((max(n) + 1) + (1 + max(n))) + (max(n) - 1)) + (1 - max(n)) +SELECT (((max(n) + 1) + (1 + max(n))) + (max(n) - 1)) + (1 - min(n)) FROM ( SELECT number AS n @@ -456,15 +456,15 @@ FROM ) 55 55 35 -35 90 90 22.5 inf -1 1 -1 1 +1 1 -1 -8 0 0 0 0.1111111111111111 -10 10 8 -8 +10 10 8 1 18 18 4.5 inf 35 35 55 -55 -90 -90 -22.5 -inf --1 -1 1 -1 +-1 -1 1 -10 -18 -18 -4.5 -inf -8 8 10 -10 +8 8 10 -1 0 0 -0 -0.1111111111111111 30 65 25 -10 40 90 22.5 5 @@ -474,15 +474,15 @@ FROM 4 18 4.5 0.5 55 90 0 -35 90 285 nan inf -1 0 0 1 +1 0 0 -8 0 0 nan 0.1111111111111111 -10 18 0 -8 +10 18 0 1 18 81 nan inf 295 295 275 -275 570 570 142.5 nan -1 1 -1 1 +1 1 -1 -80 0 0 0 nan -82 82 80 -80 +82 82 80 1 162 162 40.5 nan 65 65 45 -25 100 100 32.5 inf @@ -492,15 +492,15 @@ FROM 19 19 5.5 inf 25 25 45 -65 90 90 22.5 inf --2 -2 0 -2 +-2 -2 0 -11 0 0 0 0.1111111111111111 -7 7 9 -11 +7 7 9 -2 18 18 4.5 inf 110 inf -2 +-7 0.1111111111111111 -20 +29 inf -15444 68.62157087543459 diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 58b1cab6e20..c061eb95a65 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -90,6 +90,7 @@ SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS SHOW ACCESS [] \N ACCESS MANAGEMENT SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] GLOBAL ACCESS MANAGEMENT +SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] GLOBAL ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect index 629698b4565..3bfd454bb1f 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect @@ -11,9 +11,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } # useful debugging configuration diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect index 6b11b1eee15..1ded43d3fed 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" diff --git a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect index de485383024..a593075bb9a 100755 --- a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect +++ b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -11,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" diff --git a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh index f7615974237..f9a2ec8a34c 100755 --- a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh +++ b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-backward-compatibility-check +# Tags: long, zookeeper, no-parallel, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index 8547be839d4..90e19e077ec 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" diff --git a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql index 8717c93b468..43c9fa43104 100644 --- a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql +++ b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-backward-compatibility-check +-- Tags: zookeeper, no-upgrade-check DROP TABLE IF EXISTS table_rename_with_ttl; diff --git a/tests/queries/0_stateless/01391_join_on_dict_crash.sql b/tests/queries/0_stateless/01391_join_on_dict_crash.sql index 13ebd080621..5321e03767f 100644 --- a/tests/queries/0_stateless/01391_join_on_dict_crash.sql +++ b/tests/queries/0_stateless/01391_join_on_dict_crash.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-backward-compatibility-check +-- Tags: no-parallel, no-upgrade-check DROP DATABASE IF EXISTS db_01391; CREATE DATABASE db_01391; diff --git a/tests/queries/0_stateless/01442_date_time_with_params.reference b/tests/queries/0_stateless/01442_date_time_with_params.reference index bc819d5f8fc..b5478665a1c 100644 --- a/tests/queries/0_stateless/01442_date_time_with_params.reference +++ b/tests/queries/0_stateless/01442_date_time_with_params.reference @@ -15,6 +15,9 @@ parseDateTimeBestEffort 2020-05-14 03:37:03.253 DateTime64(3, \'UTC\') 2021-12-28 00:00:00.123 DateTime64(3, \'UTC\') 2021-12-28 00:00:00 DateTime(\'UTC\') +2021-12-15 00:00:00 DateTime +2021-12-15 00:00:00 DateTime(\'UTC\') +2021-12-15 00:00:00.000 DateTime64(3, \'UTC\') parseDateTimeBestEffortOrNull \N Nullable(DateTime64(3)) 2020-05-14 03:37:03.000 Nullable(DateTime64(3, \'UTC\')) @@ -29,6 +32,9 @@ parseDateTimeBestEffortOrNull 2020-05-14 03:37:03.253 Nullable(DateTime64(3, \'UTC\')) 2021-12-28 00:00:00.123 Nullable(DateTime64(3, \'UTC\')) 2021-12-28 00:00:00 Nullable(DateTime(\'UTC\')) +2021-12-15 00:00:00 Nullable(DateTime) +2021-12-15 00:00:00 Nullable(DateTime(\'UTC\')) +2021-12-15 00:00:00.000 Nullable(DateTime64(3, \'UTC\')) parseDateTimeBestEffortOrZero 1970-01-01 00:00:00.000 DateTime64(3, \'UTC\') 2020-05-14 03:37:03.000 DateTime64(3, \'UTC\') @@ -43,6 +49,9 @@ parseDateTimeBestEffortOrZero 2020-05-14 03:37:03.253 DateTime64(3, \'UTC\') 2021-12-28 00:00:00.123 DateTime64(3, \'UTC\') 2021-12-28 00:00:00 DateTime(\'UTC\') +2021-12-15 00:00:00 DateTime +2021-12-15 00:00:00 DateTime(\'UTC\') +2021-12-15 00:00:00.000 DateTime64(3, \'UTC\') parseDateTime32BestEffort 2020-05-14 03:37:03 DateTime(\'UTC\') 2020-05-14 03:37:03 DateTime(\'UTC\') @@ -55,6 +64,8 @@ parseDateTime32BestEffort 2020-05-14 06:37:03 DateTime(\'Europe/Minsk\') 2020-05-14 03:37:03 DateTime(\'UTC\') 2021-12-28 00:00:00 DateTime(\'UTC\') +2021-12-15 00:00:00 DateTime +2021-12-15 00:00:00 DateTime(\'UTC\') parseDateTime32BestEffortOrNull \N Nullable(DateTime) 2020-05-14 03:37:03 Nullable(DateTime(\'UTC\')) @@ -68,6 +79,8 @@ parseDateTime32BestEffortOrNull 2020-05-14 06:37:03 Nullable(DateTime(\'Europe/Minsk\')) 2020-05-14 03:37:03 Nullable(DateTime(\'UTC\')) 2021-12-28 00:00:00 Nullable(DateTime(\'UTC\')) +2021-12-15 00:00:00 Nullable(DateTime) +2021-12-15 00:00:00 Nullable(DateTime(\'UTC\')) parseDateTime32BestEffortOrZero 1970-01-01 00:00:00 DateTime(\'UTC\') 2020-05-14 03:37:03 DateTime(\'UTC\') @@ -81,3 +94,5 @@ parseDateTime32BestEffortOrZero 2020-05-14 06:37:03 DateTime(\'Europe/Minsk\') 2020-05-14 03:37:03 DateTime(\'UTC\') 2021-12-28 00:00:00 DateTime(\'UTC\') +2021-12-15 00:00:00 DateTime +2021-12-15 00:00:00 DateTime(\'UTC\') diff --git a/tests/queries/0_stateless/01442_date_time_with_params.sql b/tests/queries/0_stateless/01442_date_time_with_params.sql index aeb9aa597ef..6b32ee4919d 100644 --- a/tests/queries/0_stateless/01442_date_time_with_params.sql +++ b/tests/queries/0_stateless/01442_date_time_with_params.sql @@ -26,6 +26,9 @@ SELECT parseDateTimeBestEffort('2020-05-14T03:37:03.253184Z', 3, 'Europe/Minsk') SELECT parseDateTimeBestEffort(materialize('2020-05-14T03:37:03.253184Z'), 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffort('1640649600123', 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffort('1640649600123', 'UTC') AS a, toTypeName(a); +SELECT parseDateTimeBestEffort('Dec 15, 2021') AS a, toTypeName(a); +SELECT parseDateTimeBestEffort('Dec 15, 2021', 'UTC') AS a, toTypeName(a); +SELECT parseDateTimeBestEffort('Dec 15, 2021', 3, 'UTC') AS a, toTypeName(a); SELECT 'parseDateTimeBestEffortOrNull'; SELECT parseDateTimeBestEffortOrNull('', 3) AS a, toTypeName(a); @@ -41,6 +44,9 @@ SELECT parseDateTimeBestEffortOrNull('2020-05-14T03:37:03.253184Z', 3, 'Europe/M SELECT parseDateTimeBestEffortOrNull(materialize('2020-05-14T03:37:03.253184Z'), 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffortOrNull('1640649600123', 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffortOrNull('1640649600123', 'UTC') AS a, toTypeName(a); +SELECT parseDateTimeBestEffortOrNull('Dec 15, 2021') AS a, toTypeName(a); +SELECT parseDateTimeBestEffortOrNull('Dec 15, 2021', 'UTC') AS a, toTypeName(a); +SELECT parseDateTimeBestEffortOrNull('Dec 15, 2021', 3, 'UTC') AS a, toTypeName(a); SELECT 'parseDateTimeBestEffortOrZero'; SELECT parseDateTimeBestEffortOrZero('', 3, 'UTC') AS a, toTypeName(a); @@ -56,6 +62,9 @@ SELECT parseDateTimeBestEffortOrZero('2020-05-14T03:37:03.253184Z', 3, 'Europe/M SELECT parseDateTimeBestEffortOrZero(materialize('2020-05-14T03:37:03.253184Z'), 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffortOrZero('1640649600123', 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffortOrZero('1640649600123', 'UTC') AS a, toTypeName(a); +SELECT parseDateTimeBestEffortOrZero('Dec 15, 2021') AS a, toTypeName(a); +SELECT parseDateTimeBestEffortOrZero('Dec 15, 2021', 'UTC') AS a, toTypeName(a); +SELECT parseDateTimeBestEffortOrZero('Dec 15, 2021', 3, 'UTC') AS a, toTypeName(a); SELECT 'parseDateTime32BestEffort'; SELECT parseDateTime32BestEffort('') AS a, toTypeName(a); -- {serverError 41} @@ -70,6 +79,8 @@ SELECT parseDateTime32BestEffort('2020-05-14T03:37:03.253184Z', 'UTC') AS a, toT SELECT parseDateTime32BestEffort('2020-05-14T03:37:03.253184Z', 'Europe/Minsk') AS a, toTypeName(a); SELECT parseDateTime32BestEffort(materialize('2020-05-14T03:37:03.253184Z'), 'UTC') AS a, toTypeName(a); SELECT parseDateTime32BestEffort('1640649600123', 'UTC') AS a, toTypeName(a); +SELECT parseDateTime32BestEffort('Dec 15, 2021') AS a, toTypeName(a); +SELECT parseDateTime32BestEffort('Dec 15, 2021', 'UTC') AS a, toTypeName(a); SELECT 'parseDateTime32BestEffortOrNull'; SELECT parseDateTime32BestEffortOrNull('') AS a, toTypeName(a); @@ -84,6 +95,8 @@ SELECT parseDateTime32BestEffortOrNull('2020-05-14T03:37:03.253184Z', 'UTC') AS SELECT parseDateTime32BestEffortOrNull('2020-05-14T03:37:03.253184Z', 'Europe/Minsk') AS a, toTypeName(a); SELECT parseDateTime32BestEffortOrNull(materialize('2020-05-14T03:37:03.253184Z'), 'UTC') AS a, toTypeName(a); SELECT parseDateTime32BestEffortOrNull('1640649600123', 'UTC') AS a, toTypeName(a); +SELECT parseDateTime32BestEffortOrNull('Dec 15, 2021') AS a, toTypeName(a); +SELECT parseDateTime32BestEffortOrNull('Dec 15, 2021', 'UTC') AS a, toTypeName(a); SELECT 'parseDateTime32BestEffortOrZero'; SELECT parseDateTime32BestEffortOrZero('', 'UTC') AS a, toTypeName(a); @@ -98,5 +111,7 @@ SELECT parseDateTime32BestEffortOrZero('2020-05-14T03:37:03.253184Z', 'UTC') AS SELECT parseDateTime32BestEffortOrZero('2020-05-14T03:37:03.253184Z', 'Europe/Minsk') AS a, toTypeName(a); SELECT parseDateTime32BestEffortOrZero(materialize('2020-05-14T03:37:03.253184Z'), 'UTC') AS a, toTypeName(a); SELECT parseDateTime32BestEffortOrZero('1640649600123', 'UTC') AS a, toTypeName(a); +SELECT parseDateTime32BestEffortOrZero('Dec 15, 2021') AS a, toTypeName(a); +SELECT parseDateTime32BestEffortOrZero('Dec 15, 2021', 'UTC') AS a, toTypeName(a); DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/01520_client_print_query_id.expect b/tests/queries/0_stateless/01520_client_print_query_id.expect index cbeacc6a4ec..2034483a73d 100755 --- a/tests/queries/0_stateless/01520_client_print_query_id.expect +++ b/tests/queries/0_stateless/01520_client_print_query_id.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -21,10 +21,12 @@ expect ":) " # Make a query send -- "SELECT 'print query id'\r" expect { - "Query id: *" { } + # (?n) - Do not match new lines [1] + # [1]: https://www.tcl.tk/man/tcl8.6/TclCmd/re_syntax.html + -re "(?n)Query id: .*" { } timeout { exit 1 } } -expect "print query id" +expect "'print query id" expect ":) " send -- "\4" diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference index bd0eac10816..745160a517e 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference @@ -1,4 +1,4 @@ masked -3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000" +3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000","AUTHENTICATION_FAILED",1 no masking 1,"default@localhost:9000" diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql index bdcde1adbad..fea75e1439f 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check -- force data path with the user/pass in it set use_compact_format_in_distributed_parts_names=0; @@ -18,7 +18,7 @@ create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect insert into dist_01555 values (1)(2); -- since test_cluster_with_incorrect_pw contains incorrect password ignore error system flush distributed dist_01555; -- { serverError 516; } -select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; +select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1'), extract(last_exception, 'AUTHENTICATION_FAILED'), dateDiff('s', last_exception_time, now()) < 5 from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; drop table dist_01555; diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index bf701225605..0faf8f0192b 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long # This is a separate test, because we want to test the interactive mode. # https://github.com/ClickHouse/ClickHouse/issues/19353 diff --git a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh index 27de10ab16a..4a9b4beee5b 100755 --- a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh +++ b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql index 1dae8e7b383..b45a1974611 100644 --- a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql +++ b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-backward-compatibility-check +-- Tags: long, no-upgrade-check DROP TABLE IF EXISTS test_01640; DROP TABLE IF EXISTS restore_01640; diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index 056613c11b5..617148de5a3 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -34,9 +34,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i \$any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i \$any_spawn_id timeout { exit 1 } } spawn bash -c "$*" diff --git a/tests/queries/0_stateless/01710_normal_projections.sh b/tests/queries/0_stateless/01710_normal_projections.sh index 70e38b3722a..3f2114b9a2b 100755 --- a/tests/queries/0_stateless/01710_normal_projections.sh +++ b/tests/queries/0_stateless/01710_normal_projections.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "CREATE TABLE test_sort_proj (x UInt32, y UInt32, PROJECTION p (SELECT x, y ORDER BY y)) ENGINE = MergeTree ORDER BY x" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_sort_proj (x UInt32, y UInt32, PROJECTION p (SELECT x, y ORDER BY y)) ENGINE = MergeTree ORDER BY x SETTINGS index_granularity=8192, index_granularity_bytes='10Mi'" $CLICKHOUSE_CLIENT -q "insert into test_sort_proj select number, toUInt32(-number - 1) from numbers(100)" echo "select where x < 10" diff --git a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect index 223690f1f8b..a7e4b45eb44 100755 --- a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect +++ b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" diff --git a/tests/queries/0_stateless/01780_column_sparse_alter.sql b/tests/queries/0_stateless/01780_column_sparse_alter.sql index 925b81ea2c2..bc2f6f7c91f 100644 --- a/tests/queries/0_stateless/01780_column_sparse_alter.sql +++ b/tests/queries/0_stateless/01780_column_sparse_alter.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SET mutations_sync = 2; diff --git a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect index 0e06c2f99df..9105cf30f79 100755 --- a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect +++ b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -10,9 +9,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } # history file is not required, in-memory history is enough diff --git a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect index 0c95b630742..69c5ff0118e 100755 --- a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect +++ b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } exec bash -c "echo select 1 > $history_file.txt" diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index c93635b3b27..4e6dd3e1b0f 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -14,9 +14,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } set Debug_type 0 diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.expect b/tests/queries/0_stateless/02003_memory_limit_in_client.expect index 4f28fafc1e6..377656fa641 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.expect +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: no-fasttest # This is a test for system.warnings. Testing in interactive mode is necessary, # as we want to see certain warnings from client @@ -15,9 +14,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } # diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index 2f43423e13e..3abf5c52031 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index e4041b2d755..e0f0114d030 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02047_client_exception.expect b/tests/queries/0_stateless/02047_client_exception.expect index 69f468907a3..4dfdf211ba2 100755 --- a/tests/queries/0_stateless/02047_client_exception.expect +++ b/tests/queries/0_stateless/02047_client_exception.expect @@ -6,14 +6,14 @@ exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 set history_file $env(CLICKHOUSE_TMP)/$basename.history log_user 0 -set timeout 20 +set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -31,5 +31,5 @@ expect "Received exception from server" send -- "DROP TABLE test_02047\r" expect "Ok." -send -- "\4" +send -- "exit\r" expect eof diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect index a9905128ad5..a1454696253 100755 --- a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect +++ b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect @@ -5,14 +5,14 @@ set basename [file tail $argv0] exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 log_user 0 -set timeout 20 +set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" @@ -50,5 +50,5 @@ expect "Ok." send -- "drop table t\r" expect "Ok." -send -- "\4" +send -- "exit\r" expect eof diff --git a/tests/queries/0_stateless/02067_lost_part_s3.sql b/tests/queries/0_stateless/02067_lost_part_s3.sql index c4e69f68a5d..12afdcd4421 100644 --- a/tests/queries/0_stateless/02067_lost_part_s3.sql +++ b/tests/queries/0_stateless/02067_lost_part_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check, no-fasttest +-- Tags: no-upgrade-check, no-fasttest DROP TABLE IF EXISTS partslost_0; DROP TABLE IF EXISTS partslost_1; diff --git a/tests/queries/0_stateless/02105_backslash_letter_commands.expect b/tests/queries/0_stateless/02105_backslash_letter_commands.expect index 8f8ec1f5abd..984e6f6d2eb 100755 --- a/tests/queries/0_stateless/02105_backslash_letter_commands.expect +++ b/tests/queries/0_stateless/02105_backslash_letter_commands.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -20,15 +20,15 @@ expect ":) " # Send a command send -- "\\ld;\r" -expect "Syntax error: *" +expect "Syntax error: " expect ":) " send -- "\\c;\r" -expect "Syntax error: *" +expect "Syntax error: " expect ":) " send -- " \\l ; \\d; \r" -expect "Syntax error (Multi-statements are not allowed): *" +expect "Syntax error (Multi-statements are not allowed): " expect ":) " send -- " \\l ;\r" diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect index 4fd430a4a69..5f882ae9824 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: no-parallel set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -11,20 +10,18 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } -system "$basedir/helpers/02112_prepare.sh" -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion --interactive --queries-file $basedir/file_02112" +system "echo \"drop table if exists t; create table t(i String) engine=Memory; insert into t select 'test string'\" > $env(CLICKHOUSE_TMP)/file_02112" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion --interactive --queries-file $env(CLICKHOUSE_TMP)/file_02112" expect ":) " -send -- "select * from t format TSV\r" -expect "1" +send -- "select i from t format TSV\r" +expect "test string" expect ":) " send "" expect eof - -system "$basedir/helpers/02112_clean.sh" diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect index a90e85d1069..3413651fe68 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect @@ -10,9 +10,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --query 'create table t(i Int32) engine=Memory; insert into t select 1'" diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect index 34eac360132..0a136ff2e74 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: no-parallel set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -11,20 +10,18 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } -system "$basedir/helpers/02112_prepare.sh" -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --queries-file $basedir/file_02112" +system "echo \"drop table if exists t; create table t(i String) engine=Memory; insert into t select 'test string'\" > $env(CLICKHOUSE_TMP)/file_02112" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --queries-file $env(CLICKHOUSE_TMP)/file_02112" expect ":) " -send -- "select * from t format TSV\r" -expect "1" +send -- "select \* from t format TSV\r" +expect "test string" expect ":) " send "" expect eof - -system "$basedir/helpers/02112_clean.sh" diff --git a/tests/queries/0_stateless/02116_interactive_hello.expect b/tests/queries/0_stateless/02116_interactive_hello.expect index 7e895196304..3f2140861ea 100755 --- a/tests/queries/0_stateless/02116_interactive_hello.expect +++ b/tests/queries/0_stateless/02116_interactive_hello.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -12,16 +11,17 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" -expect -re "ClickHouse client version \[\\d\]{2}.\[\\d\]{1,2}.\[\\d\]{1,2}.\[\\d\]{1,2}.\r" -expect -re "Connecting to database .* at localhost:9000 as user default.\r" -expect -re "Connected to ClickHouse server version \[\\d\]{2}.\[\\d\]{1,2}.\[\\d\]{1,2} revision .*\r" +# (?n) - Do not match new lines +expect -re "(?n)ClickHouse client version \[\\d\]{2}\.\[\\d\]{1,2}\.\[\\d\]{1,2}\.\[\\d\]{1,}.*\r" +expect -re "(?n)Connecting to database .* at localhost:9000 as user default\.\r" +expect -re "(?n)Connected to ClickHouse server version \[\\d\]{2}\.\[\\d\]{1,2}\.\[\\d\]{1,2} revision \[\\d\]{1,}\.\r" expect ":) " send -- "" diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index aabe05ea5e2..f77076bcd5c 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -229,7 +229,8 @@ CREATE TABLE system.distribution_queue `data_compressed_bytes` UInt64, `broken_data_files` UInt64, `broken_data_compressed_bytes` UInt64, - `last_exception` String + `last_exception` String, + `last_exception_time` DateTime ) ENGINE = SystemDistributionQueue COMMENT 'SYSTEM TABLE is built on the fly.' @@ -288,7 +289,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP S3 CLIENT CACHE' = 102, 'SYSTEM DROP CACHE' = 103, 'SYSTEM RELOAD CONFIG' = 104, 'SYSTEM RELOAD USERS' = 105, 'SYSTEM RELOAD SYMBOLS' = 106, 'SYSTEM RELOAD DICTIONARY' = 107, 'SYSTEM RELOAD MODEL' = 108, 'SYSTEM RELOAD FUNCTION' = 109, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 110, 'SYSTEM RELOAD' = 111, 'SYSTEM RESTART DISK' = 112, 'SYSTEM MERGES' = 113, 'SYSTEM TTL MERGES' = 114, 'SYSTEM FETCHES' = 115, 'SYSTEM MOVES' = 116, 'SYSTEM DISTRIBUTED SENDS' = 117, 'SYSTEM REPLICATED SENDS' = 118, 'SYSTEM SENDS' = 119, 'SYSTEM REPLICATION QUEUES' = 120, 'SYSTEM DROP REPLICA' = 121, 'SYSTEM SYNC REPLICA' = 122, 'SYSTEM RESTART REPLICA' = 123, 'SYSTEM RESTORE REPLICA' = 124, 'SYSTEM WAIT LOADING PARTS' = 125, 'SYSTEM SYNC DATABASE REPLICA' = 126, 'SYSTEM SYNC TRANSACTION LOG' = 127, 'SYSTEM SYNC FILE CACHE' = 128, 'SYSTEM FLUSH DISTRIBUTED' = 129, 'SYSTEM FLUSH LOGS' = 130, 'SYSTEM FLUSH' = 131, 'SYSTEM THREAD FUZZER' = 132, 'SYSTEM UNFREEZE' = 133, 'SYSTEM' = 134, 'dictGet' = 135, 'addressToLine' = 136, 'addressToLineWithInlines' = 137, 'addressToSymbol' = 138, 'demangle' = 139, 'INTROSPECTION' = 140, 'FILE' = 141, 'URL' = 142, 'REMOTE' = 143, 'MONGO' = 144, 'MEILISEARCH' = 145, 'MYSQL' = 146, 'POSTGRES' = 147, 'SQLITE' = 148, 'ODBC' = 149, 'JDBC' = 150, 'HDFS' = 151, 'S3' = 152, 'HIVE' = 153, 'SOURCES' = 154, 'CLUSTER' = 155, 'ALL' = 156, 'NONE' = 157), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'SHOW NAMED COLLECTIONS SECRETS' = 92, 'ACCESS MANAGEMENT' = 93, 'SYSTEM SHUTDOWN' = 94, 'SYSTEM DROP DNS CACHE' = 95, 'SYSTEM DROP MARK CACHE' = 96, 'SYSTEM DROP UNCOMPRESSED CACHE' = 97, 'SYSTEM DROP MMAP CACHE' = 98, 'SYSTEM DROP QUERY CACHE' = 99, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 100, 'SYSTEM DROP FILESYSTEM CACHE' = 101, 'SYSTEM DROP SCHEMA CACHE' = 102, 'SYSTEM DROP S3 CLIENT CACHE' = 103, 'SYSTEM DROP CACHE' = 104, 'SYSTEM RELOAD CONFIG' = 105, 'SYSTEM RELOAD USERS' = 106, 'SYSTEM RELOAD SYMBOLS' = 107, 'SYSTEM RELOAD DICTIONARY' = 108, 'SYSTEM RELOAD MODEL' = 109, 'SYSTEM RELOAD FUNCTION' = 110, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 111, 'SYSTEM RELOAD' = 112, 'SYSTEM RESTART DISK' = 113, 'SYSTEM MERGES' = 114, 'SYSTEM TTL MERGES' = 115, 'SYSTEM FETCHES' = 116, 'SYSTEM MOVES' = 117, 'SYSTEM DISTRIBUTED SENDS' = 118, 'SYSTEM REPLICATED SENDS' = 119, 'SYSTEM SENDS' = 120, 'SYSTEM REPLICATION QUEUES' = 121, 'SYSTEM DROP REPLICA' = 122, 'SYSTEM SYNC REPLICA' = 123, 'SYSTEM RESTART REPLICA' = 124, 'SYSTEM RESTORE REPLICA' = 125, 'SYSTEM WAIT LOADING PARTS' = 126, 'SYSTEM SYNC DATABASE REPLICA' = 127, 'SYSTEM SYNC TRANSACTION LOG' = 128, 'SYSTEM SYNC FILE CACHE' = 129, 'SYSTEM FLUSH DISTRIBUTED' = 130, 'SYSTEM FLUSH LOGS' = 131, 'SYSTEM FLUSH' = 132, 'SYSTEM THREAD FUZZER' = 133, 'SYSTEM UNFREEZE' = 134, 'SYSTEM' = 135, 'dictGet' = 136, 'addressToLine' = 137, 'addressToLineWithInlines' = 138, 'addressToSymbol' = 139, 'demangle' = 140, 'INTROSPECTION' = 141, 'FILE' = 142, 'URL' = 143, 'REMOTE' = 144, 'MONGO' = 145, 'MEILISEARCH' = 146, 'MYSQL' = 147, 'POSTGRES' = 148, 'SQLITE' = 149, 'ODBC' = 150, 'JDBC' = 151, 'HDFS' = 152, 'S3' = 153, 'HIVE' = 154, 'SOURCES' = 155, 'CLUSTER' = 156, 'ALL' = 157, 'NONE' = 158), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -569,10 +570,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP S3 CLIENT CACHE' = 102, 'SYSTEM DROP CACHE' = 103, 'SYSTEM RELOAD CONFIG' = 104, 'SYSTEM RELOAD USERS' = 105, 'SYSTEM RELOAD SYMBOLS' = 106, 'SYSTEM RELOAD DICTIONARY' = 107, 'SYSTEM RELOAD MODEL' = 108, 'SYSTEM RELOAD FUNCTION' = 109, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 110, 'SYSTEM RELOAD' = 111, 'SYSTEM RESTART DISK' = 112, 'SYSTEM MERGES' = 113, 'SYSTEM TTL MERGES' = 114, 'SYSTEM FETCHES' = 115, 'SYSTEM MOVES' = 116, 'SYSTEM DISTRIBUTED SENDS' = 117, 'SYSTEM REPLICATED SENDS' = 118, 'SYSTEM SENDS' = 119, 'SYSTEM REPLICATION QUEUES' = 120, 'SYSTEM DROP REPLICA' = 121, 'SYSTEM SYNC REPLICA' = 122, 'SYSTEM RESTART REPLICA' = 123, 'SYSTEM RESTORE REPLICA' = 124, 'SYSTEM WAIT LOADING PARTS' = 125, 'SYSTEM SYNC DATABASE REPLICA' = 126, 'SYSTEM SYNC TRANSACTION LOG' = 127, 'SYSTEM SYNC FILE CACHE' = 128, 'SYSTEM FLUSH DISTRIBUTED' = 129, 'SYSTEM FLUSH LOGS' = 130, 'SYSTEM FLUSH' = 131, 'SYSTEM THREAD FUZZER' = 132, 'SYSTEM UNFREEZE' = 133, 'SYSTEM' = 134, 'dictGet' = 135, 'addressToLine' = 136, 'addressToLineWithInlines' = 137, 'addressToSymbol' = 138, 'demangle' = 139, 'INTROSPECTION' = 140, 'FILE' = 141, 'URL' = 142, 'REMOTE' = 143, 'MONGO' = 144, 'MEILISEARCH' = 145, 'MYSQL' = 146, 'POSTGRES' = 147, 'SQLITE' = 148, 'ODBC' = 149, 'JDBC' = 150, 'HDFS' = 151, 'S3' = 152, 'HIVE' = 153, 'SOURCES' = 154, 'CLUSTER' = 155, 'ALL' = 156, 'NONE' = 157), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'SHOW NAMED COLLECTIONS SECRETS' = 92, 'ACCESS MANAGEMENT' = 93, 'SYSTEM SHUTDOWN' = 94, 'SYSTEM DROP DNS CACHE' = 95, 'SYSTEM DROP MARK CACHE' = 96, 'SYSTEM DROP UNCOMPRESSED CACHE' = 97, 'SYSTEM DROP MMAP CACHE' = 98, 'SYSTEM DROP QUERY CACHE' = 99, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 100, 'SYSTEM DROP FILESYSTEM CACHE' = 101, 'SYSTEM DROP SCHEMA CACHE' = 102, 'SYSTEM DROP S3 CLIENT CACHE' = 103, 'SYSTEM DROP CACHE' = 104, 'SYSTEM RELOAD CONFIG' = 105, 'SYSTEM RELOAD USERS' = 106, 'SYSTEM RELOAD SYMBOLS' = 107, 'SYSTEM RELOAD DICTIONARY' = 108, 'SYSTEM RELOAD MODEL' = 109, 'SYSTEM RELOAD FUNCTION' = 110, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 111, 'SYSTEM RELOAD' = 112, 'SYSTEM RESTART DISK' = 113, 'SYSTEM MERGES' = 114, 'SYSTEM TTL MERGES' = 115, 'SYSTEM FETCHES' = 116, 'SYSTEM MOVES' = 117, 'SYSTEM DISTRIBUTED SENDS' = 118, 'SYSTEM REPLICATED SENDS' = 119, 'SYSTEM SENDS' = 120, 'SYSTEM REPLICATION QUEUES' = 121, 'SYSTEM DROP REPLICA' = 122, 'SYSTEM SYNC REPLICA' = 123, 'SYSTEM RESTART REPLICA' = 124, 'SYSTEM RESTORE REPLICA' = 125, 'SYSTEM WAIT LOADING PARTS' = 126, 'SYSTEM SYNC DATABASE REPLICA' = 127, 'SYSTEM SYNC TRANSACTION LOG' = 128, 'SYSTEM SYNC FILE CACHE' = 129, 'SYSTEM FLUSH DISTRIBUTED' = 130, 'SYSTEM FLUSH LOGS' = 131, 'SYSTEM FLUSH' = 132, 'SYSTEM THREAD FUZZER' = 133, 'SYSTEM UNFREEZE' = 134, 'SYSTEM' = 135, 'dictGet' = 136, 'addressToLine' = 137, 'addressToLineWithInlines' = 138, 'addressToSymbol' = 139, 'demangle' = 140, 'INTROSPECTION' = 141, 'FILE' = 142, 'URL' = 143, 'REMOTE' = 144, 'MONGO' = 145, 'MEILISEARCH' = 146, 'MYSQL' = 147, 'POSTGRES' = 148, 'SQLITE' = 149, 'ODBC' = 150, 'JDBC' = 151, 'HDFS' = 152, 'S3' = 153, 'HIVE' = 154, 'SOURCES' = 155, 'CLUSTER' = 156, 'ALL' = 157, 'NONE' = 158), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP S3 CLIENT CACHE' = 102, 'SYSTEM DROP CACHE' = 103, 'SYSTEM RELOAD CONFIG' = 104, 'SYSTEM RELOAD USERS' = 105, 'SYSTEM RELOAD SYMBOLS' = 106, 'SYSTEM RELOAD DICTIONARY' = 107, 'SYSTEM RELOAD MODEL' = 108, 'SYSTEM RELOAD FUNCTION' = 109, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 110, 'SYSTEM RELOAD' = 111, 'SYSTEM RESTART DISK' = 112, 'SYSTEM MERGES' = 113, 'SYSTEM TTL MERGES' = 114, 'SYSTEM FETCHES' = 115, 'SYSTEM MOVES' = 116, 'SYSTEM DISTRIBUTED SENDS' = 117, 'SYSTEM REPLICATED SENDS' = 118, 'SYSTEM SENDS' = 119, 'SYSTEM REPLICATION QUEUES' = 120, 'SYSTEM DROP REPLICA' = 121, 'SYSTEM SYNC REPLICA' = 122, 'SYSTEM RESTART REPLICA' = 123, 'SYSTEM RESTORE REPLICA' = 124, 'SYSTEM WAIT LOADING PARTS' = 125, 'SYSTEM SYNC DATABASE REPLICA' = 126, 'SYSTEM SYNC TRANSACTION LOG' = 127, 'SYSTEM SYNC FILE CACHE' = 128, 'SYSTEM FLUSH DISTRIBUTED' = 129, 'SYSTEM FLUSH LOGS' = 130, 'SYSTEM FLUSH' = 131, 'SYSTEM THREAD FUZZER' = 132, 'SYSTEM UNFREEZE' = 133, 'SYSTEM' = 134, 'dictGet' = 135, 'addressToLine' = 136, 'addressToLineWithInlines' = 137, 'addressToSymbol' = 138, 'demangle' = 139, 'INTROSPECTION' = 140, 'FILE' = 141, 'URL' = 142, 'REMOTE' = 143, 'MONGO' = 144, 'MEILISEARCH' = 145, 'MYSQL' = 146, 'POSTGRES' = 147, 'SQLITE' = 148, 'ODBC' = 149, 'JDBC' = 150, 'HDFS' = 151, 'S3' = 152, 'HIVE' = 153, 'SOURCES' = 154, 'CLUSTER' = 155, 'ALL' = 156, 'NONE' = 157)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'SHOW NAMED COLLECTIONS SECRETS' = 92, 'ACCESS MANAGEMENT' = 93, 'SYSTEM SHUTDOWN' = 94, 'SYSTEM DROP DNS CACHE' = 95, 'SYSTEM DROP MARK CACHE' = 96, 'SYSTEM DROP UNCOMPRESSED CACHE' = 97, 'SYSTEM DROP MMAP CACHE' = 98, 'SYSTEM DROP QUERY CACHE' = 99, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 100, 'SYSTEM DROP FILESYSTEM CACHE' = 101, 'SYSTEM DROP SCHEMA CACHE' = 102, 'SYSTEM DROP S3 CLIENT CACHE' = 103, 'SYSTEM DROP CACHE' = 104, 'SYSTEM RELOAD CONFIG' = 105, 'SYSTEM RELOAD USERS' = 106, 'SYSTEM RELOAD SYMBOLS' = 107, 'SYSTEM RELOAD DICTIONARY' = 108, 'SYSTEM RELOAD MODEL' = 109, 'SYSTEM RELOAD FUNCTION' = 110, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 111, 'SYSTEM RELOAD' = 112, 'SYSTEM RESTART DISK' = 113, 'SYSTEM MERGES' = 114, 'SYSTEM TTL MERGES' = 115, 'SYSTEM FETCHES' = 116, 'SYSTEM MOVES' = 117, 'SYSTEM DISTRIBUTED SENDS' = 118, 'SYSTEM REPLICATED SENDS' = 119, 'SYSTEM SENDS' = 120, 'SYSTEM REPLICATION QUEUES' = 121, 'SYSTEM DROP REPLICA' = 122, 'SYSTEM SYNC REPLICA' = 123, 'SYSTEM RESTART REPLICA' = 124, 'SYSTEM RESTORE REPLICA' = 125, 'SYSTEM WAIT LOADING PARTS' = 126, 'SYSTEM SYNC DATABASE REPLICA' = 127, 'SYSTEM SYNC TRANSACTION LOG' = 128, 'SYSTEM SYNC FILE CACHE' = 129, 'SYSTEM FLUSH DISTRIBUTED' = 130, 'SYSTEM FLUSH LOGS' = 131, 'SYSTEM FLUSH' = 132, 'SYSTEM THREAD FUZZER' = 133, 'SYSTEM UNFREEZE' = 134, 'SYSTEM' = 135, 'dictGet' = 136, 'addressToLine' = 137, 'addressToLineWithInlines' = 138, 'addressToSymbol' = 139, 'demangle' = 140, 'INTROSPECTION' = 141, 'FILE' = 142, 'URL' = 143, 'REMOTE' = 144, 'MONGO' = 145, 'MEILISEARCH' = 146, 'MYSQL' = 147, 'POSTGRES' = 148, 'SQLITE' = 149, 'ODBC' = 150, 'JDBC' = 151, 'HDFS' = 152, 'S3' = 153, 'HIVE' = 154, 'SOURCES' = 155, 'CLUSTER' = 156, 'ALL' = 157, 'NONE' = 158)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' @@ -976,6 +977,7 @@ CREATE TABLE system.settings `max` Nullable(String), `readonly` UInt8, `type` String, + `default` String, `alias_for` String ) ENGINE = SystemSettings diff --git a/tests/queries/0_stateless/02132_client_history_navigation.expect b/tests/queries/0_stateless/02132_client_history_navigation.expect index 3316f26d552..dc7e44b41ee 100755 --- a/tests/queries/0_stateless/02132_client_history_navigation.expect +++ b/tests/queries/0_stateless/02132_client_history_navigation.expect @@ -11,9 +11,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } # useful debugging configuration @@ -23,16 +23,20 @@ spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \ expect ":) " # Make a query -send -- "SELECT 1\r" +send -- "SELECT 'unique1'\r" expect "1" expect ":) " -send -- "SELECT 2" +send -- "SELECT 'unique2'" # NOTE: it does not work for alacritty with TERM=xterm send -- "\033\[A" -expect "SELECT 1" +expect "SELECT 'unique1'" send -- "\033\[B" -expect "SELECT 2" +expect "SELECT 'unique2'" send -- "\r" -expect "2" +# First is echoed command +expect "'unique2'" +# Second is the response from the server +expect "'unique2'" + send -- "exit\r" expect eof diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference index f3bfd9f1048..47391a77ee8 100644 --- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference +++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference @@ -72,7 +72,7 @@ uint8 Nullable(UInt8) int16 Nullable(Int16) uint16 Nullable(UInt16) int32 Nullable(Int32) -uint32 Nullable(Int64) +uint32 Nullable(UInt32) int64 Nullable(Int64) uint64 Nullable(UInt64) 0 0 0 0 0 0 0 0 diff --git a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect index 41d32891e98..01d61a6ad0f 100755 --- a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect +++ b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect @@ -11,9 +11,9 @@ set uuid "" match_max 100000 expect_after { # Do not ignore eof from read. - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" @@ -44,9 +44,8 @@ expect ":) " # Generate UIUD to avoid matching old database/tables/columns from previous test runs. send -- "select 'begin-' || replace(toString(generateUUIDv4()), '-', '') || '-end' format TSV\r" -expect -re TSV.*TSV.*begin-(.*)-end.* +expect -re "TSV.*TSV.*begin-(.*)-end.*:\\) " set uuid $expect_out(1,string) -expect ":) " # CREATE DATABASE send -- "create database new_${uuid}_database\r" diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect index f70b699c71f..57a2d1c432b 100755 --- a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect +++ b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect @@ -9,15 +9,15 @@ set timeout 20 match_max 100000 expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" expect ":) " send -- "insert into table function null() format TSV some trash here 123 \n 456\r" -expect -re ".*DB::Exception: Table function 'null' requires 'structure'.*\r" +expect "CANNOT_PARSE_INPUT_ASSERTION_FAILED" expect ":) " send -- "" diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index 261c389c9f2..26646bd91a0 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-backward-compatibility-check +# Tags: no-fasttest, no-parallel, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02242_delete_user_race.sh b/tests/queries/0_stateless/02242_delete_user_race.sh index 8c28cdb57bd..f22b7796bd4 100755 --- a/tests/queries/0_stateless/02242_delete_user_race.sh +++ b/tests/queries/0_stateless/02242_delete_user_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-backward-compatibility-check +# Tags: race, no-fasttest, no-parallel, no-upgrade-check # Test tries to reproduce a race between threads: # - deletes user diff --git a/tests/queries/0_stateless/02243_drop_user_grant_race.sh b/tests/queries/0_stateless/02243_drop_user_grant_race.sh index d36db47e562..e36be96aa02 100755 --- a/tests/queries/0_stateless/02243_drop_user_grant_race.sh +++ b/tests/queries/0_stateless/02243_drop_user_grant_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-backward-compatibility-check +# Tags: race, no-fasttest, no-parallel, no-upgrade-check set -e diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql index 9938154f169..06af0b5e1d8 100644 --- a/tests/queries/0_stateless/02293_hashid.sql +++ b/tests/queries/0_stateless/02293_hashid.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SET allow_experimental_hash_functions = 1; select number, hashid(number) from system.numbers limit 5; diff --git a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql index 469476c82bf..8e0fb4a55a0 100644 --- a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql +++ b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SET max_bytes_in_join = '100', join_algorithm = 'auto'; diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql index 5bc0c41b3ee..f73525f92be 100644 --- a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql @@ -1,2 +1,2 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SELECT nth_value(NULL, 1048577) OVER (Rows BETWEEN 1023 FOLLOWING AND UNBOUNDED FOLLOWING) diff --git a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect index 07815e57610..9f6937cf80d 100755 --- a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect +++ b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long # This is the regression for the concurrent access in ProgressIndication, # so it is important to read enough rows here (10e6). @@ -17,8 +16,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect index 3333ee93468..5e514cd7c12 100755 --- a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect +++ b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long # This is the regression for the concurrent access in ProgressIndication, # so it is important to read enough rows here (10e6). @@ -17,8 +16,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql index 44a4797ae3c..08e8843f763 100644 --- a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql +++ b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check -- https://github.com/ClickHouse/ClickHouse/issues/37561 diff --git a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql index 47b47101a79..df20e5c42d4 100644 --- a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql +++ b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SET join_algorithm = 'partial_merge'; diff --git a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql index 35f210be43d..cac7992e305 100644 --- a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql +++ b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql @@ -1,6 +1,3 @@ --- Tags: no-backward-compatibility-check --- TODO: remove no-backward-compatibility-check after new 22.6 release - SET cast_ipv4_ipv6_default_on_conversion_error = 1; DROP TABLE IF EXISTS ipv4_test; diff --git a/tests/queries/0_stateless/02316_const_string_intersact.sql b/tests/queries/0_stateless/02316_const_string_intersact.sql index 18af398aa5d..148d048952b 100644 --- a/tests/queries/0_stateless/02316_const_string_intersact.sql +++ b/tests/queries/0_stateless/02316_const_string_intersact.sql @@ -1,3 +1,3 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SELECT 'Play ClickHouse' InterSect SELECT 'Play ClickHouse' diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 7a9627b04ef..00b90bb38b5 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -5,7 +5,6 @@ CREATE TABLE merge_table_standard_delete(id Int32, name String) ENGINE = MergeTr INSERT INTO merge_table_standard_delete select number, toString(number) from numbers(100); SET mutations_sync = 0; -SET allow_experimental_lightweight_delete = 1; DELETE FROM merge_table_standard_delete WHERE id = 10; @@ -108,3 +107,10 @@ DELETE FROM t_proj WHERE a < 100; -- { serverError BAD_ARGUMENTS } SELECT avg(a), avg(b), count() FROM t_proj; DROP TABLE t_proj; + +CREATE TABLE merge_table_standard_delete(id Int32, name String) ENGINE = MergeTree order by id settings min_bytes_for_wide_part=0; +SET allow_experimental_lightweight_delete = false; +DELETE FROM merge_table_standard_delete WHERE id = 10; -- allow_experimental_lightweight_delete=false is now ignored +SET enable_lightweight_delete = false; +DELETE FROM merge_table_standard_delete WHERE id = 10; -- { serverError SUPPORT_IS_DISABLED } +DROP TABLE merge_table_standard_delete; diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree_compact_parts.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree_compact_parts.sql index 068806638f3..4e9f3db0b96 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree_compact_parts.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree_compact_parts.sql @@ -7,7 +7,6 @@ INSERT INTO merge_table_standard_delete select number, toString(number) from num SELECT COUNT(), part_type FROM system.parts WHERE database = currentDatabase() AND table = 'merge_table_standard_delete' AND active GROUP BY part_type ORDER BY part_type; SET mutations_sync = 0; -SET allow_experimental_lightweight_delete = 1; DELETE FROM merge_table_standard_delete WHERE id = 10; SELECT COUNT(), part_type FROM system.parts WHERE database = currentDatabase() AND table = 'merge_table_standard_delete' AND active GROUP BY part_type ORDER BY part_type; diff --git a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql index 08651590c76..734c597051e 100644 --- a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql +++ b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check select arrayMap(x -> toNullable(1), range(number)) from numbers(3); select arrayFilter(x -> toNullable(1), range(number)) from numbers(3); diff --git a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh index 653cb25172a..503b94be715 100755 --- a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh +++ b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-backward-compatibility-check +# Tags: no-upgrade-check CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql index eb395e5ec41..07f705acd84 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check -- Regression for PartialSortingTransform optimization that requires at least 1500 rows. SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10; diff --git a/tests/queries/0_stateless/02346_full_text_search.reference b/tests/queries/0_stateless/02346_full_text_search.reference index e035e93867b..f1e21e511d0 100644 --- a/tests/queries/0_stateless/02346_full_text_search.reference +++ b/tests/queries/0_stateless/02346_full_text_search.reference @@ -1,4 +1,5 @@ af inverted +1 101 Alick a01 1 101 Alick a01 diff --git a/tests/queries/0_stateless/02346_full_text_search.sql b/tests/queries/0_stateless/02346_full_text_search.sql index af49c5d52c2..2b10800e78f 100644 --- a/tests/queries/0_stateless/02346_full_text_search.sql +++ b/tests/queries/0_stateless/02346_full_text_search.sql @@ -15,6 +15,9 @@ INSERT INTO tab VALUES (101, 'Alick a01'), (102, 'Blick a02'), (103, 'Click a03' -- check inverted index was created SELECT name, type FROM system.data_skipping_indices WHERE table =='tab' AND database = currentDatabase() LIMIT 1; +-- throw in a random consistency check +CHECK TABLE tab; + -- search inverted index with == SELECT * FROM tab WHERE s == 'Alick a01'; diff --git a/tests/queries/0_stateless/02352_interactive_queries_from_file.expect b/tests/queries/0_stateless/02352_interactive_queries_from_file.expect index a34fc9909f8..e28fb38862c 100755 --- a/tests/queries/0_stateless/02352_interactive_queries_from_file.expect +++ b/tests/queries/0_stateless/02352_interactive_queries_from_file.expect @@ -11,9 +11,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "echo 'select 1;\nselect 2;\nselect 3' > queries_02352" diff --git a/tests/queries/0_stateless/02352_lightweight_delete.sql b/tests/queries/0_stateless/02352_lightweight_delete.sql index 8a88e48b207..e1759e56a3a 100644 --- a/tests/queries/0_stateless/02352_lightweight_delete.sql +++ b/tests/queries/0_stateless/02352_lightweight_delete.sql @@ -5,7 +5,6 @@ CREATE TABLE lwd_test (id UInt64 , value String) ENGINE MergeTree() ORDER BY id; INSERT INTO lwd_test SELECT number, randomString(10) FROM system.numbers LIMIT 1000000; SET mutations_sync = 0; -SET allow_experimental_lightweight_delete = 1; SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test' AND active; SELECT 'Count', count() FROM lwd_test; diff --git a/tests/queries/0_stateless/02352_lightweight_delete_on_replicated_merge_tree.sql b/tests/queries/0_stateless/02352_lightweight_delete_on_replicated_merge_tree.sql index 24e7b5684ec..4c3955bf746 100644 --- a/tests/queries/0_stateless/02352_lightweight_delete_on_replicated_merge_tree.sql +++ b/tests/queries/0_stateless/02352_lightweight_delete_on_replicated_merge_tree.sql @@ -9,7 +9,6 @@ CREATE TABLE replicated_table_r2(id Int32, name String) ENGINE = ReplicatedMerge INSERT INTO replicated_table_r1 select number, toString(number) FROM numbers(100); SET mutations_sync = 0; -SET allow_experimental_lightweight_delete = 1; DELETE FROM replicated_table_r1 WHERE id = 10; diff --git a/tests/queries/0_stateless/02354_annoy.sh b/tests/queries/0_stateless/02354_annoy.sh index 7b49a338955..87258debf0f 100755 --- a/tests/queries/0_stateless/02354_annoy.sh +++ b/tests/queries/0_stateless/02354_annoy.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-backward-compatibility-check +# Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02363_mapupdate_improve.sql b/tests/queries/0_stateless/02363_mapupdate_improve.sql index 6b7723cc9b4..b4a4b8e5d91 100644 --- a/tests/queries/0_stateless/02363_mapupdate_improve.sql +++ b/tests/queries/0_stateless/02363_mapupdate_improve.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check DROP TABLE IF EXISTS map_test; CREATE TABLE map_test(`tags` Map(String, String)) ENGINE = MergeTree PRIMARY KEY tags ORDER BY tags SETTINGS index_granularity = 8192; INSERT INTO map_test (tags) VALUES (map('fruit','apple','color','red')); diff --git a/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.sql b/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.sql index cdbac93937e..86a8414e799 100644 --- a/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.sql +++ b/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.sql @@ -1,5 +1,3 @@ - - DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql index 9d6950051f0..cf9f2971cb0 100644 --- a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql +++ b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check DROP TABLE IF EXISTS test_table; CREATE TABLE test_table diff --git a/tests/queries/0_stateless/02366_with_fill_date.sql b/tests/queries/0_stateless/02366_with_fill_date.sql index 64e23b845f8..4d41facf423 100644 --- a/tests/queries/0_stateless/02366_with_fill_date.sql +++ b/tests/queries/0_stateless/02366_with_fill_date.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check +-- Tags: no-upgrade-check SELECT toDate('2022-02-01') AS d1 FROM numbers(18) AS number diff --git a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.reference b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.reference index 1da5cd0b7b3..8b384f5e0d1 100644 --- a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.reference +++ b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.reference @@ -9,8 +9,8 @@ UInt64 String DateTime Map(UUID, Array(Float32)) 1 _CAST(42, \'Int64\') Int64 _CAST([1, 2, 3], \'Array(UInt8)\') Array(UInt8) -_CAST(((\'abc\', 22), (\'def\', 33)), \'Map(String, UInt8)\') Map(String, UInt8) +_CAST([(\'abc\', 22), (\'def\', 33)], \'Map(String, UInt8)\') Map(String, UInt8) _CAST([[4, 5, 6], [7], [8, 9]], \'Array(Array(UInt8))\') Array(Array(UInt8)) -_CAST(((10, [11, 12]), (13, [14, 15])), \'Map(UInt8, Array(UInt8))\') Map(UInt8, Array(UInt8)) -_CAST(((\'ghj\', ((\'klm\', [16, 17]))), (\'nop\', ((\'rst\', [18])))), \'Map(String, Map(String, Array(UInt8)))\') Map(String, Map(String, Array(UInt8))) +_CAST([(10, [11, 12]), (13, [14, 15])], \'Map(UInt8, Array(UInt8))\') Map(UInt8, Array(UInt8)) +_CAST([(\'ghj\', [(\'klm\', [16, 17])]), (\'nop\', [(\'rst\', [18])])], \'Map(String, Map(String, Array(UInt8)))\') Map(String, Map(String, Array(UInt8))) a Int8 diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index 7eaa46242ae..842e22ba87d 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check, no-random-merge-tree-settings +-- Tags: no-upgrade-check, no-random-merge-tree-settings drop table if exists test_02381; create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks=false, compress_primary_key=false; diff --git a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh index 2372d30497e..548179b94c9 100755 --- a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh +++ b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-backward-compatibility-check, disabled +# Tags: race, zookeeper, no-parallel, no-upgrade-check, disabled CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02404_lightweight_delete_vertical_merge.sql b/tests/queries/0_stateless/02404_lightweight_delete_vertical_merge.sql index 9af0825dcd6..39c1ae3c218 100644 --- a/tests/queries/0_stateless/02404_lightweight_delete_vertical_merge.sql +++ b/tests/queries/0_stateless/02404_lightweight_delete_vertical_merge.sql @@ -32,7 +32,6 @@ ORDER BY name, column; SET mutations_sync = 0; -SET allow_experimental_lightweight_delete = 1; -- delete some rows using LWD DELETE FROM lwd_test WHERE (id % 3) = 0; diff --git a/tests/queries/0_stateless/02417_repeat_input_commands.expect b/tests/queries/0_stateless/02417_repeat_input_commands.expect index 3658d5d8494..8070200c55c 100755 --- a/tests/queries/0_stateless/02417_repeat_input_commands.expect +++ b/tests/queries/0_stateless/02417_repeat_input_commands.expect @@ -11,9 +11,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" diff --git a/tests/queries/0_stateless/02428_delete_with_settings.sql b/tests/queries/0_stateless/02428_delete_with_settings.sql index 2d49fc55fd0..071a3f74184 100644 --- a/tests/queries/0_stateless/02428_delete_with_settings.sql +++ b/tests/queries/0_stateless/02428_delete_with_settings.sql @@ -1,6 +1,5 @@ drop table if exists test; create table test (id Int32, key String) engine=MergeTree() order by tuple(); insert into test select number, toString(number) from numbers(1000000); -set allow_experimental_lightweight_delete=1; delete from test where id % 2 = 0 SETTINGS mutations_sync=0; select count() from test; diff --git a/tests/queries/0_stateless/02429_low_cardinality_trash.sh b/tests/queries/0_stateless/02429_low_cardinality_trash.sh index 258f02b4bb6..91618cb2796 100755 --- a/tests/queries/0_stateless/02429_low_cardinality_trash.sh +++ b/tests/queries/0_stateless/02429_low_cardinality_trash.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-backward-compatibility-check +# Tags: long, no-upgrade-check CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 11ca3f43d8f..abcf1bf4c5b 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-backward-compatibility-check +# Tags: long, no-upgrade-check CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference b/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference index 378722b5166..1d8610c59c9 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference @@ -6,6 +6,6 @@ ENGINE = MergeTree ORDER BY tuple() SETTINGS disk = disk(type = local, path = \'/var/lib/clickhouse/disks/local/\') 100 -CREATE TABLE default.test\n(\n `a` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS disk = disk(type = local, path = \'/var/lib/clickhouse/disks/local/\'), index_granularity = 8192 +CREATE TABLE default.test\n(\n `a` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS disk = disk(type = local, path = \'[HIDDEN]\'), index_granularity = 8192 a Int32 200 diff --git a/tests/queries/0_stateless/02456_progress_tty.expect b/tests/queries/0_stateless/02456_progress_tty.expect index ba6cc0537eb..3d1d92e5400 100755 --- a/tests/queries/0_stateless/02456_progress_tty.expect +++ b/tests/queries/0_stateless/02456_progress_tty.expect @@ -10,8 +10,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 index e1ec348e6ac..1e4258cef7e 100644 --- a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 @@ -24,7 +24,6 @@ insert into url_na_log select 209, '2022-08-21' from numbers(10000); SET mutations_sync=2; -SET allow_experimental_lightweight_delete=1; OPTIMIZE TABLE url_na_log FINAL; diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python index ca90c90b70e..37a7280dac2 100644 --- a/tests/queries/0_stateless/02473_multistep_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -126,7 +126,7 @@ class Tester: def main(): # Set mutations to synchronous mode and enable lightweight DELETE's - url = os.environ['CLICKHOUSE_URL'] + '&allow_experimental_lightweight_delete=1&max_threads=1' + url = os.environ['CLICKHOUSE_URL'] + '&max_threads=1' default_index_granularity = 10; total_rows = 8 * default_index_granularity diff --git a/tests/queries/0_stateless/02473_multistep_split_prewhere.python b/tests/queries/0_stateless/02473_multistep_split_prewhere.python new file mode 100644 index 00000000000..41d8a746e11 --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_split_prewhere.python @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +import requests +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + + +class Tester: + ''' + - Creates test table with multiple integer columns + - Runs read queries with multiple range conditions on different columns in PREWHERE and check that the result is correct + ''' + def __init__(self, session, url, index_granularity, total_rows): + self.session = session + self.url = url + self.index_granularity = index_granularity + self.total_rows = total_rows + self.reported_errors = set() + self.repro_queries = [] + + def report_error(self): + print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + exit(1) + + def query(self, query_text, include_in_repro_steps = True, expected_data = None): + self.repro_queries.append(query_text) + resp = self.session.post(self.url, data=query_text) + if resp.status_code != 200: + # Group similar errors + error = resp.text[0:40] + if error not in self.reported_errors: + self.reported_errors.add(error) + print('Code:', resp.status_code) + print('Result:', resp.text) + self.report_error() + + result = resp.text + # Check that the result is as expected + if ((not expected_data is None) and (int(result) != len(expected_data))): + print('Expected {} rows, got {}'.format(len(expected_data), result)) + print('Expected data:' + str(expected_data)) + self.report_error() + + if not include_in_repro_steps: + self.repro_queries.pop() + + + def check_data(self, all_data, c_range_start, c_range_end, d_range_start, d_range_end): + for to_select in ['count()', 'sum(e)']: # Test reading with and without column with default value + self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data) + + delta = 10 + for b_range_start in [0, delta]: + for b_range_end in [self.total_rows - delta]: #, self.total_rows]: + expected = all_data[ + (all_data.a == 0) & + (all_data.b > b_range_start) & + (all_data.b <= b_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( + to_select, b_range_start, b_range_end), False, expected) + + expected = all_data[ + (all_data.a == 0) & + (all_data.b > b_range_start) & + (all_data.b <= b_range_end) & + (all_data.c > c_range_start) & + (all_data.c <= c_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;'.format( + to_select, b_range_start, b_range_end, c_range_start, c_range_end), False, expected) + + expected = all_data[ + (all_data.a == 0) & + (all_data.b > b_range_start) & + (all_data.b <= b_range_end) & + (all_data.c > c_range_start) & + (all_data.c <= c_range_end) & + (all_data.d > d_range_start) & + (all_data.d <= d_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;'.format( + to_select, b_range_start, b_range_end, c_range_start, c_range_end, d_range_start, d_range_end), False, expected) + + + def run_test(self, c_range_start, c_range_end, d_range_start, d_range_end): + self.repro_queries = [] + + self.query(''' + CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, d Int32, PRIMARY KEY (a)) + ENGINE = MergeTree() ORDER BY (a, b) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + + self.query('INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + + client = ClickHouseClient() + all_data = client.query_return_df("SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;") + + self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + + # After all data has been written add a column with default value + self.query('ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;') + + self.check_data(all_data, c_range_start, c_range_end, d_range_start, d_range_end) + + self.query('DROP TABLE tab_02473;') + + + +def main(): + # Enable multiple prewhere read steps + url = os.environ['CLICKHOUSE_URL'] + '&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1' + + default_index_granularity = 10; + total_rows = 8 * default_index_granularity + step = default_index_granularity + session = requests.Session() + for index_granularity in [default_index_granularity-1, default_index_granularity]: + tester = Tester(session, url, index_granularity, total_rows) + # Test combinations of ranges of columns c and d + for c_range_start in range(0, total_rows, int(2.3 * step)): + for c_range_end in range(c_range_start + 3 * step, total_rows, int(2.1 * step)): + for d_range_start in range(int(0.5 * step), total_rows, int(2.7 * step)): + for d_range_end in range(d_range_start + 3 * step, total_rows, int(2.2 * step)): + tester.run_test(c_range_start, c_range_end, d_range_start, d_range_end) + + +if __name__ == "__main__": + main() + diff --git a/tests/queries/0_stateless/02473_multistep_split_prewhere.reference b/tests/queries/0_stateless/02473_multistep_split_prewhere.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02473_multistep_split_prewhere.sh b/tests/queries/0_stateless/02473_multistep_split_prewhere.sh new file mode 100755 index 00000000000..c5482c42028 --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_split_prewhere.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02473_multistep_split_prewhere.python + diff --git a/tests/queries/0_stateless/02480_client_option_print_num_processed_rows.expect b/tests/queries/0_stateless/02480_client_option_print_num_processed_rows.expect index 77e219e804e..dd3c9309b2d 100755 --- a/tests/queries/0_stateless/02480_client_option_print_num_processed_rows.expect +++ b/tests/queries/0_stateless/02480_client_option_print_num_processed_rows.expect @@ -10,8 +10,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference index f9c1c174a7d..b0ec7cd7698 100644 --- a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference @@ -43,7 +43,7 @@ SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; 5 9 13 -SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; +SET mutations_sync = 2; -- Delete all rows where division by zero could occur DELETE FROM test_filter WHERE c = 0; -- Test that now division by zero doesn't occur without explicit condition diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql index ab675df75cf..e3ceda15676 100644 --- a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql @@ -15,7 +15,7 @@ SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; -SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; +SET mutations_sync = 2; -- Delete all rows where division by zero could occur DELETE FROM test_filter WHERE c = 0; diff --git a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect index a391756ba22..2d595b0f492 100755 --- a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect +++ b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect @@ -10,8 +10,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference index 4f9430ef608..43282d09bab 100644 --- a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference +++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference @@ -1 +1,2 @@ 4 2 +100 99 diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql index e3e508e17be..5eba14ea528 100644 --- a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql +++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql @@ -12,3 +12,5 @@ INSERT INTO test_table VALUES (1, 1); INSERT INTO test_table VALUES (1, 1); SELECT sum((2 * id) as func), func FROM test_table GROUP BY id; + +SELECT max(100-number), min(100-number) FROM numbers(2); diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql index 771893ce674..7445fa05c66 100644 --- a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql +++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql @@ -4,7 +4,6 @@ CREATE TABLE table_02513 (n UInt64) ENGINE=MergeTree() ORDER BY tuple() SETTINGS INSERT INTO table_02513 SELECT number+11*13*1000 FROM numbers(20); -SET allow_experimental_lightweight_delete=1; SET mutations_sync=2; SET max_threads=1; diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.sql b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql index 73abca4ea53..f655d0ce6cb 100644 --- a/tests/queries/0_stateless/02518_delete_on_materialized_view.sql +++ b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql @@ -7,7 +7,6 @@ CREATE MATERIALIZED VIEW kekv ENGINE = MergeTree ORDER BY tuple() AS SELECT * FR INSERT INTO kek VALUES (1); DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS} -SET allow_experimental_lightweight_delete=1; DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS} DROP TABLE IF EXISTS kek; diff --git a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql index cf2fe452e4b..1600761bb84 100644 --- a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql +++ b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql @@ -11,7 +11,6 @@ INSERT INTO lwd_test_02521 SELECT number, randomString(10), now() FROM numbers(5 OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1; SET mutations_sync=1; -SET allow_experimental_lightweight_delete = 1; -- { echoOn } SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; diff --git a/tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh b/tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh index 97847b08203..2c12248cafd 100755 --- a/tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh +++ b/tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh @@ -13,7 +13,7 @@ echo '{"t" : {}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple echo '{"t" : {"a" : 1, "b" : 2}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" --input_format_json_defaults_for_missing_elements_in_named_tuple=0 2>&1 | grep -F "INCORRECT_DATA" -c -echo '{"t" : {"a" : 1, "d" : 2}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "NOT_FOUND_COLUMN_IN_BLOCK" -c +echo '{"t" : {"a" : 1, "d" : 2}}' | $CLICKHOUSE_LOCAL --input_format_json_ignore_unknown_keys_in_named_tuple=0 --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "NOT_FOUND_COLUMN_IN_BLOCK" -c -echo '{"t" : {"a" : 1, "b" : 2, "c" : 3, "d" : 4}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "INCORRECT_DATA" -c +echo '{"t" : {"a" : 1, "b" : 2, "c" : 3, "d" : 4}}' | $CLICKHOUSE_LOCAL --input_format_json_ignore_unknown_keys_in_named_tuple=0 --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "INCORRECT_DATA" -c diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index e3378ac8c3d..dbd49cfc0a4 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,3 +1,2 @@ - MergeTreeBaseSelectProcessor: PREWHERE actions: MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key MergeTreeRangeReader: read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.reference b/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.reference index a1b4e2b5a83..b7edddf46e0 100644 --- a/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.reference +++ b/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.reference @@ -3,11 +3,11 @@ INCORRECT_DATA NOT_FOUND_COLUMN_IN_BLOCK (1) { - "row_1": {"type":"CreateEvent","actor":{"login":"foobar"},"repo":{"name":"ClickHouse\/ClickHouse"},"created_at":"2023-01-26 10:48:02","payload":{"updated_at":"1970-01-01 00:00:00","action":"","comment":{"id":"0","path":"","position":0,"line":0,"user":{"login":""},"diff_hunk":"","original_position":0,"commit_id":"","original_commit_id":""},"review":{"body":"","author_association":"","state":""},"ref":"backport","ref_type":"branch","issue":{"number":0,"title":"","labels":[],"state":"","locked":0,"assignee":{"login":""},"assignees":[],"comment":"","closed_at":"1970-01-01 00:00:00"},"pull_request":{"merged_at":null,"merge_commit_sha":"","requested_reviewers":[],"requested_teams":[],"head":{"ref":"","sha":""},"base":{"ref":"","sha":""},"merged":0,"mergeable":0,"rebaseable":0,"mergeable_state":"","merged_by":null,"review_comments":0,"maintainer_can_modify":0,"commits":0,"additions":0,"deletions":0,"changed_files":0},"size":0,"distinct_size":0,"member":{"login":""},"release":{"tag_name":"","name":""}}} + "row_1": {"type":"CreateEvent","actor":{"login":"foobar"},"repo":{"name":"ClickHouse\/ClickHouse"},"created_at":"2023-01-26 10:48:02","payload":{"updated_at":"1970-01-01 00:00:00","action":"","comment":{"id":"0","path":"","position":0,"line":0,"user":{"login":""},"diff_hunk":"","original_position":0,"commit_id":"","original_commit_id":""},"review":{"body":"","author_association":"","state":""},"ref":"backport","ref_type":"branch","issue":{"number":0,"title":"","labels":[],"state":"","locked":0,"assignee":{"login":""},"assignees":[],"comment":"","closed_at":"1970-01-01 00:00:00"},"pull_request":{"merged_at":"1970-01-01 00:00:00","merge_commit_sha":"","requested_reviewers":[],"requested_teams":[],"head":{"ref":"","sha":""},"base":{"ref":"","sha":""},"merged":0,"mergeable":0,"rebaseable":0,"mergeable_state":"","merged_by":{"login":""},"review_comments":0,"maintainer_can_modify":0,"commits":0,"additions":0,"deletions":0,"changed_files":0},"size":0,"distinct_size":0,"member":{"login":""},"release":{"tag_name":"","name":""}}} } { - "row_1": {"labels":[],"merged_by":""}, + "row_1": {"labels":[],"merged_by":""}, "row_2": {"labels":[],"merged_by":"foobar"}, - "row_3": {"labels":[],"merged_by":""}, - "row_4": {"labels":["backport"],"merged_by":""} + "row_3": {"labels":[],"merged_by":""}, + "row_4": {"labels":["backport"],"merged_by":""} } diff --git a/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.sh b/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.sh index f37a36fa192..eccac543215 100755 --- a/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.sh +++ b/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.sh @@ -60,7 +60,7 @@ gharchive_structure=( closed_at DateTime('UTC') ), pull_request Tuple( - merged_at Nullable(DateTime('UTC')), + merged_at DateTime('UTC'), merge_commit_sha String, requested_reviewers Nested( login String @@ -80,16 +80,9 @@ gharchive_structure=( mergeable UInt8, rebaseable UInt8, mergeable_state String, - merged_by Nullable(String), - /* NOTE: correct type is Tuple, however Tuple cannot be Nullable, - * so you still have to use Nullable(String) and rely on - * input_format_json_read_objects_as_strings, but see also - * https://github.com/ClickHouse/ClickHouse/issues/36464 - */ - /* merged_by Tuple( - * login String - * ), - */ + merged_by Tuple( + login String + ), review_comments UInt32, maintainer_can_modify UInt8, commits UInt32, @@ -122,12 +115,10 @@ EOL # NOTE: due to [1] we cannot use dot.dot notation, only tupleElement() # # [1]: https://github.com/ClickHouse/ClickHouse/issues/24607 -$CLICKHOUSE_LOCAL "${gharchive_settings[@]}" --structure="${gharchive_structure[*]}" -q " - WITH - tupleElement(tupleElement(payload, 'pull_request'), 'merged_by') AS merged_by_ +$CLICKHOUSE_LOCAL --allow_experimental_analyzer=1 "${gharchive_settings[@]}" --structure="${gharchive_structure[*]}" -q " SELECT - tupleElement(tupleElement(tupleElement(payload, 'issue'), 'labels'), 'name') AS labels, - if(merged_by_ IS NULL, '', JSONExtractString(merged_by_, 'login')) AS merged_by + payload.issue.labels.name AS labels, + payload.pull_request.merged_by.login AS merged_by FROM table " < 1000, + [toString(number)], + emptyArrayString()) + FROM numbers(2000); + +SET enable_multiple_prewhere_read_steps=1, move_all_conditions_to_prewhere=1; + +SELECT * FROM t_02559 +WHERE (key < 5 OR key > 500) + AND NOT has(value, toString(key)) + AND length(value) == 1 +LIMIT 10 +SETTINGS max_block_size = 81, + max_threads = 1; + +DROP TABLE IF EXISTS t_02559; diff --git a/tests/queries/0_stateless/02661_window_ntile.reference b/tests/queries/0_stateless/02560_window_ntile.reference similarity index 100% rename from tests/queries/0_stateless/02661_window_ntile.reference rename to tests/queries/0_stateless/02560_window_ntile.reference diff --git a/tests/queries/0_stateless/02661_window_ntile.sql b/tests/queries/0_stateless/02560_window_ntile.sql similarity index 100% rename from tests/queries/0_stateless/02661_window_ntile.sql rename to tests/queries/0_stateless/02560_window_ntile.sql diff --git a/tests/queries/0_stateless/02561_null_as_default_more_formats.reference b/tests/queries/0_stateless/02561_null_as_default_more_formats.reference new file mode 100644 index 00000000000..f5d4f41efe8 --- /dev/null +++ b/tests/queries/0_stateless/02561_null_as_default_more_formats.reference @@ -0,0 +1,36 @@ +Parquet +1 +0 0 0 +0 0 +0 0 0 +42 0 42 +Arrow +1 +0 0 0 +0 0 +0 0 0 +42 0 42 +ORC +1 +0 0 0 +0 0 +0 0 0 +42 0 42 +Avro +1 +0 0 0 +0 0 +0 0 0 +42 0 42 +MsgPack +1 +0 0 0 +0 0 +0 0 0 +42 0 42 +Native +1 +0 0 0 +0 0 +0 0 0 +42 0 42 diff --git a/tests/queries/0_stateless/02561_null_as_default_more_formats.sh b/tests/queries/0_stateless/02561_null_as_default_more_formats.sh new file mode 100755 index 00000000000..eacd8e964a6 --- /dev/null +++ b/tests/queries/0_stateless/02561_null_as_default_more_formats.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test (x UInt64 default 42, y UInt64, z LowCardinality(String) default '42') engine=Memory"; +for format in Parquet Arrow ORC Avro MsgPack Native +do + echo $format + $CLICKHOUSE_CLIENT -q "select number % 2 ? NULL : number as x, x as y, CAST(number % 2 ? NULL : toString(number), 'LowCardinality(Nullable(String))') as z from numbers(2) format $format" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_null_as_default=0 format $format" 2>&1 | grep "Exception" -c + $CLICKHOUSE_CLIENT -q "select number % 2 ? NULL : number as x, x as y, CAST(number % 2 ? NULL : toString(number), 'LowCardinality(Nullable(String))') as z from numbers(2) format $format settings output_format_arrow_low_cardinality_as_dictionary=1" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_null_as_default=1, input_format_defaults_for_omitted_fields=0 format $format" + $CLICKHOUSE_CLIENT -q "select * from test" + $CLICKHOUSE_CLIENT -q "truncate table test" + $CLICKHOUSE_CLIENT -q "select number % 2 ? NULL : number as x, x as y, CAST(number % 2 ? NULL : toString(number), 'LowCardinality(Nullable(String))') as z from numbers(2) format $format settings output_format_arrow_low_cardinality_as_dictionary=1" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_null_as_default=1, input_format_defaults_for_omitted_fields=1 format $format" + $CLICKHOUSE_CLIENT -q "select * from test" + $CLICKHOUSE_CLIENT -q "truncate table test" +done + diff --git a/tests/queries/0_stateless/02662_sorting_constants_and_distinct_crash.reference b/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.reference similarity index 100% rename from tests/queries/0_stateless/02662_sorting_constants_and_distinct_crash.reference rename to tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.reference diff --git a/tests/queries/0_stateless/02662_sorting_constants_and_distinct_crash.sql b/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql similarity index 100% rename from tests/queries/0_stateless/02662_sorting_constants_and_distinct_crash.sql rename to tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql diff --git a/tests/queries/0_stateless/02562_native_tskv_default_for_omitted_fields.reference b/tests/queries/0_stateless/02562_native_tskv_default_for_omitted_fields.reference new file mode 100644 index 00000000000..17197fa3563 --- /dev/null +++ b/tests/queries/0_stateless/02562_native_tskv_default_for_omitted_fields.reference @@ -0,0 +1,4 @@ +1 0 +1 42 +1 0 +1 42 diff --git a/tests/queries/0_stateless/02562_native_tskv_default_for_omitted_fields.sh b/tests/queries/0_stateless/02562_native_tskv_default_for_omitted_fields.sh new file mode 100755 index 00000000000..a08c948705d --- /dev/null +++ b/tests/queries/0_stateless/02562_native_tskv_default_for_omitted_fields.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "insert into function file(02562_data.native) select 1::UInt64 as x settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "create table test (x UInt64, y UInt64 default 42) engine=File(Native, '02562_data.native') settings input_format_defaults_for_omitted_fields=0" +$CLICKHOUSE_CLIENT -q "select * from test" +$CLICKHOUSE_CLIENT -q "drop table test" + +$CLICKHOUSE_CLIENT -q "create table test (x UInt64, y UInt64 default 42) engine=File(Native, '02562_data.native') settings input_format_defaults_for_omitted_fields=1" +$CLICKHOUSE_CLIENT -q "select * from test" +$CLICKHOUSE_CLIENT -q "drop table test" + +$CLICKHOUSE_CLIENT -q "insert into function file(02562_data.tskv) select 1::UInt64 as x settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "create table test (x UInt64, y UInt64 default 42) engine=File(TSKV, '02562_data.tskv') settings input_format_defaults_for_omitted_fields=0" +$CLICKHOUSE_CLIENT -q "select * from test" +$CLICKHOUSE_CLIENT -q "drop table test" + +$CLICKHOUSE_CLIENT -q "create table test (x UInt64, y UInt64 default 42) engine=File(TSKV, '02562_data.tskv') settings input_format_defaults_for_omitted_fields=1" +$CLICKHOUSE_CLIENT -q "select * from test" +$CLICKHOUSE_CLIENT -q "drop table test" + diff --git a/tests/queries/0_stateless/02665_regexp_extract.reference b/tests/queries/0_stateless/02562_regexp_extract.reference similarity index 100% rename from tests/queries/0_stateless/02665_regexp_extract.reference rename to tests/queries/0_stateless/02562_regexp_extract.reference diff --git a/tests/queries/0_stateless/02665_regexp_extract.sql b/tests/queries/0_stateless/02562_regexp_extract.sql similarity index 100% rename from tests/queries/0_stateless/02665_regexp_extract.sql rename to tests/queries/0_stateless/02562_regexp_extract.sql diff --git a/tests/queries/0_stateless/02563_progress_when_no_rows_from_prewhere.reference b/tests/queries/0_stateless/02563_progress_when_no_rows_from_prewhere.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02666_progress_when_no_rows_from_prewhere.sh b/tests/queries/0_stateless/02563_progress_when_no_rows_from_prewhere.sh similarity index 100% rename from tests/queries/0_stateless/02666_progress_when_no_rows_from_prewhere.sh rename to tests/queries/0_stateless/02563_progress_when_no_rows_from_prewhere.sh diff --git a/tests/queries/0_stateless/02564_analyzer_cross_to_inner.reference b/tests/queries/0_stateless/02564_analyzer_cross_to_inner.reference new file mode 100644 index 00000000000..e4d7ff55b86 --- /dev/null +++ b/tests/queries/0_stateless/02564_analyzer_cross_to_inner.reference @@ -0,0 +1,205 @@ +5 6 5 6 5 +3 4 3 4 5 +3 4 3 4 7 +3 4 3 4 9 +5 6 5 6 5 +5 6 5 6 7 +5 6 5 6 9 +-- { echoOn } + +EXPLAIN QUERY TREE +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0) AND t2.a = t3.x AND 1; +QUERY id: 0 + PROJECTION COLUMNS + t1.a UInt64 + t1.b UInt64 + t2.a UInt64 + t2.b UInt64 + x UInt64 + PROJECTION + LIST id: 1, nodes: 5 + COLUMN id: 2, column_name: a, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 3 + COLUMN id: 5, column_name: a, result_type: UInt64, source_id: 6 + COLUMN id: 7, column_name: b, result_type: UInt64, source_id: 6 + COLUMN id: 8, column_name: x, result_type: UInt64, source_id: 9 + JOIN TREE + JOIN id: 10, strictness: ALL, kind: INNER + LEFT TABLE EXPRESSION + JOIN id: 11, strictness: ALL, kind: INNER + LEFT TABLE EXPRESSION + TABLE id: 3, table_name: default.t1 + RIGHT TABLE EXPRESSION + TABLE id: 6, table_name: default.t2 + JOIN EXPRESSION + FUNCTION id: 12, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: a, result_type: UInt64, source_id: 3 + FUNCTION id: 15, function_name: if, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 3 + FUNCTION id: 17, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: b, result_type: UInt64, source_id: 6 + CONSTANT id: 20, constant_value: UInt64_0, constant_value_type: UInt8 + COLUMN id: 21, column_name: a, result_type: UInt64, source_id: 6 + CONSTANT id: 22, constant_value: UInt64_0, constant_value_type: UInt8 + RIGHT TABLE EXPRESSION + QUERY id: 9, alias: t3, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 23, nodes: 1 + COLUMN id: 24, column_name: a, result_type: UInt64, source_id: 25 + JOIN TREE + TABLE id: 25, table_name: default.t3 + WHERE + FUNCTION id: 26, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 27, nodes: 2 + FUNCTION id: 28, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 2 + COLUMN id: 24, column_name: a, result_type: UInt64, source_id: 25 + CONSTANT id: 30, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 31, column_name: b, result_type: UInt64, source_id: 25 + JOIN EXPRESSION + FUNCTION id: 32, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 33, nodes: 2 + COLUMN id: 21, column_name: a, result_type: UInt64, source_id: 6 + COLUMN id: 34, column_name: x, result_type: UInt64, source_id: 9 + WHERE + CONSTANT id: 35, constant_value: UInt64_1, constant_value_type: UInt8 +EXPLAIN QUERY TREE +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0) AND t2.a = t3.x AND 1 +SETTINGS cross_to_inner_join_rewrite = 0; +QUERY id: 0 + PROJECTION COLUMNS + t1.a UInt64 + t1.b UInt64 + t2.a UInt64 + t2.b UInt64 + x UInt64 + PROJECTION + LIST id: 1, nodes: 5 + COLUMN id: 2, column_name: a, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 3 + COLUMN id: 5, column_name: a, result_type: UInt64, source_id: 6 + COLUMN id: 7, column_name: b, result_type: UInt64, source_id: 6 + COLUMN id: 8, column_name: x, result_type: UInt64, source_id: 9 + JOIN TREE + JOIN id: 10, kind: COMMA + LEFT TABLE EXPRESSION + JOIN id: 11, kind: COMMA + LEFT TABLE EXPRESSION + TABLE id: 3, table_name: default.t1 + RIGHT TABLE EXPRESSION + TABLE id: 6, table_name: default.t2 + RIGHT TABLE EXPRESSION + QUERY id: 9, alias: t3, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 12, nodes: 1 + COLUMN id: 13, column_name: a, result_type: UInt64, source_id: 14 + JOIN TREE + TABLE id: 14, table_name: default.t3 + WHERE + FUNCTION id: 15, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 13, column_name: a, result_type: UInt64, source_id: 14 + CONSTANT id: 19, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 20, column_name: b, result_type: UInt64, source_id: 14 + WHERE + FUNCTION id: 21, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 22, nodes: 3 + FUNCTION id: 23, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 24, nodes: 2 + COLUMN id: 25, column_name: a, result_type: UInt64, source_id: 3 + FUNCTION id: 26, function_name: if, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 3 + FUNCTION id: 28, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 29, nodes: 2 + COLUMN id: 30, column_name: b, result_type: UInt64, source_id: 6 + CONSTANT id: 31, constant_value: UInt64_0, constant_value_type: UInt8 + COLUMN id: 32, column_name: a, result_type: UInt64, source_id: 6 + CONSTANT id: 33, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 34, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 35, nodes: 2 + COLUMN id: 32, column_name: a, result_type: UInt64, source_id: 6 + COLUMN id: 36, column_name: x, result_type: UInt64, source_id: 9 + CONSTANT id: 37, constant_value: UInt64_1, constant_value_type: UInt8 + SETTINGS cross_to_inner_join_rewrite=0 +EXPLAIN QUERY TREE +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0); +QUERY id: 0 + PROJECTION COLUMNS + t1.a UInt64 + t1.b UInt64 + t2.a UInt64 + t2.b UInt64 + x UInt64 + PROJECTION + LIST id: 1, nodes: 5 + COLUMN id: 2, column_name: a, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 3 + COLUMN id: 5, column_name: a, result_type: UInt64, source_id: 6 + COLUMN id: 7, column_name: b, result_type: UInt64, source_id: 6 + COLUMN id: 8, column_name: x, result_type: UInt64, source_id: 9 + JOIN TREE + JOIN id: 10, kind: COMMA + LEFT TABLE EXPRESSION + JOIN id: 11, strictness: ALL, kind: INNER + LEFT TABLE EXPRESSION + TABLE id: 3, table_name: default.t1 + RIGHT TABLE EXPRESSION + TABLE id: 6, table_name: default.t2 + JOIN EXPRESSION + FUNCTION id: 12, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: a, result_type: UInt64, source_id: 3 + FUNCTION id: 15, function_name: if, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 3 + FUNCTION id: 17, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: b, result_type: UInt64, source_id: 6 + CONSTANT id: 20, constant_value: UInt64_0, constant_value_type: UInt8 + COLUMN id: 21, column_name: a, result_type: UInt64, source_id: 6 + CONSTANT id: 22, constant_value: UInt64_0, constant_value_type: UInt8 + RIGHT TABLE EXPRESSION + QUERY id: 9, alias: t3, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 23, nodes: 1 + COLUMN id: 24, column_name: a, result_type: UInt64, source_id: 25 + JOIN TREE + TABLE id: 25, table_name: default.t3 + WHERE + FUNCTION id: 26, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 27, nodes: 2 + FUNCTION id: 28, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 2 + COLUMN id: 24, column_name: a, result_type: UInt64, source_id: 25 + CONSTANT id: 30, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 31, column_name: b, result_type: UInt64, source_id: 25 diff --git a/tests/queries/0_stateless/02564_analyzer_cross_to_inner.sql b/tests/queries/0_stateless/02564_analyzer_cross_to_inner.sql new file mode 100644 index 00000000000..a83cd238982 --- /dev/null +++ b/tests/queries/0_stateless/02564_analyzer_cross_to_inner.sql @@ -0,0 +1,50 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t1 (a UInt64, b UInt64) ENGINE = Memory; +INSERT INTO t1 VALUES (1, 2), (3, 4), (5, 6); + +CREATE TABLE t2 (a UInt64, b UInt64) ENGINE = Memory; +INSERT INTO t2 VALUES (3, 4), (5, 6), (7, 8); + +CREATE TABLE t3 (a UInt64, b UInt64) ENGINE = Memory; +INSERT INTO t3 VALUES (5, 6), (7, 8), (9, 10); + +SET cross_to_inner_join_rewrite = 1; + +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0) AND t2.a = t3.x AND 1 +; + +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0) +ORDER BY t1.a, t2.a, t3.x +; + +-- { echoOn } + +EXPLAIN QUERY TREE +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0) AND t2.a = t3.x AND 1; + +EXPLAIN QUERY TREE +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0) AND t2.a = t3.x AND 1 +SETTINGS cross_to_inner_join_rewrite = 0; + +EXPLAIN QUERY TREE +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0); + +-- { echoOff } + +SELECT * FROM t1, t2, (SELECT a as x from t3 where a + 1 = b ) as t3 +WHERE t1.a = if(t2.b > 0, t2.a, 0) +SETTINGS cross_to_inner_join_rewrite = 2; -- { serverError INCORRECT_QUERY } + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; diff --git a/tests/queries/0_stateless/02564_query_id_header.reference b/tests/queries/0_stateless/02564_query_id_header.reference new file mode 100644 index 00000000000..413e8929f36 --- /dev/null +++ b/tests/queries/0_stateless/02564_query_id_header.reference @@ -0,0 +1,22 @@ +CREATE TABLE t_query_id_header (a UInt64) ENGINE = Memory +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone +INSERT INTO t_query_id_header VALUES (1) +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone +EXISTS TABLE t_query_id_header +< Content-Type: text/tab-separated-values; charset=UTF-8 +< X-ClickHouse-Format: TabSeparated +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone +SELECT * FROM t_query_id_header +< Content-Type: text/tab-separated-values; charset=UTF-8 +< X-ClickHouse-Format: TabSeparated +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone +DROP TABLE t_query_id_header +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone diff --git a/tests/queries/0_stateless/02564_query_id_header.sh b/tests/queries/0_stateless/02564_query_id_header.sh new file mode 100755 index 00000000000..67ddbcfcc46 --- /dev/null +++ b/tests/queries/0_stateless/02564_query_id_header.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +CLICKHOUSE_TIMEZONE_ESCAPED=$($CLICKHOUSE_CLIENT --query="SELECT timezone()" | sed 's/[]\/$*.^+:()[]/\\&/g') + +function run_and_check_headers() +{ + query=$1 + query_id="${CLICKHOUSE_DATABASE}_${RANDOM}" + + echo "$query" + + ${CLICKHOUSE_CURL} -sS -v "${CLICKHOUSE_URL}&query_id=$query_id" -d "$1" 2>&1 \ + | grep -e "< X-ClickHouse-Query-Id" -e "< X-ClickHouse-Timezone" -e "< X-ClickHouse-Format" -e "< Content-Type" \ + | sed "s/$CLICKHOUSE_TIMEZONE_ESCAPED/timezone/" \ + | sed "s/$query_id/query_id/" \ + | sed "s/\r$//" \ + | sort +} + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_query_id_header" + +run_and_check_headers "CREATE TABLE t_query_id_header (a UInt64) ENGINE = Memory" +run_and_check_headers "INSERT INTO t_query_id_header VALUES (1)" +run_and_check_headers "EXISTS TABLE t_query_id_header" +run_and_check_headers "SELECT * FROM t_query_id_header" +run_and_check_headers "DROP TABLE t_query_id_header" diff --git a/tests/queries/0_stateless/02666_read_in_order_final_desc.reference b/tests/queries/0_stateless/02564_read_in_order_final_desc.reference similarity index 100% rename from tests/queries/0_stateless/02666_read_in_order_final_desc.reference rename to tests/queries/0_stateless/02564_read_in_order_final_desc.reference diff --git a/tests/queries/0_stateless/02666_read_in_order_final_desc.sql b/tests/queries/0_stateless/02564_read_in_order_final_desc.sql similarity index 100% rename from tests/queries/0_stateless/02666_read_in_order_final_desc.sql rename to tests/queries/0_stateless/02564_read_in_order_final_desc.sql diff --git a/tests/queries/0_stateless/02565_analyzer_limit_settings.reference b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference new file mode 100644 index 00000000000..6f23097612e --- /dev/null +++ b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference @@ -0,0 +1,70 @@ +-- { echoOn } +SET limit = 0; +SELECT * FROM numbers(10); +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +SELECT * FROM numbers(10) SETTINGS limit=5, offset=2; +2 +3 +4 +5 +6 +SELECT count(*) FROM (SELECT * FROM numbers(10)); +10 +SELECT count(*) FROM (SELECT * FROM numbers(10) SETTINGS limit=5); +5 +SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; +10 +SELECT count(*) FROM view(SELECT * FROM numbers(10)); +10 +SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); +5 +SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; +10 +SET limit = 3; +SELECT * FROM numbers(10); +0 +1 +2 +SELECT * FROM numbers(10) SETTINGS limit=5, offset=2; +2 +3 +4 +5 +6 +SELECT count(*) FROM (SELECT * FROM numbers(10)); +10 +SELECT count(*) FROM (SELECT * FROM numbers(10) SETTINGS limit=5); +5 +SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; +10 +SELECT count(*) FROM view(SELECT * FROM numbers(10)); +10 +SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); +5 +SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; +10 +SET limit = 4; +SET offset = 1; +SELECT * FROM numbers(10); +1 +2 +3 +4 +SELECT * FROM numbers(10) LIMIT 3 OFFSET 2; +3 +4 +5 +SELECT * FROM numbers(10) LIMIT 5 OFFSET 2; +3 +4 +5 +6 diff --git a/tests/queries/0_stateless/02565_analyzer_limit_settings.sql b/tests/queries/0_stateless/02565_analyzer_limit_settings.sql new file mode 100644 index 00000000000..7c02c2d0d20 --- /dev/null +++ b/tests/queries/0_stateless/02565_analyzer_limit_settings.sql @@ -0,0 +1,30 @@ +SET allow_experimental_analyzer = 1; + +-- { echoOn } +SET limit = 0; + +SELECT * FROM numbers(10); +SELECT * FROM numbers(10) SETTINGS limit=5, offset=2; +SELECT count(*) FROM (SELECT * FROM numbers(10)); +SELECT count(*) FROM (SELECT * FROM numbers(10) SETTINGS limit=5); +SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; +SELECT count(*) FROM view(SELECT * FROM numbers(10)); +SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); +SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; + +SET limit = 3; +SELECT * FROM numbers(10); +SELECT * FROM numbers(10) SETTINGS limit=5, offset=2; +SELECT count(*) FROM (SELECT * FROM numbers(10)); +SELECT count(*) FROM (SELECT * FROM numbers(10) SETTINGS limit=5); +SELECT count(*) FROM (SELECT * FROM numbers(10)) SETTINGS limit=5; +SELECT count(*) FROM view(SELECT * FROM numbers(10)); +SELECT count(*) FROM view(SELECT * FROM numbers(10) SETTINGS limit=5); +SELECT count(*) FROM view(SELECT * FROM numbers(10)) SETTINGS limit=5; + +SET limit = 4; +SET offset = 1; +SELECT * FROM numbers(10); +SELECT * FROM numbers(10) LIMIT 3 OFFSET 2; +SELECT * FROM numbers(10) LIMIT 5 OFFSET 2; +-- { echoOff } diff --git a/tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.reference b/tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.reference new file mode 100644 index 00000000000..466e80931e5 --- /dev/null +++ b/tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.reference @@ -0,0 +1,14 @@ +limit 0 +limit 1 +limit 2 +limit 3 +limit 4 +offset 5 +offset 6 +offset 7 +offset 8 +offset 9 +limit w/ GROUP BY 4 4 +limit w/ GROUP BY 4 3 +limit/offset w/ GROUP BY 4 2 +limit/offset w/ GROUP BY 4 1 diff --git a/tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.sql b/tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.sql new file mode 100644 index 00000000000..1624344b5a9 --- /dev/null +++ b/tests/queries/0_stateless/02566_analyzer_limit_settings_distributed.sql @@ -0,0 +1,34 @@ +-- Tags: distributed + +SET allow_experimental_analyzer = 1; + +SELECT 'limit', * FROM remote('127.1', view(SELECT * FROM numbers(10))) SETTINGS limit=5; +SELECT 'offset', * FROM remote('127.1', view(SELECT * FROM numbers(10))) SETTINGS offset=5; + +SELECT + 'limit w/ GROUP BY', + count(), + number +FROM remote('127.{1,2}', view( + SELECT intDiv(number, 2) AS number + FROM numbers(10) +)) +GROUP BY number +ORDER BY + count() ASC, + number DESC +SETTINGS limit=2; + +SELECT + 'limit/offset w/ GROUP BY', + count(), + number +FROM remote('127.{1,2}', view( + SELECT intDiv(number, 2) AS number + FROM numbers(10) +)) +GROUP BY number +ORDER BY + count() ASC, + number DESC +SETTINGS limit=2, offset=2; diff --git a/tests/queries/0_stateless/02567_and_consistency.reference b/tests/queries/0_stateless/02567_and_consistency.reference new file mode 100644 index 00000000000..bcb2b5aecfb --- /dev/null +++ b/tests/queries/0_stateless/02567_and_consistency.reference @@ -0,0 +1,23 @@ +true +===== +true +===== +true +===== +true +===== +===== +1 +===== +===== +allow_experimental_analyzer +true +#45440 +2086579505 0 1 0 0 +-542998757 -542998757 1 0 0 += +2086579505 0 1 0 0 +-542998757 -542998757 1 0 0 += +2086579505 0 1 0 0 +-542998757 -542998757 1 0 0 diff --git a/tests/queries/0_stateless/02567_and_consistency.sql b/tests/queries/0_stateless/02567_and_consistency.sql new file mode 100644 index 00000000000..f02185a1a52 --- /dev/null +++ b/tests/queries/0_stateless/02567_and_consistency.sql @@ -0,0 +1,106 @@ +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 0; + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(1) +SETTINGS enable_optimize_predicate_expression = 0; + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING x AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 1; + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 0; + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 1; -- { serverError 59 } + +SELECT '====='; + +SELECT 1 and sin(1); + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING x AND sin(1) +SETTINGS enable_optimize_predicate_expression = 0; -- { serverError 59 } + +SELECT '====='; +SELECT 'allow_experimental_analyzer'; + +SET allow_experimental_analyzer = 1; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 1; + +select '#45440'; + +DROP TABLE IF EXISTS t2; +CREATE TABLE t2(c0 Int32) ENGINE = MergeTree ORDER BY c0; +INSERT INTO t2 VALUES (928386547), (1541944097), (2086579505), (1990427322), (-542998757), (390253678), (554855248), (203290629), (1504693323); + +SELECT + MAX(left.c0), + min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) AS g, + (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) AS h, + NOT h, + h IS NULL +FROM t2 AS left +GROUP BY g; +select '='; +SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null + FROM t2 AS left + GROUP BY g HAVING h SETTINGS enable_optimize_predicate_expression = 0; +select '='; +SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null + FROM t2 AS left + GROUP BY g HAVING h SETTINGS enable_optimize_predicate_expression = 1; + +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02667_array_map_const_low_cardinality.reference b/tests/queries/0_stateless/02568_array_map_const_low_cardinality.reference similarity index 100% rename from tests/queries/0_stateless/02667_array_map_const_low_cardinality.reference rename to tests/queries/0_stateless/02568_array_map_const_low_cardinality.reference diff --git a/tests/queries/0_stateless/02667_array_map_const_low_cardinality.sql b/tests/queries/0_stateless/02568_array_map_const_low_cardinality.sql similarity index 100% rename from tests/queries/0_stateless/02667_array_map_const_low_cardinality.sql rename to tests/queries/0_stateless/02568_array_map_const_low_cardinality.sql diff --git a/tests/queries/0_stateless/02667_json_array_length.reference b/tests/queries/0_stateless/02568_json_array_length.reference similarity index 100% rename from tests/queries/0_stateless/02667_json_array_length.reference rename to tests/queries/0_stateless/02568_json_array_length.reference diff --git a/tests/queries/0_stateless/02667_json_array_length.sql b/tests/queries/0_stateless/02568_json_array_length.sql similarity index 100% rename from tests/queries/0_stateless/02667_json_array_length.sql rename to tests/queries/0_stateless/02568_json_array_length.sql diff --git a/tests/queries/0_stateless/02569_order_by_aggregation_result.reference b/tests/queries/0_stateless/02569_order_by_aggregation_result.reference new file mode 100644 index 00000000000..a89e39d87b3 --- /dev/null +++ b/tests/queries/0_stateless/02569_order_by_aggregation_result.reference @@ -0,0 +1,7 @@ +0 0 +0 1 █████████████████████████████████████████████████▉ +1 2 0.5 +1 0.1 1.1 +00000000-0000-0000-0000-000000000000 b 1 +417ddc5d-e556-4d27-95dd-a34d84e46a50 c 1 +notEmpty a 1 diff --git a/tests/queries/0_stateless/02569_order_by_aggregation_result.sql b/tests/queries/0_stateless/02569_order_by_aggregation_result.sql new file mode 100644 index 00000000000..3fef0374d83 --- /dev/null +++ b/tests/queries/0_stateless/02569_order_by_aggregation_result.sql @@ -0,0 +1,60 @@ +-- Github issues: +-- - https://github.com/ClickHouse/ClickHouse/issues/46268 +-- - https://github.com/ClickHouse/ClickHouse/issues/46273 + +-- Queries that the original PR (https://github.com/ClickHouse/ClickHouse/pull/42827) tried to fix +SELECT (number = 1) AND (number = 2) AS value, sum(value) OVER () FROM numbers(1) WHERE 1; +SELECT time, round(exp_smooth, 10), bar(exp_smooth, -9223372036854775807, 1048575, 50) AS bar FROM (SELECT 2 OR (number = 0) OR (number >= 1) AS value, number AS time, exponentialTimeDecayedSum(2147483646)(value, time) OVER (RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS exp_smooth FROM numbers(1) WHERE 10) WHERE 25; + +CREATE TABLE ttttttt +( + `timestamp` DateTime, + `col1` Float64, + `col2` Float64, + `col3` Float64 +) +ENGINE = MergeTree() +ORDER BY tuple(); + +INSERT INTO ttttttt VALUES ('2023-02-20 00:00:00', 1, 2, 3); + +-- Query that https://github.com/ClickHouse/ClickHouse/pull/42827 broke +SELECT + argMax(col1, timestamp) AS col1, + argMax(col2, timestamp) AS col2, + col1 / col2 AS final_col +FROM ttttttt +GROUP BY + col3 +ORDER BY final_col DESC; + +SELECT + argMax(col1, timestamp) AS col1, + col1 / 10 AS final_col, + final_col + 1 AS final_col2 +FROM ttttttt +GROUP BY col3; + +-- https://github.com/ClickHouse/ClickHouse/issues/46724 + +CREATE TABLE table1 +( + id String, + device UUID +) +ENGINE = MergeTree() ORDER BY tuple(); + +INSERT INTO table1 VALUES ('notEmpty', '417ddc5d-e556-4d27-95dd-a34d84e46a50'); +INSERT INTO table1 VALUES ('', '417ddc5d-e556-4d27-95dd-a34d84e46a50'); +INSERT INTO table1 VALUES ('', '00000000-0000-0000-0000-000000000000'); + +SELECT + if(empty(id), toString(device), id) AS device, + multiIf( + notEmpty(id),'a', + device == '00000000-0000-0000-0000-000000000000', 'b', + 'c' ) AS device_id_type, + count() +FROM table1 +GROUP BY device, device_id_type +ORDER BY device; diff --git a/tests/queries/0_stateless/02570_fallback_from_async_insert.reference b/tests/queries/0_stateless/02570_fallback_from_async_insert.reference new file mode 100644 index 00000000000..7aa58724b9e --- /dev/null +++ b/tests/queries/0_stateless/02570_fallback_from_async_insert.reference @@ -0,0 +1,23 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +id_0 +id_1 +id_2 +id_3 +id_4 diff --git a/tests/queries/0_stateless/02570_fallback_from_async_insert.sh b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh new file mode 100755 index 00000000000..9c158d6241b --- /dev/null +++ b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +message="INSERT query will be executed synchronously because it has too much data" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t_async_insert_fallback" +$CLICKHOUSE_CLIENT --query "CREATE TABLE t_async_insert_fallback (a UInt64) ENGINE = Memory" + +query_id_suffix="${CLICKHOUSE_DATABASE}_${RANDOM}" + +# inlined data via native protocol +$CLICKHOUSE_CLIENT \ + --query_id "0_$query_id_suffix" \ + --async_insert 1 \ + --async_insert_max_data_size 5 \ + --query "INSERT INTO t_async_insert_fallback VALUES (1) (2) (3)" + +# inlined data via http +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query_id=1_$query_id_suffix&async_insert=1&async_insert_max_data_size=3" \ + -d "INSERT INTO t_async_insert_fallback VALUES (4) (5) (6)" + +# partially inlined partially sent via post data +${CLICKHOUSE_CURL} -sS -X POST \ + "${CLICKHOUSE_URL}&query_id=2_$query_id_suffix&async_insert=1&async_insert_max_data_size=5&query=INSERT+INTO+t_async_insert_fallback+VALUES+(7)" \ + --data-binary @- <<< "(8) (9)" + +# partially inlined partially sent via post data +${CLICKHOUSE_CURL} -sS -X POST \ + "${CLICKHOUSE_URL}&query_id=3_$query_id_suffix&async_insert=1&async_insert_max_data_size=5&query=INSERT+INTO+t_async_insert_fallback+VALUES+(10)+(11)" \ + --data-binary @- <<< "(12)" + +# sent via post data +${CLICKHOUSE_CURL} -sS -X POST \ + "${CLICKHOUSE_URL}&query_id=4_$query_id_suffix&async_insert=1&async_insert_max_data_size=5&query=INSERT+INTO+t_async_insert_fallback+FORMAT+Values" \ + --data-binary @- <<< "(13) (14) (15)" + +# no limit for async insert size +${CLICKHOUSE_CURL} -sS -X POST \ + "${CLICKHOUSE_URL}&query_id=5_$query_id_suffix&async_insert=1&query=INSERT+INTO+t_async_insert_fallback+FORMAT+Values" \ + --data-binary @- <<< "(16) (17) (18)" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM t_async_insert_fallback ORDER BY a" +$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT --query " + SELECT 'id_' || splitByChar('_', query_id)[1] AS id FROM system.text_log + WHERE query_id LIKE '%$query_id_suffix' AND message LIKE '%$message%' +" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t_async_insert_fallback" diff --git a/tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.reference b/tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.sh b/tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.sh new file mode 100755 index 00000000000..e4f738f18ff --- /dev/null +++ b/tests/queries/0_stateless/02571_local_desc_abort_on_twitter_json.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "desc file('$CUR_DIR/data_json/twitter.jsonl')" 2>&1 | grep -c "ONLY_NULLS_WHILE_READING_SCHEMA" + diff --git a/tests/queries/0_stateless/02572_max_intersections.reference b/tests/queries/0_stateless/02572_max_intersections.reference new file mode 100644 index 00000000000..30e9e0b1342 --- /dev/null +++ b/tests/queries/0_stateless/02572_max_intersections.reference @@ -0,0 +1 @@ +04010000000000000001000000000000000300000000000000FFFFFFFFFFFFFFFF030000000000000001000000000000000500000000000000FFFFFFFFFFFFFFFF diff --git a/tests/queries/0_stateless/02572_max_intersections.sql b/tests/queries/0_stateless/02572_max_intersections.sql new file mode 100644 index 00000000000..5ac865222fe --- /dev/null +++ b/tests/queries/0_stateless/02572_max_intersections.sql @@ -0,0 +1 @@ +SELECT hex(maxIntersectionsState(*)) FROM VALUES((1, 3), (3, 5)); diff --git a/tests/queries/0_stateless/02573_insert_null_as_default_null_as_empty_nested.reference b/tests/queries/0_stateless/02573_insert_null_as_default_null_as_empty_nested.reference new file mode 100644 index 00000000000..e337b8d87c4 --- /dev/null +++ b/tests/queries/0_stateless/02573_insert_null_as_default_null_as_empty_nested.reference @@ -0,0 +1,42 @@ +-- { echo } +--- ensure that input_format_null_as_default allow writes to Nullable columns too +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login Nullable(String))))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1; +((('root'))) +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login Nullable(String))))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1; +(((NULL))) +--- tuple +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=0; +((('root'))) +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1; +((('root'))) +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {}}') settings input_format_null_as_default=0; +(((''))) +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {}}') settings input_format_null_as_default=1; +(((''))) +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=0; -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1; +(((''))) +--- map +select * from format(JSONEachRow, '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=0; +{'pull_request':{'merged_by':{'login':'root'}}} +select * from format(JSONEachRow, '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1; +{'pull_request':{'merged_by':{'login':'root'}}} +select * from format(JSONEachRow, 'payload Map(String, String)', '{"payload" : {}}') settings input_format_null_as_default=0; +{} +select * from format(JSONEachRow, 'payload Map(String, String)', '{"payload" : {}}') settings input_format_null_as_default=1; +{} +select * from format(JSONEachRow, 'payload Map(String, Map(String, Map(String, String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=0; -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } +select * from format(JSONEachRow, 'payload Map(String, Map(String, Map(String, String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1; +{'pull_request':{'merged_by':{}}} +--- array +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : ["root"]}') settings input_format_null_as_default=0; +['root'] +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : ["root"]}') settings input_format_null_as_default=1; +['root'] +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : []}') settings input_format_null_as_default=0; +[] +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : []}') settings input_format_null_as_default=1; +[] +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : null}') settings input_format_null_as_default=0; -- { serverError CANNOT_READ_ARRAY_FROM_TEXT } +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : null}') settings input_format_null_as_default=1; +[] diff --git a/tests/queries/0_stateless/02573_insert_null_as_default_null_as_empty_nested.sql b/tests/queries/0_stateless/02573_insert_null_as_default_null_as_empty_nested.sql new file mode 100644 index 00000000000..084831b0062 --- /dev/null +++ b/tests/queries/0_stateless/02573_insert_null_as_default_null_as_empty_nested.sql @@ -0,0 +1,25 @@ +-- { echo } +--- ensure that input_format_null_as_default allow writes to Nullable columns too +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login Nullable(String))))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1; +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login Nullable(String))))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1; +--- tuple +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=0; +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1; +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {}}') settings input_format_null_as_default=0; +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {}}') settings input_format_null_as_default=1; +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=0; -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } +select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1; +--- map +select * from format(JSONEachRow, '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=0; +select * from format(JSONEachRow, '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1; +select * from format(JSONEachRow, 'payload Map(String, String)', '{"payload" : {}}') settings input_format_null_as_default=0; +select * from format(JSONEachRow, 'payload Map(String, String)', '{"payload" : {}}') settings input_format_null_as_default=1; +select * from format(JSONEachRow, 'payload Map(String, Map(String, Map(String, String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=0; -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } +select * from format(JSONEachRow, 'payload Map(String, Map(String, Map(String, String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1; +--- array +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : ["root"]}') settings input_format_null_as_default=0; +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : ["root"]}') settings input_format_null_as_default=1; +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : []}') settings input_format_null_as_default=0; +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : []}') settings input_format_null_as_default=1; +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : null}') settings input_format_null_as_default=0; -- { serverError CANNOT_READ_ARRAY_FROM_TEXT } +select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : null}') settings input_format_null_as_default=1; diff --git a/tests/queries/0_stateless/02573_quantile_fuse_msan.reference b/tests/queries/0_stateless/02573_quantile_fuse_msan.reference new file mode 100644 index 00000000000..f6eca3b9698 --- /dev/null +++ b/tests/queries/0_stateless/02573_quantile_fuse_msan.reference @@ -0,0 +1,2 @@ +1970-01-01 00:00:00 1970-01-01 00:00:00 +1 diff --git a/tests/queries/0_stateless/02573_quantile_fuse_msan.sql b/tests/queries/0_stateless/02573_quantile_fuse_msan.sql new file mode 100644 index 00000000000..efeef0b0ebf --- /dev/null +++ b/tests/queries/0_stateless/02573_quantile_fuse_msan.sql @@ -0,0 +1,5 @@ +SET optimize_syntax_fuse_functions=1; +CREATE TEMPORARY TABLE datetime (`d` DateTime('UTC')); +SELECT quantile(0.1)(d), quantile(0.5)(d) FROM datetime; +INSERT INTO datetime SELECT * FROM generateRandom() LIMIT 10; +SELECT max(cityHash64(*)) > 0 FROM (SELECT quantile(0.1)(d), quantile(0.5)(d) FROM datetime); diff --git a/tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.reference b/tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.reference new file mode 100644 index 00000000000..7dd7c8eece6 --- /dev/null +++ b/tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.reference @@ -0,0 +1,20 @@ +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 diff --git a/tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.sql b/tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.sql new file mode 100644 index 00000000000..f2841ac8897 --- /dev/null +++ b/tests/queries/0_stateless/02574_suspicious_low_cardinality_msan.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS table1__fuzz_19; + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE table1__fuzz_19 (`id` LowCardinality(UInt16), `v` DateTime64(3, 'UTC')) ENGINE = ReplacingMergeTree(v) PARTITION BY id % 200 ORDER BY id; +INSERT INTO table1__fuzz_19 SELECT number - 205, number FROM numbers(10); +INSERT INTO table1__fuzz_19 SELECT number - 205, number FROM numbers(400, 10); + +SELECT 1023, (((id % -9223372036854775807) = NULL) OR ((id % NULL) = 100) OR ((id % NULL) = 65537)) = ((id % inf) = 9223372036854775806), (id % NULL) = NULL, (id % 3.4028234663852886e38) = 1023, 2147483646 FROM table1__fuzz_19 ORDER BY (((id % 1048577) = 1024) % id) = 1023 DESC NULLS FIRST, id % 2147483646 ASC NULLS FIRST, ((id % 1) = 9223372036854775807) OR ((id % NULL) = 257) DESC NULLS FIRST; + +DROP TABLE table1__fuzz_19; diff --git a/tests/queries/0_stateless/02575_map_hashing_msan.reference b/tests/queries/0_stateless/02575_map_hashing_msan.reference new file mode 100644 index 00000000000..d3fbc1377d4 --- /dev/null +++ b/tests/queries/0_stateless/02575_map_hashing_msan.reference @@ -0,0 +1,6 @@ +4786021384179797717 +5368498105280294197 +42 15687122600100720591 +42 15687122600100720591 +42 15687122600100720591 +\N diff --git a/tests/queries/0_stateless/02575_map_hashing_msan.sql b/tests/queries/0_stateless/02575_map_hashing_msan.sql new file mode 100644 index 00000000000..2fe3620e68c --- /dev/null +++ b/tests/queries/0_stateless/02575_map_hashing_msan.sql @@ -0,0 +1,7 @@ +SELECT cityHash64(map(1, 'Hello'), CAST(materialize('World') AS LowCardinality(String))); +SELECT cityHash64(map(), CAST(materialize('') AS LowCardinality(Nullable(String)))); +SELECT materialize(42) as last_element, cityHash64(map(), CAST(materialize('') AS LowCardinality(Nullable(String))), last_element) from numbers(3); + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TEMPORARY TABLE datetime__fuzz_14 (`d` LowCardinality(Nullable(UInt128))); +SELECT max(mapPopulateSeries(mapPopulateSeries(map(toInt64(1048), toInt64(9223), 3, -2147))), toInt64(1048), map('11', 257, '', NULL), cityHash64(*)) > NULL FROM (SELECT max(cityHash64(mapPopulateSeries(mapPopulateSeries(map(toInt64(1048), toInt64(2147), 655, -2147))), *)) > NULL, map(toInt64(-2147), toInt64(100.0001), -2147, NULL), mapPopulateSeries(map(toInt64(1024), toInt64(1048), 1048, -1)), map(toInt64(256), toInt64(NULL), -1, NULL), quantile(0.0001)(d) FROM datetime__fuzz_14 WITH TOTALS); diff --git a/tests/queries/0_stateless/02575_merge_prewhere_default_expression.reference b/tests/queries/0_stateless/02575_merge_prewhere_default_expression.reference new file mode 100644 index 00000000000..434384b3d77 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_default_expression.reference @@ -0,0 +1,8 @@ +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a, f; +OK 1 +OK 2 +SELECT * FROM m PREWHERE f = 1 ORDER BY a, f; +OK 1 +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_default_expression.sql b/tests/queries/0_stateless/02575_merge_prewhere_default_expression.sql new file mode 100644 index 00000000000..83c1d51269d --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_default_expression.sql @@ -0,0 +1,38 @@ +-- Allow PREWHERE when Merge() and MergeTree has different DEFAULT expression + +DROP TABLE IF EXISTS m; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE m +( + `a` String, + `f` UInt8 DEFAULT 0 +) +ENGINE = Merge(currentDatabase(), '^(t1|t2)$'); + +CREATE TABLE t1 +( + a String, + f UInt8 DEFAULT 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t1 (a) VALUES ('OK'); + +CREATE TABLE t2 +( + a String, + f UInt8 DEFAULT 2 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t2 (a) VALUES ('OK'); + +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a, f; +SELECT * FROM m PREWHERE f = 1 ORDER BY a, f; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02570_merge_alias_prewhere.reference b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference similarity index 100% rename from tests/queries/0_stateless/02570_merge_alias_prewhere.reference rename to tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference diff --git a/tests/queries/0_stateless/02570_merge_alias_prewhere.sql b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql similarity index 90% rename from tests/queries/0_stateless/02570_merge_alias_prewhere.sql rename to tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql index 59ca717a418..0f1d582a26e 100644 --- a/tests/queries/0_stateless/02570_merge_alias_prewhere.sql +++ b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql @@ -1,3 +1,5 @@ +-- Prohibit PREWHERE when Merge and MergeTree has different default type of the column + DROP TABLE IF EXISTS m; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.reference b/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.reference new file mode 100644 index 00000000000..9f214b8c536 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.reference @@ -0,0 +1,11 @@ +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a; +OK +OK +SELECT * FROM m PREWHERE f = 1 ORDER BY a; -- { serverError ILLEGAL_PREWHERE } +SELECT * FROM m WHERE a = 'OK' SETTINGS optimize_move_to_prewhere=0; +OK +OK +SELECT * FROM m WHERE a = 'OK' SETTINGS optimize_move_to_prewhere=1; +OK +OK diff --git a/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.sql b/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.sql new file mode 100644 index 00000000000..85e03647d62 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.sql @@ -0,0 +1,38 @@ +-- You cannot query EPHEMERAL + +DROP TABLE IF EXISTS m; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE m +( + `a` String, + `f` UInt8 EPHEMERAL 0 +) +ENGINE = Merge(currentDatabase(), '^(t1|t2)$'); + +CREATE TABLE t1 +( + a String, + f UInt8 DEFAULT 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t1 (a) VALUES ('OK'); + +CREATE TABLE t2 +( + a String, + f UInt8 DEFAULT 2 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t2 (a) VALUES ('OK'); + +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a; +SELECT * FROM m PREWHERE f = 1 ORDER BY a; -- { serverError ILLEGAL_PREWHERE } +SELECT * FROM m WHERE a = 'OK' SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE a = 'OK' SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_materialized.reference b/tests/queries/0_stateless/02575_merge_prewhere_materialized.reference new file mode 100644 index 00000000000..434384b3d77 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_materialized.reference @@ -0,0 +1,8 @@ +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a, f; +OK 1 +OK 2 +SELECT * FROM m PREWHERE f = 1 ORDER BY a, f; +OK 1 +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_materialized.sql b/tests/queries/0_stateless/02575_merge_prewhere_materialized.sql new file mode 100644 index 00000000000..eae72274c31 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_materialized.sql @@ -0,0 +1,43 @@ +-- Allow PREWHERE when Merge has DEFAULT and MergeTree has MATERIALIZED + +DROP TABLE IF EXISTS m; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE m +( + `a` String, + `f` UInt8 DEFAULT 0 +) +ENGINE = Merge(currentDatabase(), '^(t1|t2)$'); + +CREATE TABLE t1 +( + a String, + f UInt8 MATERIALIZED 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t1 (a) VALUES ('OK'); + +CREATE TABLE t2 +( + a String, + f UInt8 DEFAULT 2 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t2 (a) VALUES ('OK'); + +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a, f; +SELECT * FROM m PREWHERE f = 1 ORDER BY a, f; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; +-- { echoOff } + +DROP TABLE m; +DROP TABLE t1; +DROP TABLE t2; diff --git a/tests/queries/0_stateless/25337_predicate_push_down_sorting_fix.reference b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference similarity index 100% rename from tests/queries/0_stateless/25337_predicate_push_down_sorting_fix.reference rename to tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference diff --git a/tests/queries/0_stateless/25337_predicate_push_down_sorting_fix.sql b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.sql similarity index 100% rename from tests/queries/0_stateless/25337_predicate_push_down_sorting_fix.sql rename to tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.sql diff --git a/tests/queries/0_stateless/02660_rewrite_array_exists_to_has.reference b/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.reference similarity index 100% rename from tests/queries/0_stateless/02660_rewrite_array_exists_to_has.reference rename to tests/queries/0_stateless/02576_rewrite_array_exists_to_has.reference diff --git a/tests/queries/0_stateless/02660_rewrite_array_exists_to_has.sql b/tests/queries/0_stateless/02576_rewrite_array_exists_to_has.sql similarity index 100% rename from tests/queries/0_stateless/02660_rewrite_array_exists_to_has.sql rename to tests/queries/0_stateless/02576_rewrite_array_exists_to_has.sql diff --git a/tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.reference b/tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.reference new file mode 100644 index 00000000000..bc42121fb39 --- /dev/null +++ b/tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.reference @@ -0,0 +1,6 @@ +2 +3 +4 +2 +3 +4 diff --git a/tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.sql b/tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.sql new file mode 100644 index 00000000000..b6bb258db28 --- /dev/null +++ b/tests/queries/0_stateless/02577_analyzer_array_join_calc_twice.sql @@ -0,0 +1,5 @@ +SET allow_experimental_analyzer = 1; + +SELECT 1 + arrayJoin(a) AS m FROM (SELECT [1, 2, 3] AS a) GROUP BY m; + +SELECT 1 + arrayJoin(a) AS m FROM (SELECT [1, 2, 3] AS a) GROUP BY 1 + arrayJoin(a); diff --git a/tests/queries/0_stateless/02661_keepermap_delete_update.reference b/tests/queries/0_stateless/02577_keepermap_delete_update.reference similarity index 100% rename from tests/queries/0_stateless/02661_keepermap_delete_update.reference rename to tests/queries/0_stateless/02577_keepermap_delete_update.reference diff --git a/tests/queries/0_stateless/02661_keepermap_delete_update.sql b/tests/queries/0_stateless/02577_keepermap_delete_update.sql similarity index 100% rename from tests/queries/0_stateless/02661_keepermap_delete_update.sql rename to tests/queries/0_stateless/02577_keepermap_delete_update.sql diff --git a/tests/queries/0_stateless/02578_ipv4_codec_t64.reference b/tests/queries/0_stateless/02578_ipv4_codec_t64.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02578_ipv4_codec_t64.sql b/tests/queries/0_stateless/02578_ipv4_codec_t64.sql new file mode 100644 index 00000000000..63a19cba5a2 --- /dev/null +++ b/tests/queries/0_stateless/02578_ipv4_codec_t64.sql @@ -0,0 +1,3 @@ +DROP TABLE IF EXISTS ipv4_t64; +CREATE TABLE ipv4_t64 (uid Int16, ip IPv4 CODEC(T64), INDEX ip_idx ip TYPE bloom_filter GRANULARITY 4) ENGINE=MergeTree ORDER BY uid; +DROP TABLE IF EXISTS ipv4_t64; diff --git a/tests/queries/0_stateless/02661_parameterized_rename_queries.reference b/tests/queries/0_stateless/02578_parameterized_rename_queries.reference similarity index 100% rename from tests/queries/0_stateless/02661_parameterized_rename_queries.reference rename to tests/queries/0_stateless/02578_parameterized_rename_queries.reference diff --git a/tests/queries/0_stateless/02661_parameterized_rename_queries.sql b/tests/queries/0_stateless/02578_parameterized_rename_queries.sql similarity index 100% rename from tests/queries/0_stateless/02661_parameterized_rename_queries.sql rename to tests/queries/0_stateless/02578_parameterized_rename_queries.sql diff --git a/tests/queries/0_stateless/02579_fill_empty_chunk.reference b/tests/queries/0_stateless/02579_fill_empty_chunk.reference new file mode 100644 index 00000000000..4dd51b52f2f --- /dev/null +++ b/tests/queries/0_stateless/02579_fill_empty_chunk.reference @@ -0,0 +1,11 @@ +1 \N +2 \N +2 \N +2 \N +3 \N +4 \N +5 \N +6 \N +7 \N +8 \N +9 \N diff --git a/tests/queries/0_stateless/02579_fill_empty_chunk.sql b/tests/queries/0_stateless/02579_fill_empty_chunk.sql new file mode 100644 index 00000000000..14ae322d8c9 --- /dev/null +++ b/tests/queries/0_stateless/02579_fill_empty_chunk.sql @@ -0,0 +1,10 @@ +-- this SELECT produces empty chunk in FillingTransform + +SELECT + 2 AS x, + arrayJoin([NULL, NULL, NULL]) +GROUP BY + GROUPING SETS ( + (0), + ([NULL, NULL, NULL])) +ORDER BY x ASC WITH FILL FROM 1 TO 10; diff --git a/tests/queries/0_stateless/02661_parameterized_replace.reference b/tests/queries/0_stateless/02579_parameterized_replace.reference similarity index 100% rename from tests/queries/0_stateless/02661_parameterized_replace.reference rename to tests/queries/0_stateless/02579_parameterized_replace.reference diff --git a/tests/queries/0_stateless/02661_parameterized_replace.sql b/tests/queries/0_stateless/02579_parameterized_replace.sql similarity index 100% rename from tests/queries/0_stateless/02661_parameterized_replace.sql rename to tests/queries/0_stateless/02579_parameterized_replace.sql diff --git a/tests/queries/0_stateless/02580_like_substring_search_bug.reference b/tests/queries/0_stateless/02580_like_substring_search_bug.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02580_like_substring_search_bug.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02580_like_substring_search_bug.sql b/tests/queries/0_stateless/02580_like_substring_search_bug.sql new file mode 100644 index 00000000000..10ce3cdde36 --- /dev/null +++ b/tests/queries/0_stateless/02580_like_substring_search_bug.sql @@ -0,0 +1 @@ +SELECT 'Win\Sys' LIKE '%Win\Sys%'; diff --git a/tests/queries/0_stateless/02664_async_reading_with_small_limit.reference b/tests/queries/0_stateless/02664_async_reading_with_small_limit.reference new file mode 100644 index 00000000000..a7f994d4b13 --- /dev/null +++ b/tests/queries/0_stateless/02664_async_reading_with_small_limit.reference @@ -0,0 +1,7 @@ +(Expression) +ExpressionTransform + (Limit) + Limit + (ReadFromMergeTree) + Concat 3 → 1 + MergeTreeInOrder × 3 0 → 1 diff --git a/tests/queries/0_stateless/02664_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02664_async_reading_with_small_limit.sql new file mode 100644 index 00000000000..6ea6f880712 --- /dev/null +++ b/tests/queries/0_stateless/02664_async_reading_with_small_limit.sql @@ -0,0 +1,17 @@ +create table t(a UInt64) engine=MergeTree order by tuple(); + +system stop merges t; + +insert into t select * from numbers_mt(1e3); +insert into t select * from numbers_mt(1e3); +insert into t select * from numbers_mt(1e3); + +set allow_asynchronous_read_from_io_pool_for_merge_tree = 1; +set max_streams_for_merge_tree_reading = 64; +set max_block_size = 65409; + +-- slightly different transforms will be generated by reading steps if we let settings randomisation to change this setting value -- +set read_in_order_two_level_merge_threshold = 1000; + +-- for pretty simple queries (no filter, aggregation and so on) with a limit smaller than the `max_block_size` we request reading using only a single stream for better performance -- +explain pipeline select * from t limit 100; diff --git a/tests/queries/0_stateless/02670_map_literal_cast.reference b/tests/queries/0_stateless/02670_map_literal_cast.reference new file mode 100644 index 00000000000..9f4e9fd8d08 --- /dev/null +++ b/tests/queries/0_stateless/02670_map_literal_cast.reference @@ -0,0 +1,4 @@ +{'a':1,'b':2} +{'abc':22,'def':33} +{10:[11,12],13:[14,15]} +{'ghj':{'klm':[16,17]},'nop':{'rst':[18]}} diff --git a/tests/queries/0_stateless/02670_map_literal_cast.sql b/tests/queries/0_stateless/02670_map_literal_cast.sql new file mode 100644 index 00000000000..23bb880889b --- /dev/null +++ b/tests/queries/0_stateless/02670_map_literal_cast.sql @@ -0,0 +1,9 @@ +SELECT CAST([('a', 1), ('b', 2)], 'Map(String, UInt8)'); +SELECT CAST([('abc', 22), ('def', 33)], 'Map(String, UInt8)'); +SELECT CAST([(10, [11, 12]), (13, [14, 15])], 'Map(UInt8, Array(UInt8))'); +SELECT CAST([('ghj', [('klm', [16, 17])]), ('nop', [('rst', [18])])], 'Map(String, Map(String, Array(UInt8)))'); + +SELECT CAST((('a', 1), ('b', 2)), 'Map(String, UInt8)'); -- { serverError TYPE_MISMATCH } +SELECT CAST((('abc', 22), ('def', 33)), 'Map(String, UInt8)'); -- { serverError TYPE_MISMATCH } +SELECT CAST(((10, [11, 12]), (13, [14, 15])), 'Map(UInt8, Array(UInt8))'); -- { serverError TYPE_MISMATCH } +SELECT CAST((('ghj', (('klm', [16, 17]))), ('nop', (('rst', [18])))), 'Map(String, Map(String, Array(UInt8)))'); -- { serverError TYPE_MISMATCH } diff --git a/tests/queries/0_stateless/02674_null_default_structure.reference b/tests/queries/0_stateless/02674_null_default_structure.reference new file mode 100644 index 00000000000..922b3e34fb4 --- /dev/null +++ b/tests/queries/0_stateless/02674_null_default_structure.reference @@ -0,0 +1 @@ +dummy UInt8 diff --git a/tests/queries/0_stateless/02674_null_default_structure.sql b/tests/queries/0_stateless/02674_null_default_structure.sql new file mode 100644 index 00000000000..fcc5af216d9 --- /dev/null +++ b/tests/queries/0_stateless/02674_null_default_structure.sql @@ -0,0 +1,3 @@ +SELECT * FROM null(); +DESCRIBE null(); +insert into table function null() select 1, 'str'; diff --git a/tests/queries/0_stateless/02674_range_ipv4.reference b/tests/queries/0_stateless/02674_range_ipv4.reference new file mode 100644 index 00000000000..76fc0c45bd3 --- /dev/null +++ b/tests/queries/0_stateless/02674_range_ipv4.reference @@ -0,0 +1,3 @@ +[2887712768,2887712769,2887712770,2887712771,2887712772,2887712773,2887712774,2887712775,2887712776,2887712777] +[2887712768,2887712769,2887712770,2887712771,2887712772,2887712773,2887712774,2887712775,2887712776,2887712777] +[2887712768,2887712769,2887712770,2887712771,2887712772,2887712773,2887712774,2887712775,2887712776,2887712777] diff --git a/tests/queries/0_stateless/02674_range_ipv4.sql b/tests/queries/0_stateless/02674_range_ipv4.sql new file mode 100644 index 00000000000..1241b727014 --- /dev/null +++ b/tests/queries/0_stateless/02674_range_ipv4.sql @@ -0,0 +1,3 @@ +SELECT range(toIPv4('172.31.0.0'), toIPv4('172.31.0.10')); +SELECT range(2887712768, toIPv4('172.31.0.10')); +SELECT range(toIPv4('172.31.0.0'), 2887712778); diff --git a/tests/queries/0_stateless/25337_width_bucket.reference b/tests/queries/0_stateless/25337_width_bucket.reference new file mode 100644 index 00000000000..aa0e8f2612c --- /dev/null +++ b/tests/queries/0_stateless/25337_width_bucket.reference @@ -0,0 +1,156 @@ +-7.6 -10 0 4 1 +-6 -5 -1 2 0 +0 0 10 4 1 +1 3 0 1 0 +3 -100 200 10 4 +3 0 10 3 1 +4.333 1 11 3 1 +4.34 1 11 3 2 +---------- +0 +11 +---------- +-7 -10 0 4 2 +-6 -5 -1 2 0 +0 0 10 4 1 +1 3 0 1 0 +3 -100 200 10 4 +3 0 10 3 1 +4 1 11 3 1 +4 1 11 3 1 +---------- +249 65526 0 4 0 +250 65531 4294967295 2 0 +0 0 10 4 1 +1 3 0 1 0 +3 65436 200 10 0 +3 0 10 3 1 +4 1 11 3 1 +4 1 11 3 1 +---------- +---------- +UInt16 +UInt32 +UInt64 +UInt64 +---------- +5 +3 +1 +0 +4 +1 +1 +1 +0 +0 +0 +0 +1 +1 +1 +1 +0 +0 +1 +0 +1 +1 +1 +1 +1 +0 +1 +0 +10 +3 +4 +4 +5 +3 +5 +2 +11 +4 +4 +4 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +2 +0 +1 +0 +3 +3 +3 +3 +10 +10 +9 +0 +4 +9 +8 +8 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +0 +2 +0 +1 +0 +12 +4 +4 +4 +14 +14 +14 +14 +14 +14 +14 +14 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/25337_width_bucket.sql b/tests/queries/0_stateless/25337_width_bucket.sql new file mode 100644 index 00000000000..b375623246a --- /dev/null +++ b/tests/queries/0_stateless/25337_width_bucket.sql @@ -0,0 +1,62 @@ +CREATE TABLE mytable +( + operand Float64, + low Float64, + high Float64, + count UInt64, + PRIMARY KEY (operand, low, high, count) +) ENGINE = MergeTree(); + +INSERT INTO mytable VALUES (3, -100, 200, 10), (0, 0, 10, 4), (3, 0, 10, 3), (4.333, 1, 11, 3), (4.34, 1, 11, 3), (-7.6, -10, 0, 4), (-6, -5, -1, 2), (1, 3, 0, 1), (3, 2, 5, 0); + +SELECT operand, low, high, count, WIDTH_BUCKET(operand, low, high, count) FROM mytable WHERE count != 0; +SELECT '----------'; +-- zero is not valid for count +SELECT operand, low, high, count, WIDTH_BUCKET(operand, low, high, count) FROM mytable WHERE count = 0; -- { serverError BAD_ARGUMENTS } +-- operand, low and high cannot be NaN +SELECT WIDTH_BUCKET(0, 10, NaN, 10); -- { serverError BAD_ARGUMENTS } +SELECT WIDTH_BUCKET(NaN, 0, 10, 10); -- { serverError BAD_ARGUMENTS } +SELECT WIDTH_BUCKET(0, NaN, 10, 10); -- { serverError BAD_ARGUMENTS } +-- low and high cannot be Inf +SELECT WIDTH_BUCKET(1, -Inf, 10, 10); -- { serverError BAD_ARGUMENTS } +-- low and high cannot be Inf +SELECT WIDTH_BUCKET(1, 0, Inf, 10); -- { serverError BAD_ARGUMENTS } +-- operand can be Inf +SELECT WIDTH_BUCKET(-Inf, 0, 10, 10); +SELECT WIDTH_BUCKET(Inf, 0, 10, 10); +SELECT '----------'; +-- IntXX types +SELECT toInt64(operand) AS operand, toInt32(low) AS low, toInt16(high) AS high, count, WIDTH_BUCKET(operand, low, high, count) FROM mytable WHERE count != 0; +SELECT '----------'; +-- UIntXX types +SELECT toUInt8(toInt8(operand)) AS operand, toUInt16(toInt16(low)) AS low, toUInt32(toInt32(high)) AS high, count, WIDTH_BUCKET(operand, low, high, count) FROM mytable WHERE count != 0; +SELECT '----------'; +SELECT WIDTH_BUCKET(1, 2, 3, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT WIDTH_BUCKET(1, 2, 3, 1.3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT WIDTH_BUCKET('a', 1, 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT WIDTH_BUCKET(1, toUInt128(42), 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT WIDTH_BUCKET(1, 2, toInt128(42), 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT WIDTH_BUCKET(1, 2, 3, toInt256(42)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT '----------'; +-- Return type checks +SELECT toTypeName(WIDTH_BUCKET(1, 2, 3, toUInt8(1))); +SELECT toTypeName(WIDTH_BUCKET(1, 2, 3, toUInt16(1))); +SELECT toTypeName(WIDTH_BUCKET(1, 2, 3, toUInt32(1))); +SELECT toTypeName(WIDTH_BUCKET(1, 2, 3, toUInt64(1))); +SELECT '----------'; +-- Test handling ColumnConst +SELECT WIDTH_BUCKET(1, low, high, count) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(operand, 2, high, count) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(3, 3, high, count) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(operand, low, 4, count) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(5, low, 5, count) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(operand, 6, 6, count) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(7, 7, 7, count) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(operand, low, high, 8) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(9, low, high, 9) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(operand, 10, high, 10) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(11, 11, high, 11) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(operand, low, 12, 12) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(13, low, 13, 13) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(operand, 14, 14, 14) FROM mytable WHERE count != 0; +SELECT WIDTH_BUCKET(15, 15, 15, 15) FROM mytable WHERE count != 0; \ No newline at end of file diff --git a/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.reference b/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.reference new file mode 100644 index 00000000000..73c1795f6f3 --- /dev/null +++ b/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.reference @@ -0,0 +1,37 @@ +-- { echoOn } + +SELECT a FROM ( select 1 AS a ) AS t1, ( select 2 AS b, 3 AS c) AS t2; +1 +SELECT a FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c) AS t2; +1 +1 +SELECT a FROM ( select 1 AS a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +1 +1 +SELECT a FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +1 +1 +1 +1 +SELECT a FROM ( select * from ( select 1 AS a UNION ALL select 1 as a) ) AS t1, ( select * from ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c )) AS t2; +1 +1 +1 +1 +SELECT b FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +2 +2 +2 +2 +SELECT c FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +3 +3 +3 +3 +SELECT 42 FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +42 +42 +42 +42 +SELECT count() FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +4 diff --git a/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.sql b/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.sql new file mode 100644 index 00000000000..10e5871cc44 --- /dev/null +++ b/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.sql @@ -0,0 +1,14 @@ +SET allow_experimental_analyzer = 1; +-- { echoOn } + +SELECT a FROM ( select 1 AS a ) AS t1, ( select 2 AS b, 3 AS c) AS t2; +SELECT a FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c) AS t2; +SELECT a FROM ( select 1 AS a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +SELECT a FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +SELECT a FROM ( select * from ( select 1 AS a UNION ALL select 1 as a) ) AS t1, ( select * from ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c )) AS t2; +SELECT b FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +SELECT c FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +SELECT 42 FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; +SELECT count() FROM ( select 1 AS a UNION ALL select 1 as a ) AS t1, ( select 2 AS b, 3 AS c UNION ALL select 2 as b, 3 as c) AS t2; + + diff --git a/tests/queries/0_stateless/data_json/twitter.jsonl b/tests/queries/0_stateless/data_json/twitter.jsonl new file mode 100644 index 00000000000..e498cb13e3f Binary files /dev/null and b/tests/queries/0_stateless/data_json/twitter.jsonl differ diff --git a/tests/queries/0_stateless/helpers/02112_clean.sh b/tests/queries/0_stateless/helpers/02112_clean.sh deleted file mode 100755 index 95af0cede9c..00000000000 --- a/tests/queries/0_stateless/helpers/02112_clean.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -FILE=${CURDIR}/../file_02112 -rm "$FILE" diff --git a/tests/queries/0_stateless/helpers/02112_prepare.sh b/tests/queries/0_stateless/helpers/02112_prepare.sh deleted file mode 100755 index c2791b01140..00000000000 --- a/tests/queries/0_stateless/helpers/02112_prepare.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -FILE=${CURDIR}/../file_02112 -echo "drop table if exists t;create table t(i Int32) engine=Memory; insert into t select 1" > "$FILE" diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 00477f0fb8f..1ad7432a5bf 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -24,6 +24,7 @@ CapnProto CentOS ClickHouse ClickHouse's +CodeBlock Config ConnectionDetails Contrib @@ -55,6 +56,7 @@ Hostname IPv IntN Integrations +invariants JSONAsString JSONAsObject JSONColumns @@ -129,6 +131,7 @@ ProtobufSingle QTCreator QueryCacheHits QueryCacheMisses +QEMU RBAC RawBLOB RedHat @@ -151,6 +154,7 @@ Submodules Subqueries TSVRaw TSan +TabItem TabSeparated TabSeparatedRaw TabSeparatedRawWithNames diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 8436d3378d9..53165d14f96 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -338,6 +338,13 @@ for test_case in "${expect_tests[@]}"; do pattern="^spawn.*CLICKHOUSE_CLIENT_BINARY.*--history_file$" grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'" fi + + # Otherwise expect_after/expect_before will not bail without stdin attached + # (and actually this is a hack anyway, correct way is to use $any_spawn_id) + pattern="-i \$any_spawn_id timeout" + grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'" + pattern="-i \$any_spawn_id eof" + grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'" done # Conflict markers diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index d313c4bfb78..3814e94bf24 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,11 @@ +v23.2.1.2537-stable 2023-02-23 v23.1.3.5-stable 2023-02-03 v23.1.2.9-stable 2023-01-29 v23.1.1.3077-stable 2023-01-25 v22.12.3.5-stable 2023-01-10 v22.12.2.25-stable 2023-01-06 v22.12.1.1752-stable 2022-12-15 +v22.11.6.44-stable 2023-02-23 v22.11.5.15-stable 2023-01-29 v22.11.4.3-stable 2023-01-10 v22.11.3.47-stable 2023-01-09 @@ -61,6 +63,7 @@ v22.4.5.9-stable 2022-05-06 v22.4.4.7-stable 2022-04-29 v22.4.3.3-stable 2022-04-26 v22.4.2.1-stable 2022-04-22 +v22.3.19.6-lts 2023-02-27 v22.3.18.37-lts 2023-02-15 v22.3.17.13-lts 2023-01-12 v22.3.16.1190-lts 2023-01-09