diff --git a/.clang-tidy b/.clang-tidy index 3903911a277..0dacf813c7e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -10,7 +10,7 @@ # TODO Let clang-tidy check headers in further directories # --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$' -HeaderFilterRegex: '^.*/(base)/.*(h|hpp)$' +HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$' Checks: '*, -abseil-*, diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index db170c3e28f..7fb2abebbbb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -12,6 +12,7 @@ tests/ci/cancel_and_rerun_workflow_lambda/app.py - Build/Testing/Packaging Improvement - Documentation (changelog entry is not required) - Bug Fix (user-visible misbehavior in an official stable release) +- CI Fix or Improvement (changelog entry is not required) - Not for changelog (changelog entry is not required) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 6b05f1fe9f4..2a98722414b 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -11,7 +11,7 @@ on: # yamllint disable-line rule:truthy - 'backport/**' jobs: RunConfig: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: @@ -67,8 +67,6 @@ jobs: test_name: Compatibility check (amd64) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions CompatibilityCheckAarch64: needs: [RunConfig, BuilderDebAarch64] if: ${{ !failure() && !cancelled() }} @@ -77,8 +75,6 @@ jobs: test_name: Compatibility check (aarch64) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc ######################################################################################### #################################### ORDINARY BUILDS #################################### ######################################################################################### @@ -146,9 +142,6 @@ jobs: test_name: Docker server image runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 docker_server.py --release-type head --no-push \ - --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse DockerKeeperImage: needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] if: ${{ !failure() && !cancelled() }} @@ -157,9 +150,6 @@ jobs: test_name: Docker keeper image runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 docker_server.py --release-type head --no-push \ - --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ @@ -176,14 +166,8 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check - runner_type: style-checker + runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - additional_envs: | - NEEDS_DATA< #ci_set_reduced #ci_set_arm +#ci_set_integration 
+#ci_set_analyzer ## To run specified job in CI: #job_ #job_stateless_tests_release #job_package_debug #job_integration_tests_asan + +## To run only specified batches for multi-batch job(s) +#batch_2 +#batch_1_2_3 diff --git a/CHANGELOG.md b/CHANGELOG.md index b3e5dd709ab..9df678d4b9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,165 @@ ### Table of Contents +**[ClickHouse release v24.2, 2024-02-29](#242)**
**[ClickHouse release v24.1, 2024-01-30](#241)**
**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**
# 2024 Changelog +### ClickHouse release 24.2, 2024-02-29 + +#### Backward Incompatible Change +* Validate suspicious/experimental types in nested types. Previously we didn't validate such types (except JSON) in nested types like Array/Tuple/Map. [#59385](https://github.com/ClickHouse/ClickHouse/pull/59385) ([Kruglov Pavel](https://github.com/Avogar)). +* Add sanity check for number of threads and block sizes. [#60138](https://github.com/ClickHouse/ClickHouse/pull/60138) ([Raúl Marín](https://github.com/Algunenano)). +* Don't infer floats in exponential notation by default. Add a setting `input_format_try_infer_exponent_floats` that will restore previous behaviour (disabled by default). Closes [#59476](https://github.com/ClickHouse/ClickHouse/issues/59476). [#59500](https://github.com/ClickHouse/ClickHouse/pull/59500) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow alter operations to be surrounded by parenthesis. The emission of parentheses can be controlled by the `format_alter_operations_with_parentheses` config. By default, in formatted queries the parentheses are emitted as we store the formatted alter operations in some places as metadata (e.g.: mutations). The new syntax clarifies some of the queries where alter operations end in a list. E.g.: `ALTER TABLE x MODIFY TTL date GROUP BY a, b, DROP COLUMN c` cannot be parsed properly with the old syntax. In the new syntax the query `ALTER TABLE x (MODIFY TTL date GROUP BY a, b), (DROP COLUMN c)` is obvious. Older versions are not able to read the new syntax, therefore using the new syntax might cause issues if newer and older version of ClickHouse are mixed in a single cluster. [#59532](https://github.com/ClickHouse/ClickHouse/pull/59532) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### New Feature +* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. So, a View will encapsulate the grants. [#54901](https://github.com/ClickHouse/ClickHouse/pull/54901) [#60439](https://github.com/ClickHouse/ClickHouse/pull/60439) ([pufit](https://github.com/pufit)). +* Try to detect file format automatically during schema inference if it's unknown in `file/s3/hdfs/url/azureBlobStorage` engines. Closes [#50576](https://github.com/ClickHouse/ClickHouse/issues/50576). [#59092](https://github.com/ClickHouse/ClickHouse/pull/59092) ([Kruglov Pavel](https://github.com/Avogar)). +* Implement auto-adjustment for asynchronous insert timeouts. The following settings are introduced: async_insert_poll_timeout_ms, async_insert_use_adaptive_busy_timeout, async_insert_busy_timeout_min_ms, async_insert_busy_timeout_max_ms, async_insert_busy_timeout_increase_rate, async_insert_busy_timeout_decrease_rate. [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ([Julia Kartseva](https://github.com/jkartseva)). +* Allow to set up a quota for maximum sequential login failures. [#54737](https://github.com/ClickHouse/ClickHouse/pull/54737) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* A new aggregate function `groupArrayIntersect`. Follows up: [#49862](https://github.com/ClickHouse/ClickHouse/issues/49862). [#59598](https://github.com/ClickHouse/ClickHouse/pull/59598) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Backup & Restore support for `AzureBlobStorage`. Resolves [#50747](https://github.com/ClickHouse/ClickHouse/issues/50747). 
[#56988](https://github.com/ClickHouse/ClickHouse/pull/56988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* The user can now specify the template string directly in the query using `format_schema_rows_template` as an alternative to `format_template_row`. Closes [#31363](https://github.com/ClickHouse/ClickHouse/issues/31363). [#59088](https://github.com/ClickHouse/ClickHouse/pull/59088) ([Shaun Struwig](https://github.com/Blargian)). +* Implemented automatic conversion of merge tree tables of different kinds to replicated engine. Create empty `convert_to_replicated` file in table's data directory (`/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`) and that table will be converted automatically on next server start. [#57798](https://github.com/ClickHouse/ClickHouse/pull/57798) ([Kirill](https://github.com/kirillgarbar)). +* Added query `ALTER TABLE table FORGET PARTITION partition` that removes ZooKeeper nodes, related to an empty partition. [#59507](https://github.com/ClickHouse/ClickHouse/pull/59507) ([Sergei Trifonov](https://github.com/serxa)). This is an expert-level feature. +* Support JWT credentials file for the NATS table engine. [#59543](https://github.com/ClickHouse/ClickHouse/pull/59543) ([Nickolaj Jepsen](https://github.com/nickolaj-jepsen)). +* Implemented `system.dns_cache` table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)). +* The codec `LZ4HC` will accept a new level 2, which is faster than the previous minimum level 3, at the expense of less compression. In previous versions, `LZ4HC(2)` and less was the same as `LZ4HC(3)`. Author: [Cyan4973](https://github.com/Cyan4973). [#60090](https://github.com/ClickHouse/ClickHouse/pull/60090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implemented `system.dns_cache` table, which can be useful for debugging DNS issues. New server setting dns_cache_max_size. [#60257](https://github.com/ClickHouse/ClickHouse/pull/60257) ([Kirill Nikiforov](https://github.com/allmazz)). +* Support single-argument version for the `merge` table function, as `merge(['db_name', ] 'tables_regexp')`. [#60372](https://github.com/ClickHouse/ClickHouse/pull/60372) ([豪肥肥](https://github.com/HowePa)). +* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)). +* Support specifying a set of permitted users for specific S3 settings in config using `user` key. [#60144](https://github.com/ClickHouse/ClickHouse/pull/60144) ([Antonio Andelic](https://github.com/antonio2368)). +* Added table function `mergeTreeIndex`. It represents the contents of index and marks files of `MergeTree` tables. It can be used for introspection. Syntax: `mergeTreeIndex(database, table, [with_marks = true])` where `database.table` is an existing table with `MergeTree` engine. [#58140](https://github.com/ClickHouse/ClickHouse/pull/58140) ([Anton Popov](https://github.com/CurtizJ)). + +#### Experimental Feature +* Added function `seriesOutliersDetectTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)). Keep in mind that the behavior will be changed in the next patch release. +* Add function `variantType` that returns Enum with variant type name for each row. 
[#59398](https://github.com/ClickHouse/ClickHouse/pull/59398) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow concurrent table creation in the `Replicated` database during adding or recovering a new replica. [#59277](https://github.com/ClickHouse/ClickHouse/pull/59277) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Implement comparison operator for `Variant` values and proper Field inserting into `Variant` column. Don't allow creating `Variant` type with similar variant types by default (allow uder a setting `allow_suspicious_variant_types`) Closes [#59996](https://github.com/ClickHouse/ClickHouse/issues/59996). Closes [#59850](https://github.com/ClickHouse/ClickHouse/issues/59850). [#60198](https://github.com/ClickHouse/ClickHouse/pull/60198) ([Kruglov Pavel](https://github.com/Avogar)). +* Disable parallel replicas JOIN with CTE (not analyzer) [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)). + +#### Performance Improvement +* Primary key will use less amount of memory. [#60049](https://github.com/ClickHouse/ClickHouse/pull/60049) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve memory usage for primary key and some other operations. [#60050](https://github.com/ClickHouse/ClickHouse/pull/60050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The tables' primary keys will be loaded in memory lazily on first access. This is controlled by the new MergeTree setting `primary_key_lazy_load`, which is on by default. This provides several advantages: - it will not be loaded for tables that are not used; - if there is not enough memory, an exception will be thrown on first use instead of at server startup. This provides several disadvantages: - the latency of loading the primary key will be paid on the first query rather than before accepting connections; this theoretically may introduce a thundering-herd problem. This closes [#11188](https://github.com/ClickHouse/ClickHouse/issues/11188). [#60093](https://github.com/ClickHouse/ClickHouse/pull/60093) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Vectorized distance functions used in vector search. [#58866](https://github.com/ClickHouse/ClickHouse/pull/58866) ([Robert Schulze](https://github.com/rschu1ze)). +* Vectorized function `dotProduct` which is useful for vector search. [#60202](https://github.com/ClickHouse/ClickHouse/pull/60202) ([Robert Schulze](https://github.com/rschu1ze)). +* Add short-circuit ability for `dictGetOrDefault` function. Closes [#52098](https://github.com/ClickHouse/ClickHouse/issues/52098). [#57767](https://github.com/ClickHouse/ClickHouse/pull/57767) ([jsc0218](https://github.com/jsc0218)). +* Keeper improvement: cache only a certain amount of logs in-memory controlled by `latest_logs_cache_size_threshold` and `commit_logs_cache_size_threshold`. 
[#59460](https://github.com/ClickHouse/ClickHouse/pull/59460) ([Antonio Andelic](https://github.com/antonio2368)). +* Keeper improvement: reduce size of data node even more. [#59592](https://github.com/ClickHouse/ClickHouse/pull/59592) ([Antonio Andelic](https://github.com/antonio2368)). +* Continue optimizing branch miss of `if` function when result type is `Float*/Decimal*/*Int*`, follow up of https://github.com/ClickHouse/ClickHouse/pull/57885. [#59148](https://github.com/ClickHouse/ClickHouse/pull/59148) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Optimize `if` function when the input type is `Map`, the speed-up is up to ~10x. [#59413](https://github.com/ClickHouse/ClickHouse/pull/59413) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve performance of the `Int8` type by implementing strict aliasing (we already have it for `UInt8` and all other integer types). [#59485](https://github.com/ClickHouse/ClickHouse/pull/59485) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize performance of sum/avg conditionally for bigint and big decimal types by reducing branch miss. [#59504](https://github.com/ClickHouse/ClickHouse/pull/59504) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve performance of SELECTs with active mutations. [#59531](https://github.com/ClickHouse/ClickHouse/pull/59531) ([Azat Khuzhin](https://github.com/azat)). +* Optimized function `isNotNull` with AVX2. [#59621](https://github.com/ClickHouse/ClickHouse/pull/59621) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve ASOF JOIN performance for sorted or almost sorted data. [#59731](https://github.com/ClickHouse/ClickHouse/pull/59731) ([Maksim Kita](https://github.com/kitaisreal)). +* The previous default value equals to 1 MB for `async_insert_max_data_size` appeared to be too small. The new one would be 10 MiB. [#59536](https://github.com/ClickHouse/ClickHouse/pull/59536) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use multiple threads while reading the metadata of tables from a backup while executing the RESTORE command. [#60040](https://github.com/ClickHouse/ClickHouse/pull/60040) ([Vitaly Baranov](https://github.com/vitlibar)). +* Now if `StorageBuffer` has more than 1 shard (`num_layers` > 1) background flush will happen simultaneously for all shards in multiple threads. [#60111](https://github.com/ClickHouse/ClickHouse/pull/60111) ([alesapin](https://github.com/alesapin)). + +#### Improvement +* When output format is `Pretty` format and a block consists of a single numeric value which exceeds one million, A readable number will be printed on table right. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)). +* Added settings `split_parts_ranges_into_intersecting_and_non_intersecting_final` and `split_intersecting_parts_ranges_into_layers_final`. These settings are needed to disable optimizations for queries with `FINAL` and needed for debug only. [#59705](https://github.com/ClickHouse/ClickHouse/pull/59705) ([Maksim Kita](https://github.com/kitaisreal)). Actually not only for that - they can also lower memory usage at the expense of performance. +* Rename the setting `extract_kvp_max_pairs_per_row` to `extract_key_value_pairs_max_pairs_per_row`. The issue (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59683](https://github.com/ClickHouse/ClickHouse/pull/59683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
[#59960](https://github.com/ClickHouse/ClickHouse/pull/59960) ([jsc0218](https://github.com/jsc0218)). +* Running `ALTER COLUMN MATERIALIZE` on a column with `DEFAULT` or `MATERIALIZED` expression now precisely follows the semantics. [#58023](https://github.com/ClickHouse/ClickHouse/pull/58023) ([Duc Canh Le](https://github.com/canhld94)). +* Enabled an exponential backoff logic for errors during mutations. It will reduce the CPU usage, memory usage and log file sizes. [#58036](https://github.com/ClickHouse/ClickHouse/pull/58036) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Add improvement to count the `InitialQuery` Profile Event. [#58195](https://github.com/ClickHouse/ClickHouse/pull/58195) ([Unalian](https://github.com/Unalian)). +* Allow to define `volume_priority` in `storage_configuration`. [#58533](https://github.com/ClickHouse/ClickHouse/pull/58533) ([Andrey Zvonov](https://github.com/zvonand)). +* Add support for the `Date32` type in the `T64` codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma](https://github.com/binmahone)). +* Allow trailing commas in types with several items. [#59119](https://github.com/ClickHouse/ClickHouse/pull/59119) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Settings for the Distributed table engine can now be specified in the server configuration file (similar to MergeTree settings), e.g. ` false `. [#59291](https://github.com/ClickHouse/ClickHouse/pull/59291) ([Azat Khuzhin](https://github.com/azat)). +* Retry disconnects and expired sessions when reading `system.zookeeper`. This is helpful when reading many rows from `system.zookeeper` table especially in the presence of fault-injected disconnects. [#59388](https://github.com/ClickHouse/ClickHouse/pull/59388) ([Alexander Gololobov](https://github.com/davenger)). +* Do not interpret numbers with leading zeroes as octals when `input_format_values_interpret_expressions=0`. [#59403](https://github.com/ClickHouse/ClickHouse/pull/59403) ([Joanna Hulboj](https://github.com/jh0x)). +* At startup and whenever config files are changed, ClickHouse updates the hard memory limits of its total memory tracker. These limits are computed based on various server settings and cgroups limits (on Linux). Previously, setting `/sys/fs/cgroup/memory.max` (for cgroups v2) was hard-coded. As a result, cgroup v2 memory limits configured for nested groups (hierarchies), e.g. `/sys/fs/cgroup/my/nested/group/memory.max` were ignored. This is now fixed. The behavior of v1 memory limits remains unchanged. [#59435](https://github.com/ClickHouse/ClickHouse/pull/59435) ([Robert Schulze](https://github.com/rschu1ze)). +* New profile events added to observe the time spent on calculating PK/projections/secondary indices during `INSERT`-s. [#59436](https://github.com/ClickHouse/ClickHouse/pull/59436) ([Nikita Taranov](https://github.com/nickitat)). +* Allow to define a starting point for S3Queue with Ordered mode at the creation using a setting `s3queue_last_processed_path`. [#59446](https://github.com/ClickHouse/ClickHouse/pull/59446) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Made comments for system tables also available in `system.tables` in `clickhouse-local`. [#59493](https://github.com/ClickHouse/ClickHouse/pull/59493) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* `system.zookeeper` table: previously the whole result was accumulated in memory and returned as one big chunk. 
This change should help to reduce memory consumption when reading many rows from `system.zookeeper`, allow showing intermediate progress (how many rows have been read so far) and avoid hitting connection timeout when result set is big. [#59545](https://github.com/ClickHouse/ClickHouse/pull/59545) ([Alexander Gololobov](https://github.com/davenger)). +* Now dashboard understands both compressed and uncompressed state of URL's #hash (backward compatibility). Continuation of [#59124](https://github.com/ClickHouse/ClickHouse/issues/59124) . [#59548](https://github.com/ClickHouse/ClickHouse/pull/59548) ([Amos Bird](https://github.com/amosbird)). +* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.3.1 to v1.4.0 . Also fixed a bug for polling timeout mechanism, as we observed in same cases timeout won't work properly, if timeout happen, IAA and CPU may process buffer concurrently. So far, we'd better make sure IAA codec status is not QPL_STS_BEING_PROCESSED, then fallback to SW codec. [#59551](https://github.com/ClickHouse/ClickHouse/pull/59551) ([jasperzhu](https://github.com/jinjunzh)). +* Do not show a warning about the server version in ClickHouse Cloud because ClickHouse Cloud handles seamless upgrades automatically. [#59657](https://github.com/ClickHouse/ClickHouse/pull/59657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* After self-extraction temporary binary is moved instead copying. [#59661](https://github.com/ClickHouse/ClickHouse/pull/59661) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix stack unwinding on Apple macOS. This closes [#53653](https://github.com/ClickHouse/ClickHouse/issues/53653). [#59690](https://github.com/ClickHouse/ClickHouse/pull/59690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#59697](https://github.com/ClickHouse/ClickHouse/pull/59697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#60434](https://github.com/ClickHouse/ClickHouse/pull/60434) +* Unify XML and SQL created named collection behaviour in Kafka storage. [#59710](https://github.com/ClickHouse/ClickHouse/pull/59710) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* In case when `merge_max_block_size_bytes` is small enough and tables contain wide rows (strings or tuples) background merges may stuck in an endless loop. This behaviour is fixed. Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59340. [#59812](https://github.com/ClickHouse/ClickHouse/pull/59812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow uuid in replica_path if CREATE TABLE explicitly has it. [#59908](https://github.com/ClickHouse/ClickHouse/pull/59908) ([Azat Khuzhin](https://github.com/azat)). +* Add column `metadata_version` of ReplicatedMergeTree table in `system.tables` system table. [#59942](https://github.com/ClickHouse/ClickHouse/pull/59942) ([Maksim Kita](https://github.com/kitaisreal)). +* Keeper improvement: send only Keeper related metrics/events for Prometheus. [#59945](https://github.com/ClickHouse/ClickHouse/pull/59945) ([Antonio Andelic](https://github.com/antonio2368)). +* The dashboard will display metrics across different ClickHouse versions even if the structure of system tables has changed after the upgrade. 
[#59967](https://github.com/ClickHouse/ClickHouse/pull/59967) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow loading AZ info from a file. [#59976](https://github.com/ClickHouse/ClickHouse/pull/59976) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Keeper improvement: add retries on failures for Disk related operations. [#59980](https://github.com/ClickHouse/ClickHouse/pull/59980) ([Antonio Andelic](https://github.com/antonio2368)). +* Add new config setting `backups.remove_backup_files_after_failure`: ` true `. [#60002](https://github.com/ClickHouse/ClickHouse/pull/60002) ([Vitaly Baranov](https://github.com/vitlibar)). +* Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)). +* Short circuit execution for `ULIDStringToDateTime`. [#60211](https://github.com/ClickHouse/ClickHouse/pull/60211) ([Juan Madurga](https://github.com/jlmadurga)). +* Added `query_id` column for tables `system.backups` and `system.backup_log`. Added error stacktrace to `error` column. [#60220](https://github.com/ClickHouse/ClickHouse/pull/60220) ([Maksim Kita](https://github.com/kitaisreal)). +* Connections through the MySQL port now automatically run with setting `prefer_column_name_to_alias = 1` to support QuickSight out-of-the-box. Also, settings `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns` are now enabled by default, affecting also only MySQL connections. This increases compatibility with more BI tools. [#60365](https://github.com/ClickHouse/ClickHouse/pull/60365) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix a race condition in JavaScript code leading to duplicate charts on top of each other. [#60392](https://github.com/ClickHouse/ClickHouse/pull/60392) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Added builds and tests with coverage collection with introspection. Continuation of [#56102](https://github.com/ClickHouse/ClickHouse/issues/56102). [#58792](https://github.com/ClickHouse/ClickHouse/pull/58792) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update the Rust toolchain in `corrosion-cmake` when the CMake cross-compilation toolchain variable is set. [#59309](https://github.com/ClickHouse/ClickHouse/pull/59309) ([Aris Tritas](https://github.com/aris-aiven)). +* Add some fuzzing to ASTLiterals. [#59383](https://github.com/ClickHouse/ClickHouse/pull/59383) ([Raúl Marín](https://github.com/Algunenano)). +* If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)). +* Remove ability to disable generic clickhouse components (like server/client/...), but keep some that requires extra libraries (like ODBC or keeper). [#59857](https://github.com/ClickHouse/ClickHouse/pull/59857) ([Azat Khuzhin](https://github.com/azat)). +* Query fuzzer will fuzz SETTINGS inside queries. [#60087](https://github.com/ClickHouse/ClickHouse/pull/60087) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for building ClickHouse with clang-19 (master). 
[#60448](https://github.com/ClickHouse/ClickHouse/pull/60448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix a "Non-ready set" error in TTL WHERE. [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a bug in the `quantilesGK` function [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Fix a wrong behavior with `intDiv` for Decimal arguments [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix `translate` with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). +* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). +* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). +* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). +* Fix incorrect result of arrayElement / map on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)). +* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix KQL issue found by WingFuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). +* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). +* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)). +* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* Validate types of arguments for `minmax` skipping index [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)). +* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). 
+* Fix AST fuzzer issue in function `countMatches` [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)). +* RabbitMQ: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix StorageURL doing some of the query execution in single thread [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)). +* S3Queue: fix uninitialized value [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix crash in JSONColumnsWithMetadata format over HTTP [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not rewrite sum to count if the return value differs in Analyzer [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)). +* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)). +* ReplicatedMergeTree invalid metadata_version fix [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix data race in `StorageDistributed` [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)). +* Docker: run init scripts when option is enabled rather than disabled [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)). +* Fix INSERT into `SQLite` with single quote (by escaping single quotes with a quote instead of backslash) [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)). +* Fix several logical errors in `arrayFold` [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)). +* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible exception from S3Queue table on drop [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix formatting of NOT with single literals [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)). +* Use max_query_size from context in DDLLogEntry instead of hardcoded 4096 [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inconsistent formatting of queries containing tables named `table`. Fix wrong formatting of queries with `UNION ALL`, `INTERSECT`, and `EXCEPT` when their structure wasn't linear. This closes #52349. Fix wrong formatting of `SYSTEM` queries, including `SYSTEM ... DROP FILESYSTEM CACHE`, `SYSTEM ... REFRESH/START/STOP/CANCEL/TEST VIEW`, `SYSTEM ENABLE/DISABLE FAILPOINT`. Fix formatting of parameterized DDL queries. Fix the formatting of the `DESCRIBE FILESYSTEM CACHE` query. Fix incorrect formatting of the `SET param_...` (a query setting a parameter). Fix incorrect formatting of `CREATE INDEX` queries. Fix inconsistent formatting of `CREATE USER` and similar queries. 
Fix inconsistent formatting of `CREATE SETTINGS PROFILE`. Fix incorrect formatting of `ALTER ... MODIFY REFRESH`. Fix inconsistent formatting of window functions if frame offsets were expressions. Fix inconsistent formatting of `RESPECT NULLS` and `IGNORE NULLS` if they were used after a function that implements an operator (such as `plus`). Fix idiotic formatting of `SYSTEM SYNC REPLICA ... LIGHTWEIGHT FROM ...`. Fix inconsistent formatting of invalid queries with `GROUP BY GROUPING SETS ... WITH ROLLUP/CUBE/TOTALS`. Fix inconsistent formatting of `GRANT CURRENT GRANTS`. Fix inconsistent formatting of `CREATE TABLE (... COLLATE)`. Additionally, I fixed the incorrect formatting of `EXPLAIN` in subqueries (#60102). Fixed incorrect formatting of lambda functions (#60012). Added a check so there is no way to miss these abominations in the future. [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix inconsistent formatting of explain in subqueries [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Allow casting of bools in string representation to true bools [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `system.s3queue_log` [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix arrayReduce with nullable aggregate function name [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)). +* Hide sensitive info for `S3Queue` [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix http exception codes. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)). +* S3Queue: fix a bug (also fixes flaky test_storage_s3_queue/test.py::test_shards_distributed) [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6 [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)). +* Fixed a minor bug that prevented distributed table queries sent from either KQL or PRQL dialect clients to be executed on replicas. [#59674](https://github.com/ClickHouse/ClickHouse/issues/59674). [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)) [#59674](https://github.com/ClickHouse/ClickHouse/pull/59674) ([Austin Kothig](https://github.com/kothiga)). 
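The Improvement entry above about cgroup v2 memory limits (nested groups such as `/sys/fs/cgroup/my/nested/group/memory.max` were previously ignored) pairs with the `base/base/cgroupsv2.*` helpers added later in this diff. Below is a minimal sketch of how such a lookup could work; the function name `readCGroupV2MemoryMax` and the "walk up from the process's own group" strategy are illustrative assumptions, not the exact ClickHouse implementation.

```cpp
// Illustrative sketch (not the exact ClickHouse code): find a memory.max limit
// for the cgroup v2 group the current process belongs to, honouring nested groups.
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <optional>
#include <string>

namespace fs = std::filesystem;

static std::optional<uint64_t> readCGroupV2MemoryMax()
{
    std::ifstream self("/proc/self/cgroup");
    std::string line;
    if (!self.is_open() || !std::getline(self, line) || !line.starts_with("0::/"))
        return std::nullopt; /// not running under cgroups v2

    /// Start at the process's own group, e.g. /sys/fs/cgroup/my/nested/group, and walk
    /// towards the mount root. The nearest readable memory.max wins in this sketch;
    /// a full implementation would take the minimum over all ancestors.
    fs::path group = fs::path("/sys/fs/cgroup") / line.substr(4);
    for (fs::path dir = group; dir != "/sys/fs/cgroup"; dir = dir.parent_path())
    {
        std::ifstream limit_file(dir / "memory.max");
        std::string limit;
        if (limit_file.is_open() && std::getline(limit_file, limit))
            return limit == "max" ? std::nullopt : std::optional<uint64_t>(std::stoull(limit));
    }
    return std::nullopt;
}

int main()
{
    if (auto limit = readCGroupV2MemoryMax())
        std::cout << "cgroup v2 memory limit: " << *limit << " bytes\n";
    else
        std::cout << "no cgroup v2 memory limit detected\n";
}
```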
+ + ### ClickHouse release 24.1, 2024-01-30 #### Backward Incompatible Change diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bd179a799c..9ffb4789dc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,8 +61,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS) # set CPU time limit to 1000 seconds set (RLIMIT_CPU 1000) - # -fsanitize=memory is too heavy - if (SANITIZE STREQUAL "memory") + # -fsanitize=memory and address are too heavy + if (SANITIZE) set (RLIMIT_DATA 10000000000) # 10G endif() @@ -319,7 +319,8 @@ if (COMPILER_CLANG) endif() endif () -set (COMPILER_FLAGS "${COMPILER_FLAGS}") +# Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms +set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off") # Our built-in unwinder only supports DWARF version up to 4. set (DEBUG_INFO_FLAGS "-g") diff --git a/README.md b/README.md index 9ada350d173..e00ce42a60b 100644 --- a/README.md +++ b/README.md @@ -31,15 +31,30 @@ curl https://clickhouse.com/ | sh * [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. +## Monthly Release & Community Call + +Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. + +* [v24.3 Community Call](https://clickhouse.com/company/events/v24-3-community-release-call) - Mar 26 +* [v24.4 Community Call](https://clickhouse.com/company/events/v24-4-community-release-call) - Apr 30 + ## Upcoming Events -Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. +Keep an eye out for upcoming meetups and eventsaround the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. + +* [ClickHouse Meetup in Bellevue](https://www.meetup.com/clickhouse-seattle-user-group/events/298650371/) - Mar 11 +* [ClickHouse Meetup at Ramp's Offices in NYC](https://www.meetup.com/clickhouse-new-york-user-group/events/298640542/) - Mar 19 +* [ClickHouse Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/299479750/) - Mar 20 +* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/298997115/) - Mar 21 +* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/299479850/) - Mar 23 +* [ClickHouse Meetup in Zurich](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/299628922/) - Apr 16 +* [ClickHouse Meetup in Copenhagen](https://www.meetup.com/clickhouse-denmark-meetup-group/events/299629133/) - Apr 23 +* [ClickHouse Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28 + ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. 
Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v24.1 Release Webinar**](https://www.youtube.com/watch?v=pBF9g0wGAGs) All the features of 24.1, one convenient video! Watch it now! -* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) - +* **Recording available**: [**v24.2 Release Call**](https://www.youtube.com/watch?v=iN2y-TK8f3A) All the features of 24.2, one convenient video! Watch it now! ## Interested in joining ClickHouse and making it your full-time job? diff --git a/SECURITY.md b/SECURITY.md index 79ca0269838..86578b188d8 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 24.2 | âœ”ï¸ | | 24.1 | âœ”ï¸ | | 23.12 | âœ”ï¸ | -| 23.11 | âœ”ï¸ | +| 23.11 | ⌠| | 23.10 | ⌠| | 23.9 | ⌠| | 23.8 | âœ”ï¸ | diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 025687d2c59..610877eae73 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -10,8 +10,10 @@ set (CMAKE_CXX_STANDARD 20) set (SRCS argsToConfig.cpp + cgroupsv2.cpp coverage.cpp demangle.cpp + Decimal.cpp getAvailableMemoryAmount.cpp getFQDNOrHostName.cpp getMemoryAmount.cpp diff --git a/base/base/Decimal.cpp b/base/base/Decimal.cpp new file mode 100644 index 00000000000..7e65c0eb8d1 --- /dev/null +++ b/base/base/Decimal.cpp @@ -0,0 +1,87 @@ +#include +#include + +namespace DB +{ + +/// Explicit template instantiations. + +#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \ + M(Int32) \ + M(Int64) \ + M(Int128) \ + M(Int256) + +#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \ + M(Int32, X) \ + M(Int64, X) \ + M(Int128, X) \ + M(Int256, X) + +template const Decimal & Decimal::operator += (const T & x) { value += x; return *this; } +template const Decimal & Decimal::operator -= (const T & x) { value -= x; return *this; } +template const Decimal & Decimal::operator *= (const T & x) { value *= x; return *this; } +template const Decimal & Decimal::operator /= (const T & x) { value /= x; return *this; } +template const Decimal & Decimal::operator %= (const T & x) { value %= x; return *this; } + +template void NO_SANITIZE_UNDEFINED Decimal::addOverflow(const T & x) { value += x; } + +/// Maybe this explicit instantiation affects performance since operators cannot be inlined. 
+ +template template const Decimal & Decimal::operator += (const Decimal & x) { value += static_cast(x.value); return *this; } +template template const Decimal & Decimal::operator -= (const Decimal & x) { value -= static_cast(x.value); return *this; } +template template const Decimal & Decimal::operator *= (const Decimal & x) { value *= static_cast(x.value); return *this; } +template template const Decimal & Decimal::operator /= (const Decimal & x) { value /= static_cast(x.value); return *this; } +template template const Decimal & Decimal::operator %= (const Decimal & x) { value %= static_cast(x.value); return *this; } + +#define DISPATCH(TYPE_T, TYPE_U) \ + template const Decimal & Decimal::operator += (const Decimal & x); \ + template const Decimal & Decimal::operator -= (const Decimal & x); \ + template const Decimal & Decimal::operator *= (const Decimal & x); \ + template const Decimal & Decimal::operator /= (const Decimal & x); \ + template const Decimal & Decimal::operator %= (const Decimal & x); +#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + +#define DISPATCH(TYPE) template struct Decimal; +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + +template bool operator< (const Decimal & x, const Decimal & y) { return x.value < y.value; } +template bool operator> (const Decimal & x, const Decimal & y) { return x.value > y.value; } +template bool operator<= (const Decimal & x, const Decimal & y) { return x.value <= y.value; } +template bool operator>= (const Decimal & x, const Decimal & y) { return x.value >= y.value; } +template bool operator== (const Decimal & x, const Decimal & y) { return x.value == y.value; } +template bool operator!= (const Decimal & x, const Decimal & y) { return x.value != y.value; } + +#define DISPATCH(TYPE) \ +template bool operator< (const Decimal & x, const Decimal & y); \ +template bool operator> (const Decimal & x, const Decimal & y); \ +template bool operator<= (const Decimal & x, const Decimal & y); \ +template bool operator>= (const Decimal & x, const Decimal & y); \ +template bool operator== (const Decimal & x, const Decimal & y); \ +template bool operator!= (const Decimal & x, const Decimal & y); +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + + +template Decimal operator+ (const Decimal & x, const Decimal & y) { return x.value + y.value; } +template Decimal operator- (const Decimal & x, const Decimal & y) { return x.value - y.value; } +template Decimal operator* (const Decimal & x, const Decimal & y) { return x.value * y.value; } +template Decimal operator/ (const Decimal & x, const Decimal & y) { return x.value / y.value; } +template Decimal operator- (const Decimal & x) { return -x.value; } + +#define DISPATCH(TYPE) \ +template Decimal operator+ (const Decimal & x, const Decimal & y); \ +template Decimal operator- (const Decimal & x, const Decimal & y); \ +template Decimal operator* (const Decimal & x, const Decimal & y); \ +template Decimal operator/ (const Decimal & x, const Decimal & y); \ +template Decimal operator- (const Decimal & x); +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + +#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS +#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE +} diff --git a/base/base/Decimal.h b/base/base/Decimal.h index afa186faf5b..42f9e67c49d 100644 --- a/base/base/Decimal.h +++ b/base/base/Decimal.h @@ -1,20 +1,28 @@ #pragma once + #include #include +#include +#include -#if 
!defined(NO_SANITIZE_UNDEFINED) -#if defined(__clang__) - #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) -#else - #define NO_SANITIZE_UNDEFINED -#endif -#endif namespace DB { template struct Decimal; class DateTime64; +#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \ + M(Int32) \ + M(Int64) \ + M(Int128) \ + M(Int256) + +#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \ + M(Int32, X) \ + M(Int64, X) \ + M(Int128, X) \ + M(Int256, X) + using Decimal32 = Decimal; using Decimal64 = Decimal; using Decimal128 = Decimal; @@ -55,36 +63,73 @@ struct Decimal return static_cast(value); } - const Decimal & operator += (const T & x) { value += x; return *this; } - const Decimal & operator -= (const T & x) { value -= x; return *this; } - const Decimal & operator *= (const T & x) { value *= x; return *this; } - const Decimal & operator /= (const T & x) { value /= x; return *this; } - const Decimal & operator %= (const T & x) { value %= x; return *this; } + const Decimal & operator += (const T & x); + const Decimal & operator -= (const T & x); + const Decimal & operator *= (const T & x); + const Decimal & operator /= (const T & x); + const Decimal & operator %= (const T & x); - template const Decimal & operator += (const Decimal & x) { value += x.value; return *this; } - template const Decimal & operator -= (const Decimal & x) { value -= x.value; return *this; } - template const Decimal & operator *= (const Decimal & x) { value *= x.value; return *this; } - template const Decimal & operator /= (const Decimal & x) { value /= x.value; return *this; } - template const Decimal & operator %= (const Decimal & x) { value %= x.value; return *this; } + template const Decimal & operator += (const Decimal & x); + template const Decimal & operator -= (const Decimal & x); + template const Decimal & operator *= (const Decimal & x); + template const Decimal & operator /= (const Decimal & x); + template const Decimal & operator %= (const Decimal & x); /// This is to avoid UB for sumWithOverflow() - void NO_SANITIZE_UNDEFINED addOverflow(const T & x) { value += x; } + void NO_SANITIZE_UNDEFINED addOverflow(const T & x); T value; }; -template inline bool operator< (const Decimal & x, const Decimal & y) { return x.value < y.value; } -template inline bool operator> (const Decimal & x, const Decimal & y) { return x.value > y.value; } -template inline bool operator<= (const Decimal & x, const Decimal & y) { return x.value <= y.value; } -template inline bool operator>= (const Decimal & x, const Decimal & y) { return x.value >= y.value; } -template inline bool operator== (const Decimal & x, const Decimal & y) { return x.value == y.value; } -template inline bool operator!= (const Decimal & x, const Decimal & y) { return x.value != y.value; } +#define DISPATCH(TYPE) extern template struct Decimal; +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH -template inline Decimal operator+ (const Decimal & x, const Decimal & y) { return x.value + y.value; } -template inline Decimal operator- (const Decimal & x, const Decimal & y) { return x.value - y.value; } -template inline Decimal operator* (const Decimal & x, const Decimal & y) { return x.value * y.value; } -template inline Decimal operator/ (const Decimal & x, const Decimal & y) { return x.value / y.value; } -template inline Decimal operator- (const Decimal & x) { return -x.value; } +#define DISPATCH(TYPE_T, TYPE_U) \ + extern template const Decimal & Decimal::operator += (const Decimal & x); \ + extern template const Decimal & 
Decimal::operator -= (const Decimal & x); \ + extern template const Decimal & Decimal::operator *= (const Decimal & x); \ + extern template const Decimal & Decimal::operator /= (const Decimal & x); \ + extern template const Decimal & Decimal::operator %= (const Decimal & x); +#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + +template bool operator< (const Decimal & x, const Decimal & y); +template bool operator> (const Decimal & x, const Decimal & y); +template bool operator<= (const Decimal & x, const Decimal & y); +template bool operator>= (const Decimal & x, const Decimal & y); +template bool operator== (const Decimal & x, const Decimal & y); +template bool operator!= (const Decimal & x, const Decimal & y); + +#define DISPATCH(TYPE) \ +extern template bool operator< (const Decimal & x, const Decimal & y); \ +extern template bool operator> (const Decimal & x, const Decimal & y); \ +extern template bool operator<= (const Decimal & x, const Decimal & y); \ +extern template bool operator>= (const Decimal & x, const Decimal & y); \ +extern template bool operator== (const Decimal & x, const Decimal & y); \ +extern template bool operator!= (const Decimal & x, const Decimal & y); +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + +template Decimal operator+ (const Decimal & x, const Decimal & y); +template Decimal operator- (const Decimal & x, const Decimal & y); +template Decimal operator* (const Decimal & x, const Decimal & y); +template Decimal operator/ (const Decimal & x, const Decimal & y); +template Decimal operator- (const Decimal & x); + +#define DISPATCH(TYPE) \ +extern template Decimal operator+ (const Decimal & x, const Decimal & y); \ +extern template Decimal operator- (const Decimal & x, const Decimal & y); \ +extern template Decimal operator* (const Decimal & x, const Decimal & y); \ +extern template Decimal operator/ (const Decimal & x, const Decimal & y); \ +extern template Decimal operator- (const Decimal & x); +FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH) +#undef DISPATCH + +#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS +#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE /// Distinguishable type to allow function resolution/deduction based on value type, /// but also relatively easy to convert to/from Decimal64. 
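The Decimal.h/Decimal.cpp split above relies on `extern template` declarations in the header plus explicit instantiations in the .cpp, so the operator bodies are compiled once instead of in every translation unit. A stripped-down sketch of the same pattern, using a hypothetical `Box` template instead of `Decimal`:

```cpp
// box.h -- declare the template and suppress implicit instantiation for known types.
#pragma once

template <typename T>
struct Box
{
    T value;
    const Box & operator+=(const T & x); /// defined out of line in box.cpp
};

extern template struct Box<int>;    /// "do not instantiate here; the definition
extern template struct Box<double>; ///  lives in another translation unit"

// box.cpp -- define the members and instantiate them exactly once.
#include "box.h"

template <typename T>
const Box<T> & Box<T>::operator+=(const T & x) { value += x; return *this; }

template struct Box<int>;    /// explicit instantiation, emits the symbols
template struct Box<double>;

// main.cpp -- users link against the instantiations emitted by box.cpp.
#include "box.h"

int main()
{
    Box<int> b{1};
    b += 41;        /// calls Box<int>::operator+= compiled in box.cpp
    return b.value; /// 42
}
```

The trade-off is the one noted by the `/// Maybe this explicit instantiation affects performance since operators cannot be inlined.` comment in Decimal.cpp: out-of-line definitions cannot be inlined at call sites in other translation units.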
diff --git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h index 589d6224917..beb228cea3c 100644 --- a/base/base/Decimal_fwd.h +++ b/base/base/Decimal_fwd.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace wide { @@ -44,3 +45,8 @@ concept is_over_big_int = || std::is_same_v || std::is_same_v; } + +template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 0b43be38149..9da059c98b6 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -10,14 +10,10 @@ #define JSON_MAX_DEPTH 100 -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept, hicpp-use-noexcept) -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop /// Read unsigned integer in a simple form from a non-0-terminated string. diff --git a/base/base/JSON.h b/base/base/JSON.h index 850b74715c6..bc053670a96 100644 --- a/base/base/JSON.h +++ b/base/base/JSON.h @@ -39,14 +39,10 @@ // NOLINTBEGIN(google-explicit-constructor) -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" POCO_DECLARE_EXCEPTION(Foundation_API, JSONException, Poco::Exception) -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop // NOLINTEND(google-explicit-constructor) class JSON diff --git a/base/base/StringRef.h b/base/base/StringRef.h index 150cd81e33c..24af84626de 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -185,7 +185,8 @@ inline bool memequalWide(const char * p1, const char * p2, size_t size) { case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]]; case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]]; - case 1: if (!compare8(p1, p2)) return false; + case 1: if (!compare8(p1, p2)) return false; [[fallthrough]]; + default: ; } return compare8(p1 + size - 16, p2 + size - 16); diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp new file mode 100644 index 00000000000..1686c6bd88c --- /dev/null +++ b/base/base/cgroupsv2.cpp @@ -0,0 +1,64 @@ +#include + +#include + +#include +#include + + +bool cgroupsV2Enabled() +{ +#if defined(OS_LINUX) + /// This file exists iff the host has cgroups v2 enabled. + auto controllers_file = default_cgroups_mount / "cgroup.controllers"; + if (!std::filesystem::exists(controllers_file)) + return false; + return true; +#else + return false; +#endif +} + +bool cgroupsV2MemoryControllerEnabled() +{ +#if defined(OS_LINUX) + chassert(cgroupsV2Enabled()); + /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html: + /// - file 'cgroup.controllers' defines which controllers *can* be enabled + /// - file 'cgroup.subtree_control' defines which controllers *are* enabled + /// Caveat: nested groups may disable controllers. For simplicity, check only the top-level group. 
+ std::ifstream subtree_control_file(default_cgroups_mount / "cgroup.subtree_control"); + if (!subtree_control_file.is_open()) + return false; + std::string controllers; + std::getline(subtree_control_file, controllers); + if (controllers.find("memory") == std::string::npos) + return false; + return true; +#else + return false; +#endif +} + +std::string cgroupV2OfProcess() +{ +#if defined(OS_LINUX) + chassert(cgroupsV2Enabled()); + /// All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs + /// A simpler way to get the membership is: + std::ifstream cgroup_name_file("/proc/self/cgroup"); + if (!cgroup_name_file.is_open()) + return ""; + /// With cgroups v2, there will be a *single* line with prefix "0::/" + /// (see https://docs.kernel.org/admin-guide/cgroup-v2.html) + std::string cgroup; + std::getline(cgroup_name_file, cgroup); + static const std::string v2_prefix = "0::/"; + if (!cgroup.starts_with(v2_prefix)) + return ""; + cgroup = cgroup.substr(v2_prefix.length()); + return cgroup; +#else + return ""; +#endif +} diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h new file mode 100644 index 00000000000..70219d87cd1 --- /dev/null +++ b/base/base/cgroupsv2.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +#if defined(OS_LINUX) +/// I think it is possible to mount the cgroups hierarchy somewhere else (e.g. when in containers). +/// /sys/fs/cgroup was still symlinked to the actual mount in the cases that I have seen. +static inline const std::filesystem::path default_cgroups_mount = "/sys/fs/cgroup"; +#endif + +/// Is cgroups v2 enabled on the system? +bool cgroupsV2Enabled(); + +/// Is the memory controller of cgroups v2 enabled on the system? +/// Assumes that cgroupsV2Enabled() is enabled. +bool cgroupsV2MemoryControllerEnabled(); + +/// Which cgroup does the process belong to? +/// Returns an empty string if the cgroup cannot be determined. +/// Assumes that cgroupsV2Enabled() is enabled. +std::string cgroupV2OfProcess(); diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index b85f1a16d32..99b897c4571 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -13,11 +13,7 @@ #include -# if defined(__clang__) extern "C" void __llvm_profile_dump(); // NOLINT -# elif defined(__GNUC__) || defined(__GNUG__) -extern "C" void __gcov_exit(); -# endif #endif @@ -28,12 +24,7 @@ void dumpCoverageReportIfPossible() static std::mutex mutex; std::lock_guard lock(mutex); -# if defined(__clang__) __llvm_profile_dump(); // NOLINT -# elif defined(__GNUC__) || defined(__GNUG__) - __gcov_exit(); -# endif - #endif } diff --git a/base/base/defines.h b/base/base/defines.h index 02058a29096..627c50c27d2 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -11,7 +11,7 @@ /// including /// - it should not have fallback to 0, /// since this may create false-positive detection (common problem) -#if defined(__clang__) && defined(__has_feature) +#if defined(__has_feature) # define ch_has_feature __has_feature #endif @@ -76,24 +76,11 @@ /// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute. /// It is useful in case when compiler cannot see (and exploit) it, but UBSan can. /// Example: multiplication of signed integers with possibility of overflow when both sides are from user input. 
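The cgroupsv2.h helpers introduced above are consumed later in this diff by getMemoryAmount.cpp. As a rough usage sketch, assuming the translation unit links against the new base/base/cgroupsv2.cpp and that the header is reachable under the include path shown (the real path was dropped from the stripped #include lines, so it is an assumption here):

    #include <base/cgroupsv2.h>   // include path assumed, see note above
    #include <iostream>

    int main()
    {
        if (!cgroupsV2Enabled())
        {
            std::cout << "cgroups v2 is not enabled on this host\n";
            return 0;
        }

        // Both helpers document that they may only be called once
        // cgroupsV2Enabled() has returned true.
        std::cout << "memory controller enabled: "
                  << (cgroupsV2MemoryControllerEnabled() ? "yes" : "no") << '\n';
        std::cout << "cgroup of this process: '" << cgroupV2OfProcess() << "'\n";
    }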
-#if defined(__clang__) -# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) -# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) -# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) -# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) -#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. -# define NO_SANITIZE_UNDEFINED -# define NO_SANITIZE_ADDRESS -# define NO_SANITIZE_THREAD -# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE -#endif - -#if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 14 -# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation)) -#else -# define DISABLE_SANITIZER_INSTRUMENTATION -#endif - +#define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) +#define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) +#define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) +#define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) +#define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation)) #if !__has_include() || !defined(ADDRESS_SANITIZER) # define ASAN_UNPOISON_MEMORY_REGION(a, b) @@ -121,68 +108,53 @@ { [[noreturn]] void abortOnFailedAssertion(const String & description); } - #define chassert(x) do { static_cast(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0) + #define chassert_1(x, ...) do { static_cast(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0) + #define chassert_2(x, comment, ...) do { static_cast(x) ? void(0) : ::DB::abortOnFailedAssertion(comment); } while (0) #define UNREACHABLE() abort() // clang-format off #else /// Here sizeof() trick is used to suppress unused warning for result, /// since simple "(void)x" will evaluate the expression, while /// "sizeof(!(x))" will not. - #define chassert(x) (void)sizeof(!(x)) + #define chassert_1(x, ...) (void)sizeof(!(x)) + #define chassert_2(x, comment, ...) (void)sizeof(!(x)) #define UNREACHABLE() __builtin_unreachable() #endif + #define CHASSERT_DISPATCH(_1,_2, N,...) N(_1, _2) + #define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple + #define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1)) + #endif /// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers. /// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader -#if defined(__clang__) -# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability -# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability -# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability -# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability -# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock -# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function -# define TSA_CAPABILITY(...) 
__attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability -# define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it -# define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure -# define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability -# define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it -# define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure -# define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability -# define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability +#define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability +#define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability +#define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability +#define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability +#define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock +#define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function +#define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability +#define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it +#define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure +#define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability +#define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it +#define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure +#define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability +#define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) /// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of /// suppressing them in the whole function /// Consider adding a comment when using these macros. 
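The defines.h hunk above turns chassert() into a one- or two-argument macro by counting arguments and dispatching to chassert_1 or chassert_2. The counting trick itself is generic and can be reproduced in isolation (names below are illustrative):

    #include <cstdio>
    #include <cstdlib>

    // One argument: the failure message is the stringified condition.
    #define MY_ASSERT_1(x, ...)          do { if (!(x)) { std::fprintf(stderr, "assert failed: %s\n", #x); std::abort(); } } while (0)
    // Two arguments: the caller supplies the message.
    #define MY_ASSERT_2(x, comment, ...) do { if (!(x)) { std::fprintf(stderr, "assert failed: %s\n", comment); std::abort(); } } while (0)
    // Appending the two handlers to the argument list makes the third slot select
    // MY_ASSERT_1 for one user argument and MY_ASSERT_2 for two.
    #define MY_ASSERT_DISPATCH(_1, _2, N, ...) N(_1, _2)
    #define MY_ASSERT_INVOKE(tuple) MY_ASSERT_DISPATCH tuple
    #define my_assert(...) MY_ASSERT_INVOKE((__VA_ARGS__, MY_ASSERT_2, MY_ASSERT_1))

    int main()
    {
        int rows = 10;
        my_assert(rows > 0);                        // one-argument form
        my_assert(rows < 100, "rows out of range"); // two-argument form
        std::puts("all checks passed");
    }

Conditions containing top-level commas still need an extra pair of parentheses, the same caveat as with the real chassert().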
-# define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()) +#define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) +#define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()) /// This macro is useful when only one thread writes to a member /// and you want to read this member from the same thread without locking a mutex. /// It's safe (because no concurrent writes are possible), but TSA generates a warning. /// (Seems like there's no way to verify it, but it makes sense to distinguish it from TSA_SUPPRESS_WARNING_FOR_READ for readability) -# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) - -#else -# define TSA_GUARDED_BY(...) -# define TSA_PT_GUARDED_BY(...) -# define TSA_REQUIRES(...) -# define TSA_REQUIRES_SHARED(...) -# define TSA_NO_THREAD_SAFETY_ANALYSIS -# define TSA_CAPABILITY(...) -# define TSA_ACQUIRE(...) -# define TSA_TRY_ACQUIRE(...) -# define TSA_RELEASE(...) -# define TSA_ACQUIRE_SHARED(...) -# define TSA_TRY_ACQUIRE_SHARED(...) -# define TSA_RELEASE_SHARED(...) -# define TSA_SCOPED_LOCKABLE - -# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x) -# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) -#endif +#define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) /// A template function for suppressing warnings about unused variables or function results. template diff --git a/base/base/extended_types.h b/base/base/extended_types.h index b58df45a97e..796167ab45d 100644 --- a/base/base/extended_types.h +++ b/base/base/extended_types.h @@ -64,6 +64,44 @@ template <> struct is_arithmetic { static constexpr bool value = true; template inline constexpr bool is_arithmetic_v = is_arithmetic::value; +#define FOR_EACH_ARITHMETIC_TYPE(M) \ + M(DataTypeDate) \ + M(DataTypeDate32) \ + M(DataTypeDateTime) \ + M(DataTypeInt8) \ + M(DataTypeUInt8) \ + M(DataTypeInt16) \ + M(DataTypeUInt16) \ + M(DataTypeInt32) \ + M(DataTypeUInt32) \ + M(DataTypeInt64) \ + M(DataTypeUInt64) \ + M(DataTypeInt128) \ + M(DataTypeUInt128) \ + M(DataTypeInt256) \ + M(DataTypeUInt256) \ + M(DataTypeFloat32) \ + M(DataTypeFloat64) + +#define FOR_EACH_ARITHMETIC_TYPE_PASS(M, X) \ + M(DataTypeDate, X) \ + M(DataTypeDate32, X) \ + M(DataTypeDateTime, X) \ + M(DataTypeInt8, X) \ + M(DataTypeUInt8, X) \ + M(DataTypeInt16, X) \ + M(DataTypeUInt16, X) \ + M(DataTypeInt32, X) \ + M(DataTypeUInt32, X) \ + M(DataTypeInt64, X) \ + M(DataTypeUInt64, X) \ + M(DataTypeInt128, X) \ + M(DataTypeUInt128, X) \ + M(DataTypeInt256, X) \ + M(DataTypeUInt256, X) \ + M(DataTypeFloat32, X) \ + M(DataTypeFloat64, X) + template struct make_unsigned // NOLINT(readability-identifier-naming) { diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index ccdc0f0f976..0311238caed 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -1,17 +1,14 @@ #include +#include #include #include -#include #include #include #include #include -#if defined(BSD) -#include -#endif namespace @@ -20,49 +17,14 @@ namespace std::optional getCgroupsV2MemoryLimit() { #if defined(OS_LINUX) - const std::filesystem::path default_cgroups_mount = "/sys/fs/cgroup"; - - /// This file exists iff the host has cgroups v2 enabled. 
- std::ifstream controllers_file(default_cgroups_mount / "cgroup.controllers"); - if (!controllers_file.is_open()) + if (!cgroupsV2Enabled()) return {}; - /// Make sure that the memory controller is enabled. - /// - cgroup.controllers defines which controllers *can* be enabled. - /// - cgroup.subtree_control defines which controllers *are* enabled. - /// (see https://docs.kernel.org/admin-guide/cgroup-v2.html) - /// Caveat: nested groups may disable controllers. For simplicity, check only the top-level group. - /// ReadBufferFromFile subtree_control_file(default_cgroups_mount / "cgroup.subtree_control"); - /// std::string subtree_control; - /// readString(subtree_control, subtree_control_file); - /// if (subtree_control.find("memory") == std::string::npos) - /// return {}; - std::ifstream subtree_control_file(default_cgroups_mount / "cgroup.subtree_control"); - std::stringstream subtree_control_buf; - subtree_control_buf << subtree_control_file.rdbuf(); - std::string subtree_control = subtree_control_buf.str(); - if (subtree_control.find("memory") == std::string::npos) + if (!cgroupsV2MemoryControllerEnabled()) return {}; - /// Identify the cgroup the process belongs to - /// All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs - /// A simpler way to get the membership is: - std::ifstream cgroup_name_file("/proc/self/cgroup"); - if (!cgroup_name_file.is_open()) - return {}; - - std::stringstream cgroup_name_buf; - cgroup_name_buf << cgroup_name_file.rdbuf(); - std::string cgroup_name = cgroup_name_buf.str(); - if (!cgroup_name.empty() && cgroup_name.back() == '\n') - cgroup_name.pop_back(); /// remove trailing newline, if any - /// With cgroups v2, there will be a *single* line with prefix "0::/" - const std::string v2_prefix = "0::/"; - if (!cgroup_name.starts_with(v2_prefix)) - return {}; - cgroup_name = cgroup_name.substr(v2_prefix.length()); - - std::filesystem::path current_cgroup = cgroup_name.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup_name); + std::string cgroup = cgroupV2OfProcess(); + auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); /// Open the bottom-most nested memory limit setting file. If there is no such file at the current /// level, try again at the parent level as memory settings are inherited. diff --git a/base/base/iostream_debug_helpers.h b/base/base/iostream_debug_helpers.h index f531a56031b..5c601251272 100644 --- a/base/base/iostream_debug_helpers.h +++ b/base/base/iostream_debug_helpers.h @@ -155,9 +155,7 @@ Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-mis return dumpValue(out, x) << "; "; } -#ifdef __clang__ #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" -#endif #define DUMPVAR(VAR) ::dump(std::cerr, #VAR, (VAR)); #define DUMPHEAD std::cerr << __FILE__ << ':' << __LINE__ << " [ " << getThreadId() << " ] "; diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index 7d37f01b560..802d1bf35f5 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -11,10 +11,8 @@ /// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own. 
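The rewritten getCgroupsV2MemoryLimit() above keeps the "open the bottom-most memory.max, otherwise retry at the parent level" walk. In isolation that walk looks roughly like the following standalone sketch (simplified error handling, hard-coded start directory, not the patch's exact code):

    #include <cstdint>
    #include <filesystem>
    #include <fstream>
    #include <iostream>
    #include <optional>
    #include <string>

    // Walk from the process's cgroup directory towards the mount point and read the
    // first memory.max found; limits are inherited, so the bottom-most file wins.
    std::optional<uint64_t> cgroupV2MemoryLimit(std::filesystem::path cgroup_dir)
    {
        const std::filesystem::path mount = "/sys/fs/cgroup";
        while (true)
        {
            std::ifstream limit_file(cgroup_dir / "memory.max");
            std::string value;
            if (limit_file.is_open() && std::getline(limit_file, value))
            {
                if (value == "max")
                    return {};                  // "max" means unlimited
                return std::stoull(value);      // otherwise a limit in bytes
            }
            if (cgroup_dir == mount || cgroup_dir == cgroup_dir.parent_path())
                return {};                      // reached the top without finding a limit
            cgroup_dir = cgroup_dir.parent_path();
        }
    }

    int main()
    {
        // In the patch the start directory is derived from cgroupV2OfProcess();
        // here it is hard-coded for the sake of a standalone example.
        if (auto limit = cgroupV2MemoryLimit("/sys/fs/cgroup"))
            std::cout << "cgroup v2 memory limit: " << *limit << " bytes\n";
        else
            std::cout << "no cgroup v2 memory limit found\n";
    }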
#ifdef USE_PHDR_CACHE -#if defined(__clang__) -# pragma clang diagnostic ignored "-Wreserved-id-macro" -# pragma clang diagnostic ignored "-Wunused-macros" -#endif +#pragma clang diagnostic ignored "-Wreserved-id-macro" +#pragma clang diagnostic ignored "-Wunused-macros" #define __msan_unpoison(X, Y) // NOLINT #if defined(ch_has_feature) @@ -57,10 +55,6 @@ std::atomic phdr_cache {}; extern "C" -#ifndef __clang__ -[[gnu::visibility("default")]] -[[gnu::externally_visible]] -#endif int dl_iterate_phdr(int (*callback) (dl_phdr_info * info, size_t size, void * data), void * data) { auto * current_phdr_cache = phdr_cache.load(); diff --git a/base/glibc-compatibility/musl/aarch64/syscall.s b/base/glibc-compatibility/musl/aarch64/syscall.s index 845986bf787..aadaea04ef5 100644 --- a/base/glibc-compatibility/musl/aarch64/syscall.s +++ b/base/glibc-compatibility/musl/aarch64/syscall.s @@ -2,6 +2,7 @@ .hidden __syscall .type __syscall,%function __syscall: +.cfi_startproc uxtw x8,w0 mov x0,x1 mov x1,x2 @@ -12,3 +13,4 @@ __syscall: mov x6,x7 svc 0 ret +.cfi_endproc diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 44a9f979f99..ea5cff9fc11 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -20,11 +20,7 @@ /// Suppress TSan since it is possible for this code to be called from multiple threads, /// and initialization is safe to be done multiple times from multiple threads. -#if defined(__clang__) -# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) -#else -# define NO_SANITIZE_THREAD -#endif +#define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) // We don't have libc struct available here. // Compute aux vector manually (from /proc/self/auxv). diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index 78796ca0c05..54b552a84ea 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -6,11 +6,7 @@ /// It is only enabled in debug build (its intended use is for CI checks). #if !defined(NDEBUG) -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wincompatible-library-redeclaration" -#else - #pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch" -#endif +#pragma clang diagnostic ignored "-Wincompatible-library-redeclaration" /// We cannot use libc headers here. 
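Several hunks above simply drop the #ifdef __clang__ guards around diagnostic pragmas, since clang is now the only supported compiler. The bare pattern that remains is the usual push / ignored / pop sequence, shown here on an illustrative example (compile with clang++; not code from the patch):

    #include <cstdio>

    [[deprecated("use newApi() instead")]] inline void oldApi() { std::puts("old API"); }

    void caller()
    {
    // Suppress one specific warning for a narrow region only; no #ifdef __clang__
    // guard is needed when clang is the only compiler in use.
    #pragma clang diagnostic push
    #pragma clang diagnostic ignored "-Wdeprecated-declarations"
        oldApi();   // no -Wdeprecated-declarations warning for this call
    #pragma clang diagnostic pop
    }

    int main()
    {
        caller();
    }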
long write(int, const void *, unsigned long); diff --git a/base/poco/Foundation/include/Poco/LinearHashTable.h b/base/poco/Foundation/include/Poco/LinearHashTable.h index 0464ecb1bc4..d91f75d1d1f 100644 --- a/base/poco/Foundation/include/Poco/LinearHashTable.h +++ b/base/poco/Foundation/include/Poco/LinearHashTable.h @@ -68,7 +68,7 @@ public: typedef typename Bucket::iterator BucketIterator; typedef typename BucketVec::iterator BucketVecIterator; - class ConstIterator : public std::iterator + class ConstIterator { public: ConstIterator() : _initialized(false) { } diff --git a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h index 5f4729c9278..a6576aa561d 100644 --- a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h @@ -45,6 +45,8 @@ namespace Net ~HTTPChunkedStreamBuf(); void close(); + bool isComplete() const { return _chunk == std::char_traits::eof(); } + protected: int readFromDevice(char * buffer, std::streamsize length); int writeToDevice(const char * buffer, std::streamsize length); @@ -68,6 +70,8 @@ namespace Net ~HTTPChunkedIOS(); HTTPChunkedStreamBuf * rdbuf(); + bool isComplete() const { return _buf.isComplete(); } + protected: HTTPChunkedStreamBuf _buf; }; diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index 7c0caa1c18b..1cef988566c 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -210,7 +210,7 @@ namespace Net void setKeepAliveTimeout(const Poco::Timespan & timeout); /// Sets the connection timeout for HTTP connections. - const Poco::Timespan & getKeepAliveTimeout() const; + Poco::Timespan getKeepAliveTimeout() const; /// Returns the connection timeout for HTTP connections. virtual std::ostream & sendRequest(HTTPRequest & request); @@ -275,7 +275,7 @@ namespace Net /// This method should only be called if the request contains /// a "Expect: 100-continue" header. - void flushRequest(); + virtual void flushRequest(); /// Flushes the request stream. /// /// Normally this method does not need to be called. @@ -283,7 +283,7 @@ namespace Net /// fully sent if receiveResponse() is not called, e.g., /// because the underlying socket will be detached. - void reset(); + virtual void reset(); /// Resets the session and closes the socket. /// /// The next request will initiate a new connection, @@ -303,6 +303,9 @@ namespace Net /// Returns true if the proxy should be bypassed /// for the current host. + const Poco::Timestamp & getLastRequest() const; + /// Returns time when connection has been used last time + protected: enum { @@ -338,6 +341,10 @@ namespace Net /// Calls proxyConnect() and attaches the resulting StreamSocket /// to the HTTPClientSession. + void setLastRequest(Poco::Timestamp time); + + void assign(HTTPClientSession & session); + HTTPSessionFactory _proxySessionFactory; /// Factory to create HTTPClientSession to proxy. 
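The LinearHashTable change near the top of this hunk removes the std::iterator base class, which has been deprecated since C++17. The modern replacement is to declare the five member types directly, roughly as in this illustrative sketch (not Poco's code):

    #include <cstddef>
    #include <iostream>
    #include <iterator>

    // An iterator that counts upwards, declaring its traits as member types instead
    // of inheriting them from the deprecated std::iterator.
    class CountingIterator
    {
    public:
        using iterator_category = std::forward_iterator_tag;
        using value_type = int;
        using difference_type = std::ptrdiff_t;
        using pointer = const int *;
        using reference = const int &;

        explicit CountingIterator(int v = 0) : value(v) {}
        reference operator*() const { return value; }
        CountingIterator & operator++() { ++value; return *this; }
        CountingIterator operator++(int) { auto copy = *this; ++value; return copy; }
        friend bool operator==(const CountingIterator & a, const CountingIterator & b) { return a.value == b.value; }
        friend bool operator!=(const CountingIterator & a, const CountingIterator & b) { return !(a == b); }

    private:
        int value;
    };

    int main()
    {
        CountingIterator begin(0), end(5);
        std::cout << "distance: " << std::distance(begin, end) << '\n';  // resolved via the member types
        for (auto it = begin; it != end; ++it)
            std::cout << *it << ' ';
        std::cout << '\n';
    }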
private: @@ -433,11 +440,20 @@ namespace Net } - inline const Poco::Timespan & HTTPClientSession::getKeepAliveTimeout() const + inline Poco::Timespan HTTPClientSession::getKeepAliveTimeout() const { return _keepAliveTimeout; } + inline const Poco::Timestamp & HTTPClientSession::getLastRequest() const + { + return _lastRequest; + } + + inline void HTTPClientSession::setLastRequest(Poco::Timestamp time) + { + _lastRequest = time; + } } } // namespace Poco::Net diff --git a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h index 2f4df102605..17fa47cfa9b 100644 --- a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h @@ -48,6 +48,8 @@ namespace Net HTTPFixedLengthStreamBuf(HTTPSession & session, ContentLength length, openmode mode); ~HTTPFixedLengthStreamBuf(); + bool isComplete() const; + protected: int readFromDevice(char * buffer, std::streamsize length); int writeToDevice(const char * buffer, std::streamsize length); @@ -67,6 +69,8 @@ namespace Net ~HTTPFixedLengthIOS(); HTTPFixedLengthStreamBuf * rdbuf(); + bool isComplete() const { return _buf.isComplete(); } + protected: HTTPFixedLengthStreamBuf _buf; }; diff --git a/base/poco/Net/include/Poco/Net/HTTPSession.h b/base/poco/Net/include/Poco/Net/HTTPSession.h index 934b34be5d5..cac14f479db 100644 --- a/base/poco/Net/include/Poco/Net/HTTPSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPSession.h @@ -64,6 +64,15 @@ namespace Net Poco::Timespan getTimeout() const; /// Returns the timeout for the HTTP session. + Poco::Timespan getConnectionTimeout() const; + /// Returns connection timeout for the HTTP session. + + Poco::Timespan getSendTimeout() const; + /// Returns send timeout for the HTTP session. + + Poco::Timespan getReceiveTimeout() const; + /// Returns receive timeout for the HTTP session. + bool connected() const; /// Returns true if the underlying socket is connected. 
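getKeepAliveTimeout() above now returns Poco::Timespan by value rather than by const reference. A general reason for such a change (an illustration, not necessarily the authors' motivation) is that a reference into a session object can dangle once the object is gone, while a small value type is cheap to copy:

    #include <iostream>

    struct Session
    {
        long keep_alive_ms = 8000;

        const long & keepAliveByRef() const { return keep_alive_ms; }   // reference into *this
        long keepAliveByValue() const { return keep_alive_ms; }         // independent copy
    };

    Session makeSession() { return Session{}; }

    int main()
    {
        // The temporary Session dies at the end of the full expression, so a reference
        // obtained from it must not be used afterwards:
        //   const long & bad = makeSession().keepAliveByRef();   // dangles immediately
        long ok = makeSession().keepAliveByValue();               // safe copy
        std::cout << "keep-alive: " << ok << " ms\n";
    }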
@@ -217,12 +226,25 @@ namespace Net return _keepAlive; } - inline Poco::Timespan HTTPSession::getTimeout() const { return _receiveTimeout; } + inline Poco::Timespan HTTPSession::getConnectionTimeout() const + { + return _connectionTimeout; + } + + inline Poco::Timespan HTTPSession::getSendTimeout() const + { + return _sendTimeout; + } + + inline Poco::Timespan HTTPSession::getReceiveTimeout() const + { + return _receiveTimeout; + } inline StreamSocket & HTTPSession::socket() { diff --git a/base/poco/Net/include/Poco/Net/HTTPStream.h b/base/poco/Net/include/Poco/Net/HTTPStream.h index 48502347b2c..a00a861880f 100644 --- a/base/poco/Net/include/Poco/Net/HTTPStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPStream.h @@ -63,6 +63,8 @@ namespace Net ~HTTPIOS(); HTTPStreamBuf * rdbuf(); + bool isComplete() const { return false; } + protected: HTTPStreamBuf _buf; }; diff --git a/base/poco/Net/src/HTTPChunkedStream.cpp b/base/poco/Net/src/HTTPChunkedStream.cpp index 376e3f55492..16ed1e71c31 100644 --- a/base/poco/Net/src/HTTPChunkedStream.cpp +++ b/base/poco/Net/src/HTTPChunkedStream.cpp @@ -49,10 +49,12 @@ HTTPChunkedStreamBuf::~HTTPChunkedStreamBuf() void HTTPChunkedStreamBuf::close() { - if (_mode & std::ios::out) + if (_mode & std::ios::out && _chunk != std::char_traits::eof()) { sync(); _session.write("0\r\n\r\n", 5); + + _chunk = std::char_traits::eof(); } } diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index 2282cca682b..33a3dcc4901 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -227,7 +227,7 @@ void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout) std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) { _pRequestStream = 0; - _pResponseStream = 0; + _pResponseStream = 0; clearException(); _responseReceived = false; @@ -501,5 +501,26 @@ bool HTTPClientSession::bypassProxy() const else return false; } +void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session) +{ + poco_assert (this != &session); + + if (session.buffered()) + throw Poco::LogicException("assign a session with not empty buffered data"); + + if (buffered()) + throw Poco::LogicException("assign to a session with not empty buffered data"); + + attachSocket(session.detachSocket()); + setLastRequest(session.getLastRequest()); + setResolvedHost(session.getResolvedHost()); + setKeepAlive(session.getKeepAlive()); + + setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout()); + setKeepAliveTimeout(session.getKeepAliveTimeout()); + setProxyConfig(session.getProxyConfig()); + + session.reset(); +} } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPFixedLengthStream.cpp b/base/poco/Net/src/HTTPFixedLengthStream.cpp index fd77ff71cd9..837e5723c57 100644 --- a/base/poco/Net/src/HTTPFixedLengthStream.cpp +++ b/base/poco/Net/src/HTTPFixedLengthStream.cpp @@ -43,6 +43,12 @@ HTTPFixedLengthStreamBuf::~HTTPFixedLengthStreamBuf() } +bool HTTPFixedLengthStreamBuf::isComplete() const +{ + return _count == _length; +} + + int HTTPFixedLengthStreamBuf::readFromDevice(char* buffer, std::streamsize length) { int n = 0; diff --git a/base/poco/Net/src/TCPServerDispatcher.cpp b/base/poco/Net/src/TCPServerDispatcher.cpp index 20a1ffe1b4f..7f9f9a20ee7 100644 --- a/base/poco/Net/src/TCPServerDispatcher.cpp +++ b/base/poco/Net/src/TCPServerDispatcher.cpp @@ -93,7 +93,7 @@ void TCPServerDispatcher::release() void TCPServerDispatcher::run() { - AutoPtr guard(this, 
true); // ensure object stays alive + AutoPtr guard(this); // ensure object stays alive int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds(); @@ -149,11 +149,13 @@ void TCPServerDispatcher::enqueue(const StreamSocket& socket) { try { + this->duplicate(); _threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName); ++_currentThreads; } catch (Poco::Exception& exc) { + this->release(); ++_refusedConnections; std::cerr << "Got exception while starting thread for connection. Error code: " << exc.code() << ", message: '" << exc.displayText() << "'" << std::endl; diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 885080a3e38..2929c64ded8 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54483) +SET(VERSION_REVISION 54484) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 2) +SET(VERSION_MINOR 3) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 5a024dfc0936e062770d0cfaad0805b57c1fba17) -SET(VERSION_DESCRIBE v24.2.1.1-testing) -SET(VERSION_STRING 24.2.1.1) +SET(VERSION_GITHASH 891689a41506d00aa169548f5b4a8774351242c4) +SET(VERSION_DESCRIBE v24.3.1.1-testing) +SET(VERSION_STRING 24.3.1.1) # end of autochange diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 00fa32a6b7f..455e4f09939 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -46,5 +46,6 @@ if (COMPILER_CLANG) no_warning(thread-safety-negative) # experimental flag, too many false positives no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16 no_warning(unsafe-buffer-usage) # too aggressive + no_warning(switch-default) # conflicts with "defaults in a switch covering all enum values" # TODO Enable conversion, sign-conversion, double-promotion warnings. 
endif () diff --git a/contrib/NuRaft b/contrib/NuRaft index 1278e32bb0d..4a12f99dfc9 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 1278e32bb0d5dc489f947e002bdf8c71b0ddaa63 +Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1 diff --git a/contrib/aws b/contrib/aws index 9eb5097a0ab..32870e234ca 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 9eb5097a0abfa837722cca7a5114a25837817bf2 +Subproject commit 32870e234cac03e0ac46370c26858b0ffdf14200 diff --git a/contrib/cctz b/contrib/cctz index 8529bcef5cd..7918cb7afe8 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c +Subproject commit 7918cb7afe82e53428e39a045a437fdfd4f3df47 diff --git a/contrib/curl b/contrib/curl index 7161cb17c01..1a05e833f8f 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit 7161cb17c01dcff1dc5bf89a18437d9d729f1ecd +Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873 diff --git a/contrib/libarchive-cmake/CMakeLists.txt b/contrib/libarchive-cmake/CMakeLists.txt index cd5658b7086..e89770da5f6 100644 --- a/contrib/libarchive-cmake/CMakeLists.txt +++ b/contrib/libarchive-cmake/CMakeLists.txt @@ -157,7 +157,7 @@ if (TARGET ch_contrib::zlib) endif() if (TARGET ch_contrib::zstd) - target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1) + target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1 HAVE_LIBZSTD_COMPRESSOR=1) target_link_libraries(_libarchive PRIVATE ch_contrib::zstd) endif() diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index b9598e60167..0d04201c453 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103 +Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf diff --git a/contrib/libmetrohash/src/metrohash128.h b/contrib/libmetrohash/src/metrohash128.h index 639a4fa97e3..2dbb6ca5a8a 100644 --- a/contrib/libmetrohash/src/metrohash128.h +++ b/contrib/libmetrohash/src/metrohash128.h @@ -25,21 +25,21 @@ public: static const uint32_t bits = 128; // Constructor initializes the same as Initialize() - MetroHash128(const uint64_t seed=0); - + explicit MetroHash128(const uint64_t seed=0); + // Initializes internal state for new hash with optional seed void Initialize(const uint64_t seed=0); - + // Update the hash state with a string of bytes. If the length // is sufficiently long, the implementation switches to a bulk // hashing algorithm directly on the argument buffer for speed. void Update(const uint8_t * buffer, const uint64_t length); - + // Constructs the final hash and writes it to the argument buffer. // After a hash is finalized, this instance must be Initialized()-ed // again or the behavior of Update() and Finalize() is undefined. void Finalize(uint8_t * const hash); - + // A non-incremental function implementation. This can be significantly // faster than the incremental implementation for some usage patterns. 
static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0); @@ -57,7 +57,7 @@ private: static const uint64_t k1 = 0x8648DBDB; static const uint64_t k2 = 0x7BDEC03B; static const uint64_t k3 = 0x2F5870A5; - + struct { uint64_t v[4]; } state; struct { uint8_t b[32]; } input; uint64_t bytes; diff --git a/contrib/libmetrohash/src/metrohash64.h b/contrib/libmetrohash/src/metrohash64.h index d58898b117d..911e54e6863 100644 --- a/contrib/libmetrohash/src/metrohash64.h +++ b/contrib/libmetrohash/src/metrohash64.h @@ -25,21 +25,21 @@ public: static const uint32_t bits = 64; // Constructor initializes the same as Initialize() - MetroHash64(const uint64_t seed=0); - + explicit MetroHash64(const uint64_t seed=0); + // Initializes internal state for new hash with optional seed void Initialize(const uint64_t seed=0); - + // Update the hash state with a string of bytes. If the length // is sufficiently long, the implementation switches to a bulk // hashing algorithm directly on the argument buffer for speed. void Update(const uint8_t * buffer, const uint64_t length); - + // Constructs the final hash and writes it to the argument buffer. // After a hash is finalized, this instance must be Initialized()-ed // again or the behavior of Update() and Finalize() is undefined. void Finalize(uint8_t * const hash); - + // A non-incremental function implementation. This can be significantly // faster than the incremental implementation for some usage patterns. static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0); @@ -57,7 +57,7 @@ private: static const uint64_t k1 = 0xA2AA033B; static const uint64_t k2 = 0x62992FC1; static const uint64_t k3 = 0x30BC5B29; - + struct { uint64_t v[4]; } state; struct { uint8_t b[32]; } input; uint64_t bytes; diff --git a/contrib/liburing b/contrib/liburing index f5a48392c4e..f4e42a515cd 160000 --- a/contrib/liburing +++ b/contrib/liburing @@ -1 +1 @@ -Subproject commit f5a48392c4ea33f222cbebeb2e2fc31620162949 +Subproject commit f4e42a515cd78c8c9cac2be14222834be5f8df2b diff --git a/contrib/lz4 b/contrib/lz4 index 92ebf1870b9..ce45a9dbdb0 160000 --- a/contrib/lz4 +++ b/contrib/lz4 @@ -1 +1 @@ -Subproject commit 92ebf1870b9acbefc0e7970409a181954a10ff40 +Subproject commit ce45a9dbdb059511a3e9576b19db3e7f1a4f172e diff --git a/contrib/qpl b/contrib/qpl index a61bdd845fd..d4715e0e798 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit a61bdd845fd7ca363b2bcc55454aa520dfcd8298 +Subproject commit d4715e0e79896b85612158e135ee1a85f3b3e04d diff --git a/contrib/rapidjson b/contrib/rapidjson index c4ef90ccdbc..800ca2f38fc 160000 --- a/contrib/rapidjson +++ b/contrib/rapidjson @@ -1 +1 @@ -Subproject commit c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa +Subproject commit 800ca2f38fc3b387271d9e1926fcfc9070222104 diff --git a/docker/images.json b/docker/images.json index 2bf1efe005f..7439517379b 100644 --- a/docker/images.json +++ b/docker/images.json @@ -1,8 +1,12 @@ { - "docker/packager/binary": { + "docker/packager/binary-builder": { "name": "clickhouse/binary-builder", "dependent": [] }, + "docker/packager/cctools": { + "name": "clickhouse/cctools", + "dependent": [] + }, "docker/test/compatibility/centos": { "name": "clickhouse/test-old-centos", "dependent": [] @@ -30,7 +34,6 @@ "docker/test/util": { "name": "clickhouse/test-util", "dependent": [ - "docker/packager/binary", "docker/test/base", "docker/test/fasttest" ] @@ -67,7 +70,9 @@ }, "docker/test/fasttest": { "name": 
"clickhouse/fasttest", - "dependent": [] + "dependent": [ + "docker/packager/binary-builder" + ] }, "docker/test/style": { "name": "clickhouse/style-test", diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index d39ca312454..2f42854a972 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/packager/README.md b/docker/packager/README.md index e0b7f38ea58..3604e8585a4 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -28,7 +28,6 @@ lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-clang -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-client -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-compressor -> clickhouse -lrwxrwxrwx 1 root root 10 clickhouse-copier -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-extract-from-config -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-format -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-lld -> clickhouse diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary-builder/Dockerfile similarity index 64% rename from docker/packager/binary/Dockerfile rename to docker/packager/binary-builder/Dockerfile index e20cbe9781c..96c90403187 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary-builder/Dockerfile @@ -1,43 +1,6 @@ # docker build -t clickhouse/binary-builder . ARG FROM_TAG=latest -FROM clickhouse/test-util:latest AS cctools -# The cctools are built always from the clickhouse/test-util:latest and cached inline -# Theoretically, it should improve rebuild speed significantly -ENV CC=clang-${LLVM_VERSION} -ENV CXX=clang++-${LLVM_VERSION} -# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# DO NOT PUT ANYTHING BEFORE THE NEXT TWO `RUN` DIRECTIVES -# THE MOST HEAVY OPERATION MUST BE THE FIRST IN THE CACHE -# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# libtapi is required to support .tbh format from recent MacOS SDKs -RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \ - && cd apple-libtapi \ - && git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \ - && INSTALLPREFIX=/cctools ./build.sh \ - && ./install.sh \ - && cd .. \ - && rm -rf apple-libtapi - -# Build and install tools for cross-linking to Darwin (x86-64) -# Build and install tools for cross-linking to Darwin (aarch64) -RUN git clone https://github.com/tpoechtrager/cctools-port.git \ - && cd cctools-port/cctools \ - && git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \ - && ./configure --prefix=/cctools --with-libtapi=/cctools \ - --target=x86_64-apple-darwin \ - && make install -j$(nproc) \ - && make clean \ - && ./configure --prefix=/cctools --with-libtapi=/cctools \ - --target=aarch64-apple-darwin \ - && make install -j$(nproc) \ - && cd ../.. \ - && rm -rf cctools-port - -# !!!!!!!!!!! -# END COMPILE -# !!!!!!!!!!! 
- -FROM clickhouse/test-util:$FROM_TAG +FROM clickhouse/fasttest:$FROM_TAG ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} @@ -110,7 +73,8 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \ "https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \ && chmod +x /usr/bin/clang-tidy-cache -COPY --from=cctools /cctools /cctools +# If the cctools is updated, then first build it in the CI, then update here in a different commit +COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools RUN mkdir /workdir && chmod 777 /workdir WORKDIR /workdir diff --git a/docker/packager/binary/build.sh b/docker/packager/binary-builder/build.sh similarity index 100% rename from docker/packager/binary/build.sh rename to docker/packager/binary-builder/build.sh diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile new file mode 100644 index 00000000000..1b8c675a5c5 --- /dev/null +++ b/docker/packager/cctools/Dockerfile @@ -0,0 +1,31 @@ +# This is a hack to significantly reduce the build time of the clickhouse/binary-builder +# It's based on the assumption that we don't care of the cctools version so much +# It event does not depend on the clickhouse/fasttest in the `docker/images.json` +ARG FROM_TAG=latest +FROM clickhouse/fasttest:$FROM_TAG + +ENV CC=clang-${LLVM_VERSION} +ENV CXX=clang++-${LLVM_VERSION} + +RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \ + && cd apple-libtapi \ + && git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \ + && INSTALLPREFIX=/cctools ./build.sh \ + && ./install.sh \ + && cd .. \ + && rm -rf apple-libtapi + +# Build and install tools for cross-linking to Darwin (x86-64) +# Build and install tools for cross-linking to Darwin (aarch64) +RUN git clone https://github.com/tpoechtrager/cctools-port.git \ + && cd cctools-port/cctools \ + && git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \ + && ./configure --prefix=/cctools --with-libtapi=/cctools \ + --target=x86_64-apple-darwin \ + && make install -j$(nproc) \ + && make clean \ + && ./configure --prefix=/cctools --with-libtapi=/cctools \ + --target=aarch64-apple-darwin \ + && make install -j$(nproc) \ + && cd ../.. 
\ + && rm -rf cctools-port diff --git a/docker/packager/packager b/docker/packager/packager index ca0ae8358f3..23fc26bc1a4 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -1,16 +1,16 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import subprocess -import os import argparse import logging +import os +import subprocess import sys from pathlib import Path from typing import List, Optional SCRIPT_PATH = Path(__file__).absolute() -IMAGE_TYPE = "binary" -IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}-builder" +IMAGE_TYPE = "binary-builder" +IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}" class BuildException(Exception): diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 2d07937ad79..7bd777de5b9 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index d4775b17319..03d01cfd5d7 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 55229e893de..2317f84e0cb 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -33,6 +33,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_m ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' +# for external_symbolizer_path +RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer + RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 9e854dce65a..8858e12c50e 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -190,7 +190,7 @@ function setup_logs_replication echo -e "Creating remote destination table ${table}_${hash} with statement:\n${statement}" >&2 echo "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \ - --distributed_ddl_task_timeout=30 \ + --distributed_ddl_task_timeout=30 --distributed_ddl_output_mode=throw_only_active \ "${CONNECTION_ARGS[@]}" || continue echo "Creating table system.${table}_sender" >&2 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index e10555d4d4a..912ff191e57 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -6,9 +6,17 @@ FROM clickhouse/test-util:$FROM_TAG RUN apt-get update \ && apt-get install \ brotli \ + clang-${LLVM_VERSION} \ + clang-tidy-${LLVM_VERSION} \ + cmake \ expect \ file \ + libclang-${LLVM_VERSION}-dev \ + libclang-rt-${LLVM_VERSION}-dev \ + lld-${LLVM_VERSION} \ + llvm-${LLVM_VERSION}-dev \ lsof \ + 
ninja-build \ odbcinst \ psmisc \ python3 \ @@ -26,14 +34,48 @@ RUN apt-get update \ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 -ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" +# This symlink is required by gcc to find the lld linker +RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld +# FIXME: workaround for "The imported target "merge-fdata" references the file" error +# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d +RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake -RUN mkdir -p /tmp/clickhouse-odbc-tmp \ - && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ - && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ - && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ - && rm -rf /tmp/clickhouse-odbc-tmp +ARG CCACHE_VERSION=4.6.1 +RUN mkdir /tmp/ccache \ + && cd /tmp/ccache \ + && curl -L \ + -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \ + -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \ + && gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \ + && gpg --verify ccache-4.6.1.tar.xz.asc \ + && tar xf ccache-$CCACHE_VERSION.tar.xz \ + && cd /tmp/ccache/ccache-$CCACHE_VERSION \ + && cmake -DCMAKE_INSTALL_PREFIX=/usr \ + -DCMAKE_BUILD_TYPE=None \ + -DZSTD_FROM_INTERNET=ON \ + -DREDIS_STORAGE_BACKEND=OFF \ + -Wno-dev \ + -B build \ + -S . \ + && make VERBOSE=1 -C build \ + && make install -C build \ + && cd / \ + && rm -rf /tmp/ccache + +ARG TARGETARCH +ARG SCCACHE_VERSION=v0.7.7 +ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 +# sccache requires a value for the region. So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ + tar xz -C /tmp \ + && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ + && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r # Give suid to gdb to grant it attach permissions # chmod 777 to make the container user independent diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index ca6bff9c6be..63471c288dd 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -86,7 +86,7 @@ function download chmod +x clickhouse # clickhouse may be compressed - run once to decompress - ./clickhouse ||: + ./clickhouse --query "SELECT 1" ||: ln -s ./clickhouse ./clickhouse-server ln -s ./clickhouse ./clickhouse-client ln -s ./clickhouse ./clickhouse-local @@ -247,6 +247,12 @@ quit fuzzer_pid=$! echo "Fuzzer pid is $fuzzer_pid" + # The fuzzer_pid belongs to the timeout process. + actual_fuzzer_pid=$(ps -o pid= --ppid "$fuzzer_pid") + + echo "Attaching gdb to the fuzzer itself" + gdb -batch -command script.gdb -p $actual_fuzzer_pid & + # Wait for the fuzzer to complete. 
# Note that the 'wait || ...' thing is required so that the script doesn't # exit because of 'set -e' when 'wait' returns nonzero code. @@ -337,10 +343,9 @@ quit # which is confusing. task_exit_code=$fuzzer_exit_code echo "failure" > status.txt - { rg -ao "Found error:.*" fuzzer.log \ - || rg -ao "Exception:.*" fuzzer.log \ - || echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \ - | tail -1 > description.txt + echo "Let op!" > description.txt + echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." >>description.txt + { rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt fi if test -f core.*; then @@ -386,7 +391,13 @@ if [ -f core.zst ]; then CORE_LINK='<a href="core.zst">core.zst</a>' fi -rg --text -F '<Fatal>' server.log > fatal.log ||: +# Keep all the lines in the paragraphs containing <Fatal> that either contain <Fatal> or don't start with 20... (year) +sed -n '/<Fatal>/,/^$/p' server.log | awk '/<Fatal>/ || !/^20/' > fatal.log ||: +FATAL_LINK='' +if [ -s fatal.log ]; then + FATAL_LINK='<a href="fatal.log">fatal.log</a>' +fi + dmesg -T > dmesg.log ||: zstd --threads=0 --rm server.log @@ -419,6 +430,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s <a href="main.log">main.log</a> <a href="dmesg.log">dmesg.log</a> ${CORE_LINK} + ${FATAL_LINK}

diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml index 73f9e39f0d6..079c451b9d6 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml @@ -1,7 +1,7 @@ version: '2.3' services: mysql2: - image: mysql:5.7 + image: mysql:8.0 restart: always environment: MYSQL_ROOT_PASSWORD: clickhouse @@ -23,7 +23,7 @@ services: source: ${MYSQL_CLUSTER_LOGS:-} target: /mysql/ mysql3: - image: mysql:5.7 + image: mysql:8.0 restart: always environment: MYSQL_ROOT_PASSWORD: clickhouse @@ -45,7 +45,7 @@ services: source: ${MYSQL_CLUSTER_LOGS:-} target: /mysql/ mysql4: - image: mysql:5.7 + image: mysql:8.0 restart: always environment: MYSQL_ROOT_PASSWORD: clickhouse diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index 81e442e65b6..09198ca1968 100644 --- a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -20,6 +20,8 @@ if [ -n "$WITH_LOCAL_BINARY" ]; then clickhouse_source="--clickhouse-source /clickhouse" fi +# $TESTS_TO_RUN comes from docker +# shellcheck disable=SC2153 tests_count="--test-count $TESTS_TO_RUN" tests_to_run="test-all" workload="" @@ -47,6 +49,6 @@ fi cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse" -(lein run server $tests_to_run $workload --keeper "$KEEPER_NODE" $concurrency $nemesis $rate --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 $clickhouse_source $tests_count --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" +(lein run server $tests_to_run "$workload" --keeper "$KEEPER_NODE" "$concurrency" "$nemesis" "$rate" --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 "$clickhouse_source" "$tests_count" --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" mv store "$TEST_OUTPUT/" diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 05130044c45..1ea1e52e6fa 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -24,17 +24,18 @@ RUN pip3 install \ deepdiff \ sqlglot -ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git" +ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" + +RUN mkdir -p /tmp/clickhouse-odbc-tmp \ + && cd /tmp/clickhouse-odbc-tmp \ + && curl -L ${odbc_driver_url} | tar --strip-components=1 -xz clickhouse-odbc-1.1.6-Linux \ + && mkdir /usr/local/lib64 -p \ + && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib64/ \ + && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ + && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ + && sed -i 's"=libclickhouseodbc"=/usr/local/lib64/libclickhouseodbc"' /etc/odbcinst.ini \ + && rm -rf /tmp/clickhouse-odbc-tmp -RUN git clone --recursive ${odbc_repo} \ - && mkdir -p /clickhouse-odbc/build \ - && cmake -S /clickhouse-odbc -B /clickhouse-odbc/build \ - && ls /clickhouse-odbc/build/driver \ - && make -j 10 -C /clickhouse-odbc/build \ - && ls /clickhouse-odbc/build/driver \ - && mkdir -p /usr/local/lib64/ && cp /clickhouse-odbc/build/driver/lib*.so /usr/local/lib64/ \ - && odbcinst -i -d -f 
/clickhouse-odbc/packaging/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample ENV TZ=Europe/Amsterdam ENV MAX_RUN_TIME=9000 diff --git a/docker/test/stateless/.gitignore b/docker/test/stateless/.gitignore new file mode 100644 index 00000000000..928fed26d6d --- /dev/null +++ b/docker/test/stateless/.gitignore @@ -0,0 +1 @@ +/minio_data diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 7f4bad3d4e6..cd8864c6299 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -3,7 +3,7 @@ ARG FROM_TAG=latest FROM clickhouse/test-base:$FROM_TAG -ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" +ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" # golang version 1.13 on Ubuntu 20 is enough for tests RUN apt-get update -y \ @@ -35,7 +35,6 @@ RUN apt-get update -y \ sudo \ tree \ unixodbc \ - wget \ rustc \ cargo \ zstd \ @@ -50,11 +49,14 @@ RUN apt-get update -y \ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 RUN mkdir -p /tmp/clickhouse-odbc-tmp \ - && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ - && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ - && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ - && rm -rf /tmp/clickhouse-odbc-tmp + && cd /tmp/clickhouse-odbc-tmp \ + && curl -L ${odbc_driver_url} | tar --strip-components=1 -xz clickhouse-odbc-1.1.6-Linux \ + && mkdir /usr/local/lib64 -p \ + && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib64/ \ + && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ + && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ + && sed -i 's"=libclickhouseodbc"=/usr/local/lib64/libclickhouseodbc"' /etc/odbcinst.ini \ + && rm -rf /tmp/clickhouse-odbc-tmp ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone @@ -70,11 +72,11 @@ ARG TARGETARCH # Download Minio-related binaries RUN arch=${TARGETARCH:-amd64} \ - && wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -O ./minio \ - && wget "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -O ./mc \ + && curl -L "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -o ./minio \ + && curl -L "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -o ./mc \ && chmod +x ./mc ./minio -RUN wget --no-verbose 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ +RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ && tar -xvf hadoop-3.3.1.tar.gz \ && rm -rf hadoop-3.3.1.tar.gz diff --git a/docker/test/stateless/attach_gdb.lib b/docker/test/stateless/attach_gdb.lib index f4738cdc333..d288288bb17 100644 --- a/docker/test/stateless/attach_gdb.lib +++ b/docker/test/stateless/attach_gdb.lib @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck source=./utils.lib source /utils.lib function 
attach_gdb_to_clickhouse() diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index aec2add2857..dc181339786 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -57,8 +57,20 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th sudo mv /etc/clickhouse-server/config.d/zookeeper.xml.tmp /etc/clickhouse-server/config.d/zookeeper.xml # it contains some new settings, but we can safely remove it + rm /etc/clickhouse-server/config.d/handlers.yaml rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml + + function remove_keeper_config() + { + sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "/<$1>$2<\/$1>/d" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp + sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + } + # commit_logs_cache_size_threshold setting doesn't exist on some older versions + remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" + remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+" fi # For flaky check we also enable thread fuzzer diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 6e1834d6cde..c0fc32ab718 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -19,7 +19,7 @@ function escaped() function head_escaped() { - head -n $FAILURE_CONTEXT_LINES $1 | escaped + head -n "$FAILURE_CONTEXT_LINES" "$1" | escaped } function unts() @@ -29,15 +29,15 @@ function unts() function trim_server_logs() { - head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped + head -n "$FAILURE_CONTEXT_LINES" "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped } function install_packages() { - dpkg -i $1/clickhouse-common-static_*.deb - dpkg -i $1/clickhouse-common-static-dbg_*.deb - dpkg -i $1/clickhouse-server_*.deb - dpkg -i $1/clickhouse-client_*.deb + dpkg -i "$1"/clickhouse-common-static_*.deb + dpkg -i "$1"/clickhouse-common-static-dbg_*.deb + dpkg -i "$1"/clickhouse-server_*.deb + dpkg -i "$1"/clickhouse-client_*.deb } function configure() @@ -54,11 +54,11 @@ function configure() sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml function randomize_config_boolean_value { - value=$(($RANDOM % 2)) - sudo cat /etc/clickhouse-server/config.d/$2.xml \ + value=$((RANDOM % 2)) + sudo cat "/etc/clickhouse-server/config.d/$2.xml" \ | sed "s|<$1>[01]|<$1>$value|" \ - > /etc/clickhouse-server/config.d/$2.xml.tmp - sudo mv /etc/clickhouse-server/config.d/$2.xml.tmp /etc/clickhouse-server/config.d/$2.xml + > "/etc/clickhouse-server/config.d/$2.xml.tmp" + sudo mv "/etc/clickhouse-server/config.d/$2.xml.tmp" "/etc/clickhouse-server/config.d/$2.xml" } if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then @@ -146,17 +146,17 @@ EOL } -function stop() +function stop_server() { - local max_tries="${1:-90}" - local check_hang="${2:-true}" + local max_tries=90 + local check_hang=true local pid # Preserve the pid, since the server can hung after the PID will be deleted. pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" clickhouse stop --max-tries "$max_tries" --do-not-kill && return - if [ $check_hang == true ] + if [ "$check_hang" == true ] then # We failed to stop the server with SIGTERM. 
Maybe it hang, let's collect stacktraces. # Add a special status just in case, so it will be possible to find in the CI DB @@ -165,7 +165,7 @@ function stop() sleep 5 # The server could finally stop while we were terminating gdb, let's recheck if it's still running - kill -s 0 $pid || return + kill -s 0 "$pid" || return echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log @@ -176,12 +176,13 @@ function stop() fi } -function start() +function start_server() { counter=0 + max_attempt=120 until clickhouse-client --query "SELECT 1" do - if [ "$counter" -gt ${1:-120} ] + if [ "$counter" -gt "$max_attempt" ] then echo "Cannot start clickhouse-server" rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: @@ -214,8 +215,7 @@ function check_server_start() function check_logs_for_critical_errors() { # Sanitizer asserts - rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp - rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + sed -n '/WARNING:.*anitizer/,/^$/p' >> /test_output/tmp rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ && echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ || echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv @@ -233,8 +233,8 @@ function check_logs_for_critical_errors() # Remove file logical_errors.txt if it's empty [ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt - # No such key errors - rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \ + # No such key errors (ignore a.myext which is used in 02724_database_s3.sh and does not exist) + rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log | grep -v "a.myext" > /test_output/no_such_key_errors.txt \ && echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \ || echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv @@ -287,9 +287,9 @@ function collect_query_and_trace_logs() function collect_core_dumps() { - find . -type f -maxdepth 1 -name 'core.*' | while read core; do - zstd --threads=0 $core - mv $core.zst /test_output/ + find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do + zstd --threads=0 "$core" + mv "$core.zst" /test_output/ done } diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 3981c4dd416..621a6ced7f6 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -16,7 +16,9 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib +# shellcheck source=../stateless/attach_gdb.lib source /attach_gdb.lib +# shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib install_packages package_folder @@ -55,7 +57,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml -start +start_server setup_logs_replication @@ -65,7 +67,7 @@ clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" -stop +stop_server mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log # Randomize cache policies. @@ -85,7 +87,7 @@ if [ "$cache_policy" = "SLRU" ]; then mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml fi -start +start_server clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" @@ -188,7 +190,7 @@ clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "SYSTEM STOP THREAD FUZZER" -stop +stop_server # Let's enable S3 storage by default export USE_S3_STORAGE_FOR_MERGE_TREE=1 @@ -222,7 +224,7 @@ if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then > /etc/clickhouse-server/config.d/enable_async_load_databases.xml fi -start +start_server stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ @@ -232,18 +234,18 @@ stress --hung-check --drop-databases --output-folder test_output --skip-func-tes rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \ || echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log)" >> /test_output/test_results.tsv -stop +stop_server mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log # NOTE Disable thread fuzzer before server start with data after stress test. # In debug build it can take a lot of time. 
unset "${!THREAD_@}" -start +start_server check_server_start -stop +stop_server [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" @@ -272,7 +274,7 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d (test like '%Signal 9%') DESC, (test like '%Fatal message%') DESC, rowNumberInAllBlocks() -LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # But OOMs in stress test are allowed diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index f2bac2f5da4..b4ffcfb597c 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -16,10 +16,10 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ moreutils \ python3-fuzzywuzzy \ python3-pip \ - shellcheck \ yamllint \ locales \ - && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \ + && pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \ + requests types-requests \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \ && rm -rf /root/.cache/pip @@ -30,6 +30,19 @@ ENV LC_ALL en_US.UTF-8 # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH +ARG SHELLCHECK_VERSION=0.9.0 +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) sarch=x86_64 ;; \ + arm64) sarch=aarch64 ;; \ + esac \ + && curl -L \ + "https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VERSION}/shellcheck-v${SHELLCHECK_VERSION}.linux.${sarch}.tar.xz" \ + | tar xJ --strip=1 -C /tmp \ + && mv /tmp/shellcheck /usr/bin \ + && rm -rf /tmp/* + + # Get act and actionlint from releases RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ @@ -47,5 +60,4 @@ RUN arch=${TARGETARCH:-amd64} \ COPY run.sh / -COPY process_style_check_result.py / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index aaba5cc6a8c..69228b1bfc9 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -16,7 +16,9 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib +# shellcheck source=../stateless/attach_gdb.lib source /attach_gdb.lib +# shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & @@ -77,11 +79,31 @@ remove_keeper_config "async_replication" "1" # create_if_not_exists feature flag doesn't exist on some older versions remove_keeper_config "create_if_not_exists" "[01]" +#todo: remove these after 24.3 released. +sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ + | sed "s|azure|azure_blob_storage|" \ + > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml + +#todo: remove these after 24.3 released. 
+sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local|local_blob_storage|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + +# latest_logs_cache_size_threshold setting doesn't exist on some older versions +remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+" + +# commit_logs_cache_size_threshold setting doesn't exist on some older versions +remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" + # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml rm /etc/clickhouse-server/config.d/storage_conf_02963.xml +rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml +rm /etc/clickhouse-server/config.d/handlers.yaml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml @@ -103,12 +125,30 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ > /etc/clickhouse-server/config.d/keeper_port.xml.tmp sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml +#todo: remove these after 24.3 released. +sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \ + | sed "s|azure|azure_blob_storage|" \ + > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml + +#todo: remove these after 24.3 released. +sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local|local_blob_storage|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + # async_replication setting doesn't exist on some older versions remove_keeper_config "async_replication" "1" # create_if_not_exists feature flag doesn't exist on some older versions remove_keeper_config "create_if_not_exists" "[01]" +# latest_logs_cache_size_threshold setting doesn't exist on some older versions +remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+" + +# commit_logs_cache_size_threshold setting doesn't exist on some older versions +remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" + # But we still need default disk because some tables loaded only into it sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>
default|" \ @@ -122,6 +162,8 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml rm /etc/clickhouse-server/config.d/storage_conf_02963.xml +rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml +rm /etc/clickhouse-server/config.d/handlers.yaml rm /etc/clickhouse-server/config.d/block_number.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml @@ -321,7 +363,7 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d (test like '%Changed settings%') DESC, (test like '%New settings%') DESC, rowNumberInAllBlocks() -LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # But OOMs in stress test are allowed diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 396d5801be9..5446adf3793 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -26,6 +26,8 @@ RUN apt-get update \ && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ /etc/apt/sources.list \ + && apt-get update \ + && apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* @@ -41,20 +43,11 @@ RUN apt-get update \ bash \ bsdmainutils \ build-essential \ - clang-${LLVM_VERSION} \ - clang-tidy-${LLVM_VERSION} \ - cmake \ gdb \ git \ gperf \ - libclang-rt-${LLVM_VERSION}-dev \ - lld-${LLVM_VERSION} \ - llvm-${LLVM_VERSION} \ - llvm-${LLVM_VERSION}-dev \ - libclang-${LLVM_VERSION}-dev \ moreutils \ nasm \ - ninja-build \ pigz \ rename \ software-properties-common \ @@ -63,49 +56,4 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -# This symlink is required by gcc to find the lld linker -RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld -# for external_symbolizer_path -RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer -# FIXME: workaround for "The imported target "merge-fdata" references the file" error -# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d -RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake - -ARG CCACHE_VERSION=4.6.1 -RUN mkdir /tmp/ccache \ - && cd /tmp/ccache \ - && curl -L \ - -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \ - -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \ - && gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \ - && gpg --verify ccache-4.6.1.tar.xz.asc \ - && tar xf ccache-$CCACHE_VERSION.tar.xz \ - && cd /tmp/ccache/ccache-$CCACHE_VERSION \ - && cmake -DCMAKE_INSTALL_PREFIX=/usr \ - -DCMAKE_BUILD_TYPE=None \ - -DZSTD_FROM_INTERNET=ON \ - 
-DREDIS_STORAGE_BACKEND=OFF \ - -Wno-dev \ - -B build \ - -S . \ - && make VERBOSE=1 -C build \ - && make install -C build \ - && cd / \ - && rm -rf /tmp/ccache - -ARG TARGETARCH -ARG SCCACHE_VERSION=v0.5.4 -ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 -# sccache requires a value for the region. So by default we use The Default Region -ENV SCCACHE_REGION=us-east-1 -RUN arch=${TARGETARCH:-amd64} \ - && case $arch in \ - amd64) rarch=x86_64 ;; \ - arm64) rarch=aarch64 ;; \ - esac \ - && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ - tar xz -C /tmp \ - && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ - && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r - COPY process_functional_tests_result.py / diff --git a/docs/changelogs/v23.10.1.1976-stable.md b/docs/changelogs/v23.10.1.1976-stable.md index 0e7e7bcd55a..b08383a859b 100644 --- a/docs/changelogs/v23.10.1.1976-stable.md +++ b/docs/changelogs/v23.10.1.1976-stable.md @@ -403,4 +403,3 @@ sidebar_label: 2023 * Do not remove part if `Too many open files` is thrown [#56238](https://github.com/ClickHouse/ClickHouse/pull/56238) ([Nikolay Degterinsky](https://github.com/evillique)). * Fix ORC commit [#56261](https://github.com/ClickHouse/ClickHouse/pull/56261) ([Raúl Marín](https://github.com/Algunenano)). * Fix typo in largestTriangleThreeBuckets.md [#56263](https://github.com/ClickHouse/ClickHouse/pull/56263) ([Nikita Taranov](https://github.com/nickitat)). - diff --git a/docs/changelogs/v23.3.20.27-lts.md b/docs/changelogs/v23.3.20.27-lts.md new file mode 100644 index 00000000000..9f49e47f0bc --- /dev/null +++ b/docs/changelogs/v23.3.20.27-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.3.20.27-lts (cc974ba4f81) FIXME as compared to v23.3.19.32-lts (c4d4ca8ec02) + +#### Improvement +* Backported in [#58818](https://github.com/ClickHouse/ClickHouse/issues/58818): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Build/Testing/Packaging Improvement +* Backported in [#59877](https://github.com/ClickHouse/ClickHouse/issues/59877): If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). 
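For readers unfamiliar with the construct, a hedged sketch of a query combining both clauses against a Distributed table (table and column names are invented for illustration):

```sql
-- LIMIT 1 BY user_id keeps one row per user, then LIMIT 10 caps the overall result;
-- the fix above concerns this combination when the table is Distributed.
SELECT user_id, event_time
FROM distributed_events
ORDER BY user_id, event_time DESC
LIMIT 1 BY user_id
LIMIT 10;
```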
+* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). +* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). +* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v23.5.1.3174-stable.md b/docs/changelogs/v23.5.1.3174-stable.md index 01e5425de71..2212eb6e893 100644 --- a/docs/changelogs/v23.5.1.3174-stable.md +++ b/docs/changelogs/v23.5.1.3174-stable.md @@ -596,4 +596,3 @@ sidebar_label: 2023 * Fix assertion from stress test [#50718](https://github.com/ClickHouse/ClickHouse/pull/50718) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix flaky unit test [#50719](https://github.com/ClickHouse/ClickHouse/pull/50719) ([Kseniia Sumarokova](https://github.com/kssenii)). * Show correct sharing state in system.query_cache [#50728](https://github.com/ClickHouse/ClickHouse/pull/50728) ([Robert Schulze](https://github.com/rschu1ze)). - diff --git a/docs/changelogs/v23.6.1.1524-stable.md b/docs/changelogs/v23.6.1.1524-stable.md index 6d295d61ef4..b91c5340789 100644 --- a/docs/changelogs/v23.6.1.1524-stable.md +++ b/docs/changelogs/v23.6.1.1524-stable.md @@ -298,4 +298,3 @@ sidebar_label: 2023 * Update version_date.tsv and changelogs after v23.4.5.22-stable [#51638](https://github.com/ClickHouse/ClickHouse/pull/51638) ([robot-clickhouse](https://github.com/robot-clickhouse)). * Update version_date.tsv and changelogs after v23.3.7.5-lts [#51639](https://github.com/ClickHouse/ClickHouse/pull/51639) ([robot-clickhouse](https://github.com/robot-clickhouse)). * Update parts.md [#51643](https://github.com/ClickHouse/ClickHouse/pull/51643) ([Ramazan Polat](https://github.com/ramazanpolat)). - diff --git a/docs/changelogs/v23.8.1.2992-lts.md b/docs/changelogs/v23.8.1.2992-lts.md index e3e0e4f0344..7c224b19350 100644 --- a/docs/changelogs/v23.8.1.2992-lts.md +++ b/docs/changelogs/v23.8.1.2992-lts.md @@ -588,4 +588,3 @@ sidebar_label: 2023 * tests: mark 02152_http_external_tables_memory_tracking as no-parallel [#54155](https://github.com/ClickHouse/ClickHouse/pull/54155) ([Azat Khuzhin](https://github.com/azat)). * The external logs have had colliding arguments [#54165](https://github.com/ClickHouse/ClickHouse/pull/54165) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Rename macro [#54169](https://github.com/ClickHouse/ClickHouse/pull/54169) ([Kseniia Sumarokova](https://github.com/kssenii)). 
- diff --git a/docs/changelogs/v23.8.10.43-lts.md b/docs/changelogs/v23.8.10.43-lts.md new file mode 100644 index 00000000000..0093467d129 --- /dev/null +++ b/docs/changelogs/v23.8.10.43-lts.md @@ -0,0 +1,39 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.8.10.43-lts (a278225bba9) FIXME as compared to v23.8.9.54-lts (192a1d231fa) + +#### Improvement +* Backported in [#58819](https://github.com/ClickHouse/ClickHouse/issues/58819): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#60286](https://github.com/ClickHouse/ClickHouse/issues/60286): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Build/Testing/Packaging Improvement +* Backported in [#59879](https://github.com/ClickHouse/ClickHouse/issues/59879): If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). +* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). +* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). +* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix rare race in external sort/aggregation with temporary data in cache [#58013](https://github.com/ClickHouse/ClickHouse/pull/58013) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix 02720_row_policy_column_with_dots [#59453](https://github.com/ClickHouse/ClickHouse/pull/59453) ([Duc Canh Le](https://github.com/canhld94)). +* Pin python dependencies in stateless tests [#59663](https://github.com/ClickHouse/ClickHouse/pull/59663) ([Raúl Marín](https://github.com/Algunenano)). +* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)). + diff --git a/docs/changelogs/v23.9.1.1854-stable.md b/docs/changelogs/v23.9.1.1854-stable.md index aa27cd34478..bccd082bbaa 100644 --- a/docs/changelogs/v23.9.1.1854-stable.md +++ b/docs/changelogs/v23.9.1.1854-stable.md @@ -379,4 +379,3 @@ sidebar_label: 2023 * Fix typo in packager when ccache is used [#55104](https://github.com/ClickHouse/ClickHouse/pull/55104) ([Ilya Yatsishin](https://github.com/qoega)). * Reduce flakiness of 01455_opentelemetry_distributed [#55111](https://github.com/ClickHouse/ClickHouse/pull/55111) ([Michael Kolupaev](https://github.com/al13n321)). * Fix build [#55113](https://github.com/ClickHouse/ClickHouse/pull/55113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - diff --git a/docs/changelogs/v24.2.1.2248-stable.md b/docs/changelogs/v24.2.1.2248-stable.md new file mode 100644 index 00000000000..6113dd51ab1 --- /dev/null +++ b/docs/changelogs/v24.2.1.2248-stable.md @@ -0,0 +1,462 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.2.1.2248-stable (891689a4150) FIXME as compared to v24.1.1.2048-stable (5a024dfc093) + +#### Backward Incompatible Change +* Validate suspicious/experimental types in nested types. Previously we didn't validate such types (except JSON) in nested types like Array/Tuple/Map. [#59385](https://github.com/ClickHouse/ClickHouse/pull/59385) ([Kruglov Pavel](https://github.com/Avogar)). +* The sort clause `ORDER BY ALL` (introduced with v23.12) is replaced by `ORDER BY *`. The previous syntax was too error-prone for tables with a column `all`. [#59450](https://github.com/ClickHouse/ClickHouse/pull/59450) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename the setting `extract_kvp_max_pairs_per_row` to `extract_key_value_pairs_max_pairs_per_row`. The bug (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59683](https://github.com/ClickHouse/ClickHouse/pull/59683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename the setting extract_kvp_max_pairs_per_row to extract_key_value_pairs_max_pairs_per_row. The bug (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. 
[#59960](https://github.com/ClickHouse/ClickHouse/pull/59960) ([jsc0218](https://github.com/jsc0218)). +* Add sanity check for number of threads and block sizes. [#60138](https://github.com/ClickHouse/ClickHouse/pull/60138) ([Raúl Marín](https://github.com/Algunenano)). + +#### New Feature +* Added maximum sequential login failures to the quota. [#54737](https://github.com/ClickHouse/ClickHouse/pull/54737) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. [#54901](https://github.com/ClickHouse/ClickHouse/pull/54901) ([pufit](https://github.com/pufit)). +* Backup & Restore support for AzureBlobStorage resolves [#50747](https://github.com/ClickHouse/ClickHouse/issues/50747). [#56988](https://github.com/ClickHouse/ClickHouse/pull/56988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Implemented automatic conversion of merge tree tables of different kinds to replicated engine. Create empty `convert_to_replicated` file in table's data directory (`/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`) and that table will be converted automatically on next server start. [#57798](https://github.com/ClickHouse/ClickHouse/pull/57798) ([Kirill](https://github.com/kirillgarbar)). +* Added table function `mergeTreeIndex`. It represents the contents of index and marks files of `MergeTree` tables. It can be used for introspection. Syntax: `mergeTreeIndex(database, table, [with_marks = true])` where `database.table` is an existing table with `MergeTree` engine. [#58140](https://github.com/ClickHouse/ClickHouse/pull/58140) ([Anton Popov](https://github.com/CurtizJ)). +* Added function `seriesOutliersTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* The user can now specify the template string directly in the query using `format_schema_rows_template` as an alternative to `format_template_row`. Closes [#31363](https://github.com/ClickHouse/ClickHouse/issues/31363). [#59088](https://github.com/ClickHouse/ClickHouse/pull/59088) ([Shaun Struwig](https://github.com/Blargian)). +* Try to detect file format automatically during schema inference if it's unknown in `file/s3/hdfs/url/azureBlobStorage` engines. Closes [#50576](https://github.com/ClickHouse/ClickHouse/issues/50576). [#59092](https://github.com/ClickHouse/ClickHouse/pull/59092) ([Kruglov Pavel](https://github.com/Avogar)). +* Add function variantType that returns Enum with variant type name for each row. [#59398](https://github.com/ClickHouse/ClickHouse/pull/59398) ([Kruglov Pavel](https://github.com/Avogar)). +* Added query `ALTER TABLE table FORGET PARTITION partition` that removes ZooKeeper nodes, related to an empty partition. [#59507](https://github.com/ClickHouse/ClickHouse/pull/59507) ([Sergei Trifonov](https://github.com/serxa)). +* Support JWT credentials file for the NATS table engine. [#59543](https://github.com/ClickHouse/ClickHouse/pull/59543) ([Nickolaj Jepsen](https://github.com/nickolaj-jepsen)). +* Provides new aggregate function ‘groupArrayIntersect’. Follows up: [#49862](https://github.com/ClickHouse/ClickHouse/issues/49862). [#59598](https://github.com/ClickHouse/ClickHouse/pull/59598) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). 
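A minimal usage sketch of the new aggregate function named in the entry above (the input arrays are invented for illustration):

```sql
-- groupArrayIntersect returns the elements common to every aggregated array.
-- Expected result: ['b', 'c'] (element order is not guaranteed).
SELECT groupArrayIntersect(tags)
FROM
(
    SELECT ['a', 'b', 'c'] AS tags
    UNION ALL SELECT ['b', 'c', 'd']
    UNION ALL SELECT ['c', 'b']
);
```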
+* Implemented system.dns_cache table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)). +* The codec `LZ4HC` will accept a new level 2, which is faster than the previous minimum level 3, at the expense of less compression. In previous versions, `LZ4HC(2)` and less was the same as `LZ4HC(3)`. Author: [Cyan4973](https://github.com/Cyan4973). [#60090](https://github.com/ClickHouse/ClickHouse/pull/60090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implemented system.dns_cache table, which can be useful for debugging DNS issues. New server setting dns_cache_max_size. [#60257](https://github.com/ClickHouse/ClickHouse/pull/60257) ([Kirill Nikiforov](https://github.com/allmazz)). +* Support single-argument version for the merge table function, as `merge(['db_name', ] 'tables_regexp')`. [#60372](https://github.com/ClickHouse/ClickHouse/pull/60372) ([豪肥肥](https://github.com/HowePa)). +* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. [#60439](https://github.com/ClickHouse/ClickHouse/pull/60439) ([pufit](https://github.com/pufit)). + +#### Performance Improvement +* Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section. [#52230](https://github.com/ClickHouse/ClickHouse/pull/52230) ([JackyWoo](https://github.com/JackyWoo)). +* Vectorized distance functions used in vector search. [#58866](https://github.com/ClickHouse/ClickHouse/pull/58866) ([Robert Schulze](https://github.com/rschu1ze)). +* Continue optimizing branch miss of if function when result type is float*/decimal*/int* , follow up of https://github.com/ClickHouse/ClickHouse/pull/57885. [#59148](https://github.com/ClickHouse/ClickHouse/pull/59148) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Optimize if function when input type is map, speed up by ~10x. [#59413](https://github.com/ClickHouse/ClickHouse/pull/59413) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve performance of Int8 type by implementing strict aliasing. [#59485](https://github.com/ClickHouse/ClickHouse/pull/59485) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize performance of sum/avg conditionally for bigint and big decimal types by reducing branch miss. [#59504](https://github.com/ClickHouse/ClickHouse/pull/59504) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve performance of SELECTs with active mutations. [#59531](https://github.com/ClickHouse/ClickHouse/pull/59531) ([Azat Khuzhin](https://github.com/azat)). +* Optimized function `isNotNull` with AVX2. [#59621](https://github.com/ClickHouse/ClickHouse/pull/59621) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Reuse the result of `FunctionFactory::instance().get("isNotNull", context)` and `FunctionFactory::instance().get("assumeNotNull", context)`. Make sure it is called once during the lifetime of `FunctionCoalesce`. [#59627](https://github.com/ClickHouse/ClickHouse/pull/59627) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Improve ASOF JOIN performance for sorted or almost sorted data. [#59731](https://github.com/ClickHouse/ClickHouse/pull/59731) ([Maksim Kita](https://github.com/kitaisreal)). +* Primary key will use less amount of memory. [#60049](https://github.com/ClickHouse/ClickHouse/pull/60049) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve memory usage for primary key and some other operations. 
[#60050](https://github.com/ClickHouse/ClickHouse/pull/60050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The tables' primary keys will be loaded in memory lazily on first access. This is controlled by the new MergeTree setting `primary_key_lazy_load`, which is on by default. This provides several advantages: - it will not be loaded for tables that are not used; - if there is not enough memory, an exception will be thrown on first use instead of at server startup. This provides several disadvantages: - the latency of loading the primary key will be paid on the first query rather than before accepting connections; this theoretically may introduce a thundering-herd problem. This closes [#11188](https://github.com/ClickHouse/ClickHouse/issues/11188). [#60093](https://github.com/ClickHouse/ClickHouse/pull/60093) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Vectorized function `dotProduct` which is useful for vector search. [#60202](https://github.com/ClickHouse/ClickHouse/pull/60202) ([Robert Schulze](https://github.com/rschu1ze)). +* As is shown in Fig 1, the replacement of "&&" with "&" could generate the SIMD code. ![image](https://github.com/ClickHouse/ClickHouse/assets/26588299/a5a72ac4-6dc6-4d52-835a-4f512e55f0b9) Fig 1. Code compiled from '&&' (left) and '&' (right). [#60498](https://github.com/ClickHouse/ClickHouse/pull/60498) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). + +#### Improvement +* Added support for parameterized view with analyzer to not analyze create parameterized view. Refactor existing parameterized view logic to not analyze create parameterized view. [#54211](https://github.com/ClickHouse/ClickHouse/pull/54211) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Do not consider data part as broken if projection is broken. Closes [#56593](https://github.com/ClickHouse/ClickHouse/issues/56593). [#56864](https://github.com/ClickHouse/ClickHouse/pull/56864) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add short-circuit ability for `dictGetOrDefault` function. Closes [#52098](https://github.com/ClickHouse/ClickHouse/issues/52098). [#57767](https://github.com/ClickHouse/ClickHouse/pull/57767) ([jsc0218](https://github.com/jsc0218)). +* Running `ALTER COLUMN MATERIALIZE` on a column with `DEFAULT` or `MATERIALIZED` expression now writes the correct values: The default value for existing parts with default value or the non-default value for existing parts with non-default value. Previously, the default value was written for all existing parts. [#58023](https://github.com/ClickHouse/ClickHouse/pull/58023) ([Duc Canh Le](https://github.com/canhld94)). +* Enabled a backoff logic (e.g. exponential). Will provide an ability for reduced CPU usage, memory usage and log file sizes. [#58036](https://github.com/ClickHouse/ClickHouse/pull/58036) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Add improvement to count InitialQuery. [#58195](https://github.com/ClickHouse/ClickHouse/pull/58195) ([Unalian](https://github.com/Unalian)). +* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)). +* Implement auto-adjustment for asynchronous insert timeouts. 
The following settings are introduced: async_insert_poll_timeout_ms, async_insert_use_adaptive_busy_timeout, async_insert_busy_timeout_min_ms, async_insert_busy_timeout_max_ms, async_insert_busy_timeout_increase_rate, async_insert_busy_timeout_decrease_rate. [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ([Julia Kartseva](https://github.com/jkartseva)). +* Allow to define `volume_priority` in `storage_configuration`. [#58533](https://github.com/ClickHouse/ClickHouse/pull/58533) ([Andrey Zvonov](https://github.com/zvonand)). +* Add support for Date32 type in T64 codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma](https://github.com/binmahone)). +* Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow trailing commas in types with several items. [#59119](https://github.com/ClickHouse/ClickHouse/pull/59119) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Allow parallel and distributed processing for `S3Queue` table engine. For distributed processing use setting `s3queue_total_shards_num` (by default `1`). Setting `s3queue_processing_threads_num` previously was not allowed for Ordered processing mode, now it is allowed. Warning: settings `s3queue_processing_threads_num`(processing threads per each shard) and `s3queue_total_shards_num` for ordered mode change how metadata is stored (make the number of `max_processed_file` nodes equal to `s3queue_processing_threads_num * s3queue_total_shards_num`), so they must be the same for all shards and cannot be changed once at least one shard is created. [#59167](https://github.com/ClickHouse/ClickHouse/pull/59167) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow concurrent table creation in `DatabaseReplicated` during `recoverLostReplica`. [#59277](https://github.com/ClickHouse/ClickHouse/pull/59277) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Settings for the Distributed table engine can now be specified in the server configuration file (similar to MergeTree settings), e.g. ``` false ```. [#59291](https://github.com/ClickHouse/ClickHouse/pull/59291) ([Azat Khuzhin](https://github.com/azat)). +* Use MergeTree as a default table engine. It makes the usability much better, and closer to ClickHouse Cloud. [#59316](https://github.com/ClickHouse/ClickHouse/pull/59316) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Retry disconnects and expired sessions when reading `system.zookeeper`. This is helpful when reading many rows from `system.zookeeper` table especially in the presence of fault-injected disconnects. [#59388](https://github.com/ClickHouse/ClickHouse/pull/59388) ([Alexander Gololobov](https://github.com/davenger)). +* Do not interpret numbers with leading zeroes as octals when `input_format_values_interpret_expressions=0`. [#59403](https://github.com/ClickHouse/ClickHouse/pull/59403) ([Joanna Hulboj](https://github.com/jh0x)). 
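A hedged before/after illustration of this change (the table name is hypothetical):

```sql
CREATE TABLE t_leading_zero (x Int32) ENGINE = Memory;

-- With expression interpretation disabled, a leading-zero literal is now read
-- as the decimal number 123; previously it could be parsed as octal (83).
INSERT INTO t_leading_zero SETTINGS input_format_values_interpret_expressions = 0 VALUES (0123);

SELECT x FROM t_leading_zero;   -- 123
```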
+* At startup and whenever config files are changed, ClickHouse updates the hard memory limits of its total memory tracker. These limits are computed based on various server settings and cgroups limits (on Linux). Previously, setting `/sys/fs/cgroup/memory.max` (for cgroups v2) was hard-coded. As a result, cgroup v2 memory limits configured for nested groups (hierarchies), e.g. `/sys/fs/cgroup/my/nested/group/memory.max` were ignored. This is now fixed. The behavior of v1 memory limits remains unchanged. [#59435](https://github.com/ClickHouse/ClickHouse/pull/59435) ([Robert Schulze](https://github.com/rschu1ze)). +* New profile events added to observe the time spent on calculating PK/projections/secondary indices during `INSERT`-s. [#59436](https://github.com/ClickHouse/ClickHouse/pull/59436) ([Nikita Taranov](https://github.com/nickitat)). +* Allow to define a starting point for S3Queue with Ordered mode at creation using setting `s3queue_last_processed_path`. [#59446](https://github.com/ClickHouse/ClickHouse/pull/59446) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Keeper improvement: cache only a certain amount of logs in-memory controlled by `latest_logs_cache_size_threshold` and `commit_logs_cache_size_threshold`. [#59460](https://github.com/ClickHouse/ClickHouse/pull/59460) ([Antonio Andelic](https://github.com/antonio2368)). +* Made comments for system tables also available in `system.tables` in `clickhouse-local`. [#59493](https://github.com/ClickHouse/ClickHouse/pull/59493) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Don't infer floats in exponential notation by default. Add a setting `input_format_try_infer_exponent_floats` that will restore previous behaviour (disabled by default). Closes [#59476](https://github.com/ClickHouse/ClickHouse/issues/59476). [#59500](https://github.com/ClickHouse/ClickHouse/pull/59500) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow alter operations to be surrounded by parenthesis. The emission of parentheses can be controlled by the `format_alter_operations_with_parentheses` config. By default in formatted queries the parentheses are emitted as we store the formatted alter operations in some places as metadata (e.g.: mutations). The new syntax clarifies some of the queries where alter operations end in a list. E.g.: `ALTER TABLE x MODIFY TTL date GROUP BY a, b, DROP COLUMN c` cannot be parsed properly with the old syntax. In the new syntax the query `ALTER TABLE x (MODIFY TTL date GROUP BY a, b), (DROP COLUMN c)` is obvious. Older versions are not able to read the new syntax, therefore using the new syntax might cause issues if newer and older version of ClickHouse are mixed in a single cluster. [#59532](https://github.com/ClickHouse/ClickHouse/pull/59532) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* The previous default value equals to 1 MB for `async_insert_max_data_size` appeared to be too small. The new one would be 10 MiB. [#59536](https://github.com/ClickHouse/ClickHouse/pull/59536) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Previously the whole result was accumulated in memory and returned as one big chunk. This change should help to reduce memory consumption when reading many rows from `system.zookeeper`, allow showing intermediate progress (how many rows have been read so far) and avoid hitting connection timeout when result set is big. [#59545](https://github.com/ClickHouse/ClickHouse/pull/59545) ([Alexander Gololobov](https://github.com/davenger)). 
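For context, a typical query of this kind (the ZooKeeper path is only an example); with this change the result is streamed row by row instead of being accumulated into one large block:

```sql
-- Progress (rows read so far) is now reported while the subtree is being listed.
SELECT name, value
FROM system.zookeeper
WHERE path = '/clickhouse/task_queue/ddl';
```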
+* Now dashboard understands both compressed and uncompressed state of URL's #hash (backward compatibility). Continuation of [#59124](https://github.com/ClickHouse/ClickHouse/issues/59124) . [#59548](https://github.com/ClickHouse/ClickHouse/pull/59548) ([Amos Bird](https://github.com/amosbird)). +* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.3.1 to v1.4.0 . Also fixed a bug for polling timeout mechanism, as we observed in same cases timeout won't work properly, if timeout happen, IAA and CPU may process buffer concurrently. So far, we'd better make sure IAA codec status is not QPL_STS_BEING_PROCESSED, then fallback to SW codec. [#59551](https://github.com/ClickHouse/ClickHouse/pull/59551) ([jasperzhu](https://github.com/jinjunzh)). +* Keeper improvement: reduce size of data node even more. [#59592](https://github.com/ClickHouse/ClickHouse/pull/59592) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not show a warning about the server version in ClickHouse Cloud because ClickHouse Cloud handles seamless upgrades automatically. [#59657](https://github.com/ClickHouse/ClickHouse/pull/59657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* After self-extraction temporary binary is moved instead copying. [#59661](https://github.com/ClickHouse/ClickHouse/pull/59661) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix stack unwinding on MacOS. This closes [#53653](https://github.com/ClickHouse/ClickHouse/issues/53653). [#59690](https://github.com/ClickHouse/ClickHouse/pull/59690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#59697](https://github.com/ClickHouse/ClickHouse/pull/59697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Perform synchronous inserts if dependent MV deduplication is enabled through deduplicate_blocks_in_dependent_materialized_views=1. [#59699](https://github.com/ClickHouse/ClickHouse/pull/59699) ([Julia Kartseva](https://github.com/jkartseva)). +* Added settings `split_parts_ranges_into_intersecting_and_non_intersecting_final` and `split_intersecting_parts_ranges_into_layers_final`. This settings are needed to disable optimizations for queries with `FINAL` and needed for debug only. [#59705](https://github.com/ClickHouse/ClickHouse/pull/59705) ([Maksim Kita](https://github.com/kitaisreal)). +* Unify xml and sql created named collection behaviour in kafka storage. [#59710](https://github.com/ClickHouse/ClickHouse/pull/59710) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* In case when `merge_max_block_size_bytes` is small enough and tables contain wide rows (strings or tuples) background merges may stuck in an endless loop. This behaviour is fixed. Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59340. [#59812](https://github.com/ClickHouse/ClickHouse/pull/59812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow uuid in replica_path if CREATE TABLE explicitly has it. [#59908](https://github.com/ClickHouse/ClickHouse/pull/59908) ([Azat Khuzhin](https://github.com/azat)). +* Add column `metadata_version` of ReplicatedMergeTree table in `system.tables` system table. [#59942](https://github.com/ClickHouse/ClickHouse/pull/59942) ([Maksim Kita](https://github.com/kitaisreal)). +* Keeper improvement: send only Keeper related metrics/events for Prometheus. 
[#59945](https://github.com/ClickHouse/ClickHouse/pull/59945) ([Antonio Andelic](https://github.com/antonio2368)). +* The dashboard will display metrics across different ClickHouse versions even if the structure of system tables has changed after the upgrade. [#59967](https://github.com/ClickHouse/ClickHouse/pull/59967) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow loading AZ info from a file. [#59976](https://github.com/ClickHouse/ClickHouse/pull/59976) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Keeper improvement: add retries on failures for Disk related operations. [#59980](https://github.com/ClickHouse/ClickHouse/pull/59980) ([Antonio Andelic](https://github.com/antonio2368)). +* Add new config setting `backups.remove_backup_files_after_failure`: ``` true ```. [#60002](https://github.com/ClickHouse/ClickHouse/pull/60002) ([Vitaly Baranov](https://github.com/vitlibar)). +* Use multiple threads while reading the metadata of tables from a backup while executing the RESTORE command. [#60040](https://github.com/ClickHouse/ClickHouse/pull/60040) ([Vitaly Baranov](https://github.com/vitlibar)). +* Now if `StorageBuffer` has more than 1 shard (`num_layers` > 1) background flush will happen simultaneously for all shards in multiple threads. [#60111](https://github.com/ClickHouse/ClickHouse/pull/60111) ([alesapin](https://github.com/alesapin)). +* Support specifying users for specific S3 settings in config using `user` key. [#60144](https://github.com/ClickHouse/ClickHouse/pull/60144) ([Antonio Andelic](https://github.com/antonio2368)). +* Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)). +* Implement comparison operator for Variant values and proper Field inserting into Variant column. Don't allow creating `Variant` type with similar variant types by default (allow uder a setting `allow_suspicious_variant_types`) Closes [#59996](https://github.com/ClickHouse/ClickHouse/issues/59996). Closes [#59850](https://github.com/ClickHouse/ClickHouse/issues/59850). [#60198](https://github.com/ClickHouse/ClickHouse/pull/60198) ([Kruglov Pavel](https://github.com/Avogar)). +* Short circuit execution for `ULIDStringToDateTime`. [#60211](https://github.com/ClickHouse/ClickHouse/pull/60211) ([Juan Madurga](https://github.com/jlmadurga)). +* Added `query_id` column for tables `system.backups` and `system.backup_log`. Added error stacktrace to `error` column. [#60220](https://github.com/ClickHouse/ClickHouse/pull/60220) ([Maksim Kita](https://github.com/kitaisreal)). +* Connections through the MySQL port now automatically run with setting `prefer_column_name_to_alias = 1` to support QuickSight out-of-the-box. Also, settings `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns` are now enabled by default, affecting also only MySQL connections. This increases compatibility with more BI tools. [#60365](https://github.com/ClickHouse/ClickHouse/pull/60365) ([Robert Schulze](https://github.com/rschu1ze)). +* When output format is Pretty format and a block consists of a single numeric value which exceeds one million, A readable number will be printed on table right. e.g. ``` ┌──────count()─┠│ 233765663884 │ -- 233.77 billion └──────────────┘ ```. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)). 
+* Fix a race condition in JavaScript code leading to duplicate charts on top of each other. [#60392](https://github.com/ClickHouse/ClickHouse/pull/60392) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#60434](https://github.com/ClickHouse/ClickHouse/pull/60434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Added builds and tests with coverage collection with introspection. Continuation of [#56102](https://github.com/ClickHouse/ClickHouse/issues/56102). [#58792](https://github.com/ClickHouse/ClickHouse/pull/58792) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Include `pytest-reportlog` in integration test CI runner Dockerfile to enable JSON test reports. [#58926](https://github.com/ClickHouse/ClickHouse/pull/58926) ([MyroTk](https://github.com/MyroTk)). +* Update the rust toolchain in `corrosion-cmake` when the CMake cross-compilation toolchain variable is set. [#59309](https://github.com/ClickHouse/ClickHouse/pull/59309) ([Aris Tritas](https://github.com/aris-aiven)). +* Add some fuzzing to ASTLiterals. [#59383](https://github.com/ClickHouse/ClickHouse/pull/59383) ([Raúl Marín](https://github.com/Algunenano)). +* If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)). +* Remove ability to disable generic clickhouse components (like server/client/...), but keep some that requires extra libraries (like ODBC or keeper). [#59857](https://github.com/ClickHouse/ClickHouse/pull/59857) ([Azat Khuzhin](https://github.com/azat)). +* Query fuzzer will fuzz SETTINGS inside queries. [#60087](https://github.com/ClickHouse/ClickHouse/pull/60087) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add test that validates projections still work after attach partition. [#60415](https://github.com/ClickHouse/ClickHouse/pull/60415) ([Arthur Passos](https://github.com/arthurpassos)). +* Add test that validates attach partition fails if structure differs because of materialized column. [#60418](https://github.com/ClickHouse/ClickHouse/pull/60418) ([Arthur Passos](https://github.com/arthurpassos)). +* Add support for building ClickHouse with clang-19 (master). [#60448](https://github.com/ClickHouse/ClickHouse/pull/60448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speedup check-whitespaces check. [#60496](https://github.com/ClickHouse/ClickHouse/pull/60496) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Non ready set in TTL WHERE. [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix quantilesGK bug [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([æŽæ‰¬](https://github.com/taiyang-li)). +* Disable parallel replicas JOIN with CTE (not analyzer) [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)). +* Fix bug with `intDiv` for decimal arguments [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). 
+* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). +* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). +* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). +* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). +* Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)). +* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). +* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). +* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)). +* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* Validate types of arguments for `minmax` skipping index [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)). +* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). +* Fix AST fuzzer issue in function `countMatches` [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)). +* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix StorageURL doing some of the query execution in single thread [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)). +* s3queue: fix uninitialized value [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix crash in JSONColumnsWithMetadata format over http [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not rewrite sum() to count() if return value differs in analyzer [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)). +* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)). +* ReplicatedMergeTree invalid metadata_version fix [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix data race in `StorageDistributed` [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)). +* Run init scripts when option is enabled rather than disabled [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)). +* Fix scale conversion for DateTime64 [#60004](https://github.com/ClickHouse/ClickHouse/pull/60004) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix INSERT into SQLite with single quote (by escaping single quotes with a quote instead of backslash) [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)). +* Fix several logical errors in arrayFold [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)). +* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible exception from s3queue table on drop [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix formatting of NOT with single literals [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)). +* Use max_query_size from context in DDLLogEntry instead of hardcoded 4096 [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inconsistent formatting of queries [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix inconsistent formatting of explain in subqueries [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Allow casting of bools in string representation to to true bools [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix system.s3queue_log [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix arrayReduce with nullable aggregate function name [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)). +* Fix actions execution during preliminary filtering (PK, partition pruning) [#60196](https://github.com/ClickHouse/ClickHouse/pull/60196) ([Azat Khuzhin](https://github.com/azat)). 
+* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Revert "Replace `ORDER BY ALL` by `ORDER BY *`" [#60248](https://github.com/ClickHouse/ClickHouse/pull/60248) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix http exception codes. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)). +* s3queue: fix bug (also fixes flaky test_storage_s3_queue/test.py::test_shards_distributed) [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6 [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)). +* Merging [#59674](https://github.com/ClickHouse/ClickHouse/issues/59674). [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correctly check keys in s3Cluster [#60477](https://github.com/ClickHouse/ClickHouse/pull/60477) ([Antonio Andelic](https://github.com/antonio2368)). + +#### CI Fix or Improvement (changelog entry is not required) + +* ... [#60457](https://github.com/ClickHouse/ClickHouse/pull/60457) ([Max K.](https://github.com/maxknv)). +* ... [#60512](https://github.com/ClickHouse/ClickHouse/pull/60512) ([Max K.](https://github.com/maxknv)). +* Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)). +* ... [#60557](https://github.com/ClickHouse/ClickHouse/pull/60557) ([Max K.](https://github.com/maxknv)). +* BUG: build job can report success cache record on failed build Add a check relying on job report fail. [#60587](https://github.com/ClickHouse/ClickHouse/pull/60587) ([Max K.](https://github.com/maxknv)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Revert "Add new aggregation function groupArraySorted()""'. [#59003](https://github.com/ClickHouse/ClickHouse/pull/59003) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "Update libxml2 version to address some bogus security issues"'. [#59479](https://github.com/ClickHouse/ClickHouse/pull/59479) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Poco Logger small refactoring"'. [#59509](https://github.com/ClickHouse/ClickHouse/pull/59509) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Revert "Poco Logger small refactoring""'. [#59564](https://github.com/ClickHouse/ClickHouse/pull/59564) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "MergeTree FINAL optimization diagnostics and settings"'. [#59702](https://github.com/ClickHouse/ClickHouse/pull/59702) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Use `MergeTree` as a default table engine"'. [#59711](https://github.com/ClickHouse/ClickHouse/pull/59711) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Rename a setting"'. [#59754](https://github.com/ClickHouse/ClickHouse/pull/59754) ([Raúl Marín](https://github.com/Algunenano)). 
+* NO CL ENTRY: 'Revert "Less error prone interface of read buffers"'. [#59911](https://github.com/ClickHouse/ClickHouse/pull/59911) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Revert "Update version_date.tsv and changelogs after v24.1.4.19-stable"'. [#59973](https://github.com/ClickHouse/ClickHouse/pull/59973) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "ReplicatedMergeTree invalid metadata_version fix"'. [#60058](https://github.com/ClickHouse/ClickHouse/pull/60058) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Revert "ReplicatedMergeTree invalid metadata_version fix""'. [#60078](https://github.com/ClickHouse/ClickHouse/pull/60078) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "Implement system.dns_cache table"'. [#60085](https://github.com/ClickHouse/ClickHouse/pull/60085) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Restriction for the access key id for s3."'. [#60181](https://github.com/ClickHouse/ClickHouse/pull/60181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Do not retry queries if container is down in integration tests"'. [#60215](https://github.com/ClickHouse/ClickHouse/pull/60215) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Check stack size in Parser"'. [#60216](https://github.com/ClickHouse/ClickHouse/pull/60216) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Support resource request canceling"'. [#60253](https://github.com/ClickHouse/ClickHouse/pull/60253) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Add definers for views"'. [#60350](https://github.com/ClickHouse/ClickHouse/pull/60350) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Update build-osx.md'. [#60380](https://github.com/ClickHouse/ClickHouse/pull/60380) ([rogeryk](https://github.com/rogeryk)). +* NO CL ENTRY: 'Revert "Fix: IAST::clone() for RENAME"'. [#60398](https://github.com/ClickHouse/ClickHouse/pull/60398) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Add table function `mergeTreeIndex`"'. [#60428](https://github.com/ClickHouse/ClickHouse/pull/60428) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Userspace page cache"'. [#60550](https://github.com/ClickHouse/ClickHouse/pull/60550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Analyzer: compute ALIAS columns right after reading"'. [#60570](https://github.com/ClickHouse/ClickHouse/pull/60570) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Analyzer: support aliases and distributed JOINs in StorageMerge [#50894](https://github.com/ClickHouse/ClickHouse/pull/50894) ([Dmitry Novik](https://github.com/novikd)). +* Userspace page cache [#53770](https://github.com/ClickHouse/ClickHouse/pull/53770) ([Michael Kolupaev](https://github.com/al13n321)). +* Simplify optimize-push-to-prewhere from query plan [#58554](https://github.com/ClickHouse/ClickHouse/pull/58554) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Create ch/chc/chl symlinks by cmake as well (for develop mode) [#58609](https://github.com/ClickHouse/ClickHouse/pull/58609) ([Azat Khuzhin](https://github.com/azat)). +* CI: ci cache. step 1 [#58664](https://github.com/ClickHouse/ClickHouse/pull/58664) ([Max K.](https://github.com/maxknv)). 
+* Enable building JIT with UBSAN [#58952](https://github.com/ClickHouse/ClickHouse/pull/58952) ([Raúl Marín](https://github.com/Algunenano)). +* Support resource request canceling [#59032](https://github.com/ClickHouse/ClickHouse/pull/59032) ([Sergei Trifonov](https://github.com/serxa)). +* Analyzer: Do not resolve remote table id on initiator [#59073](https://github.com/ClickHouse/ClickHouse/pull/59073) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Add cast for ConstantNode from constant folding [#59121](https://github.com/ClickHouse/ClickHouse/pull/59121) ([Dmitry Novik](https://github.com/novikd)). +* Fix the default value of `async_insert_max_data_size` in EN document [#59161](https://github.com/ClickHouse/ClickHouse/pull/59161) ([Alex Cheng](https://github.com/Alex-Cheng)). +* CI: Add ARM integration tests [#59241](https://github.com/ClickHouse/ClickHouse/pull/59241) ([Max K.](https://github.com/maxknv)). +* Fix getting filename from read buffer wrappers [#59298](https://github.com/ClickHouse/ClickHouse/pull/59298) ([Kruglov Pavel](https://github.com/Avogar)). +* Update AWS SDK to 1.11.234 [#59299](https://github.com/ClickHouse/ClickHouse/pull/59299) ([Nikita Taranov](https://github.com/nickitat)). +* Split `ISlotControl` from `ConcurrencyControl` [#59313](https://github.com/ClickHouse/ClickHouse/pull/59313) ([Sergei Trifonov](https://github.com/serxa)). +* Some small fixes for docker images [#59337](https://github.com/ClickHouse/ClickHouse/pull/59337) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: bugfix-validate, integration, functional test scripts updates [#59348](https://github.com/ClickHouse/ClickHouse/pull/59348) ([Max K.](https://github.com/maxknv)). +* MaterializedMySQL: Fix gtid_after_attach_test to retry on detach [#59370](https://github.com/ClickHouse/ClickHouse/pull/59370) ([Val Doroshchuk](https://github.com/valbok)). +* Poco Logger small refactoring [#59375](https://github.com/ClickHouse/ClickHouse/pull/59375) ([Maksim Kita](https://github.com/kitaisreal)). +* Add sanity checks for function return types [#59379](https://github.com/ClickHouse/ClickHouse/pull/59379) ([Raúl Marín](https://github.com/Algunenano)). +* Cleanup connection pool surroundings [#59380](https://github.com/ClickHouse/ClickHouse/pull/59380) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix ARRAY JOIN with subcolumns [#59382](https://github.com/ClickHouse/ClickHouse/pull/59382) ([vdimir](https://github.com/vdimir)). +* Update curl submodule to be version 8.50 to address the irrelevant CVE-2023-46218 and CVE-2023-49219, which we don't care about at all. [#59384](https://github.com/ClickHouse/ClickHouse/pull/59384) ([josh-hildred](https://github.com/josh-hildred)). +* Update libxml2 version to address some bogus security issues [#59386](https://github.com/ClickHouse/ClickHouse/pull/59386) ([josh-hildred](https://github.com/josh-hildred)). +* Update version after release [#59393](https://github.com/ClickHouse/ClickHouse/pull/59393) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Job names [#59395](https://github.com/ClickHouse/ClickHouse/pull/59395) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: fix status and report for docker server jobs [#59396](https://github.com/ClickHouse/ClickHouse/pull/59396) ([Max K.](https://github.com/maxknv)). +* Update version_date.tsv and changelogs after v24.1.1.2048-stable [#59397](https://github.com/ClickHouse/ClickHouse/pull/59397) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Forward declaration for PeekableReadBuffer [#59399](https://github.com/ClickHouse/ClickHouse/pull/59399) ([Azat Khuzhin](https://github.com/azat)). +* Progress bar: use FQDN to differentiate metrics from different hosts [#59404](https://github.com/ClickHouse/ClickHouse/pull/59404) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix test test_stop_other_host_during_backup [#59432](https://github.com/ClickHouse/ClickHouse/pull/59432) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update run.sh [#59433](https://github.com/ClickHouse/ClickHouse/pull/59433) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Post a failure status if can not run the CI [#59440](https://github.com/ClickHouse/ClickHouse/pull/59440) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Safer Rust (catch panic with catch_unwind()) [#59447](https://github.com/ClickHouse/ClickHouse/pull/59447) ([Azat Khuzhin](https://github.com/azat)). +* More parallel insert-select pipeline [#59448](https://github.com/ClickHouse/ClickHouse/pull/59448) ([Nikita Taranov](https://github.com/nickitat)). +* CLion says these headers are unused [#59451](https://github.com/ClickHouse/ClickHouse/pull/59451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02720_row_policy_column_with_dots [#59453](https://github.com/ClickHouse/ClickHouse/pull/59453) ([Duc Canh Le](https://github.com/canhld94)). +* Fix problem detected by UBSAN [#59461](https://github.com/ClickHouse/ClickHouse/pull/59461) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer: Fix denny_crane [#59483](https://github.com/ClickHouse/ClickHouse/pull/59483) ([vdimir](https://github.com/vdimir)). +* Fix `00191_aggregating_merge_tree_and_final` [#59494](https://github.com/ClickHouse/ClickHouse/pull/59494) ([Nikita Taranov](https://github.com/nickitat)). +* Avoid running all checks when `aspell-dict.txt` was changed [#59496](https://github.com/ClickHouse/ClickHouse/pull/59496) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Fixes for binary.html [#59499](https://github.com/ClickHouse/ClickHouse/pull/59499) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parallel replicas: better initial replicas failover (2) [#59501](https://github.com/ClickHouse/ClickHouse/pull/59501) ([Igor Nikonov](https://github.com/devcrafter)). +* Update version_date.tsv and changelogs after v24.1.2.5-stable [#59510](https://github.com/ClickHouse/ClickHouse/pull/59510) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.12.3.40-stable [#59511](https://github.com/ClickHouse/ClickHouse/pull/59511) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.11.5.29-stable [#59515](https://github.com/ClickHouse/ClickHouse/pull/59515) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update memory tracker periodically with cgroup memory usage [#59516](https://github.com/ClickHouse/ClickHouse/pull/59516) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove a scary message if an error is retryable [#59517](https://github.com/ClickHouse/ClickHouse/pull/59517) ([alesapin](https://github.com/alesapin)). +* Update the peter-evans/create-pull-request action to v6 [#59520](https://github.com/ClickHouse/ClickHouse/pull/59520) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix usage of StatusType [#59527](https://github.com/ClickHouse/ClickHouse/pull/59527) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Analyzer fix test_select_access_rights/test_main.py::test_select_count [#59528](https://github.com/ClickHouse/ClickHouse/pull/59528) ([vdimir](https://github.com/vdimir)). +* GRPCServer: do not call value() on empty optional query_info [#59533](https://github.com/ClickHouse/ClickHouse/pull/59533) ([Sema Checherinda](https://github.com/CheSema)). +* Use ConnectionPoolPtr instead of raw pointer [#59534](https://github.com/ClickHouse/ClickHouse/pull/59534) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix data race with `AggregatedDataVariants` [#59537](https://github.com/ClickHouse/ClickHouse/pull/59537) ([Nikita Taranov](https://github.com/nickitat)). +* Refactoring of dashboard state encoding [#59554](https://github.com/ClickHouse/ClickHouse/pull/59554) ([Sergei Trifonov](https://github.com/serxa)). +* CI: ci_cache, enable await [#59555](https://github.com/ClickHouse/ClickHouse/pull/59555) ([Max K.](https://github.com/maxknv)). +* Bump libssh to 0.9.8 [#59563](https://github.com/ClickHouse/ClickHouse/pull/59563) ([Robert Schulze](https://github.com/rschu1ze)). +* MultiVersion use mutex [#59565](https://github.com/ClickHouse/ClickHouse/pull/59565) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix aws submodule reference [#59566](https://github.com/ClickHouse/ClickHouse/pull/59566) ([Raúl Marín](https://github.com/Algunenano)). +* Add missed #include and [#59567](https://github.com/ClickHouse/ClickHouse/pull/59567) ([Mikhnenko Sasha](https://github.com/4JustMe4)). +* CI: nightly job to update latest docker tag only [#59586](https://github.com/ClickHouse/ClickHouse/pull/59586) ([Max K.](https://github.com/maxknv)). +* Analyzer: compute ALIAS columns right after reading [#59595](https://github.com/ClickHouse/ClickHouse/pull/59595) ([vdimir](https://github.com/vdimir)). +* Add another sanity check for function return types [#59605](https://github.com/ClickHouse/ClickHouse/pull/59605) ([Raúl Marín](https://github.com/Algunenano)). +* Update README.md [#59610](https://github.com/ClickHouse/ClickHouse/pull/59610) ([Tyler Hannan](https://github.com/tylerhannan)). +* Updated a list of trusted contributors [#59616](https://github.com/ClickHouse/ClickHouse/pull/59616) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* CI: fix ast fuzzer job report (slack bot issue) [#59629](https://github.com/ClickHouse/ClickHouse/pull/59629) ([Max K.](https://github.com/maxknv)). +* MergeTree FINAL optimization diagnostics and settings [#59650](https://github.com/ClickHouse/ClickHouse/pull/59650) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix default path when path is not specified in config [#59654](https://github.com/ClickHouse/ClickHouse/pull/59654) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow up for [#59277](https://github.com/ClickHouse/ClickHouse/issues/59277) [#59659](https://github.com/ClickHouse/ClickHouse/pull/59659) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Pin python dependencies in stateless tests [#59663](https://github.com/ClickHouse/ClickHouse/pull/59663) ([Raúl Marín](https://github.com/Algunenano)). +* Unquote FLAG_LATEST to fix issue with empty argument [#59672](https://github.com/ClickHouse/ClickHouse/pull/59672) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Temporarily remove a feature that doesn't work [#59688](https://github.com/ClickHouse/ClickHouse/pull/59688) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* ConnectionEstablisher: remove unused is_finished [#59706](https://github.com/ClickHouse/ClickHouse/pull/59706) ([Igor Nikonov](https://github.com/devcrafter)). +* Add test for increase-always autoscaling lambda [#59709](https://github.com/ClickHouse/ClickHouse/pull/59709) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove SourceWithKeyCondition from ReadFromStorageStep [#59720](https://github.com/ClickHouse/ClickHouse/pull/59720) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add assertions around FixedString code [#59737](https://github.com/ClickHouse/ClickHouse/pull/59737) ([Raúl Marín](https://github.com/Algunenano)). +* Fix skipping unused shards with analyzer [#59741](https://github.com/ClickHouse/ClickHouse/pull/59741) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix DB type check - now it'll refuse to create in Replicated databases [#59743](https://github.com/ClickHouse/ClickHouse/pull/59743) ([Michael Kolupaev](https://github.com/al13n321)). +* Analyzer: Fix test_replicating_constants/test.py::test_different_versions [#59750](https://github.com/ClickHouse/ClickHouse/pull/59750) ([Dmitry Novik](https://github.com/novikd)). +* Fix dashboard params default values [#59753](https://github.com/ClickHouse/ClickHouse/pull/59753) ([Sergei Trifonov](https://github.com/serxa)). +* Fix logical optimizer with LowCardinality in new analyzer [#59766](https://github.com/ClickHouse/ClickHouse/pull/59766) ([Antonio Andelic](https://github.com/antonio2368)). +* Update libuv [#59773](https://github.com/ClickHouse/ClickHouse/pull/59773) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Followup [#50894](https://github.com/ClickHouse/ClickHouse/issues/50894) [#59774](https://github.com/ClickHouse/ClickHouse/pull/59774) ([Dmitry Novik](https://github.com/novikd)). +* CI: ci test await [#59778](https://github.com/ClickHouse/ClickHouse/pull/59778) ([Max K.](https://github.com/maxknv)). +* Better logging for adaptive async timeouts [#59781](https://github.com/ClickHouse/ClickHouse/pull/59781) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix broken youtube embedding in ne-tormozit.md [#59782](https://github.com/ClickHouse/ClickHouse/pull/59782) ([Shaun Struwig](https://github.com/Blargian)). +* Hide URL/S3 'headers' argument in SHOW CREATE [#59787](https://github.com/ClickHouse/ClickHouse/pull/59787) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)). +* Update version_date.tsv and changelogs after v24.1.3.31-stable [#59799](https://github.com/ClickHouse/ClickHouse/pull/59799) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.12.4.15-stable [#59800](https://github.com/ClickHouse/ClickHouse/pull/59800) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Analyzer: fix test_access_for_functions/test.py::test_access_rights_for_function [#59801](https://github.com/ClickHouse/ClickHouse/pull/59801) ([Dmitry Novik](https://github.com/novikd)). 
+* Analyzer: Fix test_wrong_db_or_table_name/test.py::test_wrong_table_name [#59806](https://github.com/ClickHouse/ClickHouse/pull/59806) ([Dmitry Novik](https://github.com/novikd)). +* CI: await tune ups [#59807](https://github.com/ClickHouse/ClickHouse/pull/59807) ([Max K.](https://github.com/maxknv)). +* Enforce tests with enabled analyzer in CI [#59814](https://github.com/ClickHouse/ClickHouse/pull/59814) ([Dmitry Novik](https://github.com/novikd)). +* Handle different timestamp related aspects of zip-files [#59815](https://github.com/ClickHouse/ClickHouse/pull/59815) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix settings history azure_max_single_part_copy_size [#59819](https://github.com/ClickHouse/ClickHouse/pull/59819) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Updated a list of trusted contributors [#59844](https://github.com/ClickHouse/ClickHouse/pull/59844) ([Maksim Kita](https://github.com/kitaisreal)). +* Add check for recursiveRemoveLowCardinality() [#59845](https://github.com/ClickHouse/ClickHouse/pull/59845) ([Vitaly Baranov](https://github.com/vitlibar)). +* Better warning for disabled kernel.task_delayacct [#59846](https://github.com/ClickHouse/ClickHouse/pull/59846) ([Azat Khuzhin](https://github.com/azat)). +* Reintroduce 02590_interserver_mode_client_info_initial_query_start_time [#59851](https://github.com/ClickHouse/ClickHouse/pull/59851) ([Azat Khuzhin](https://github.com/azat)). +* Respect CMAKE_OSX_DEPLOYMENT_TARGET for Rust targets [#59852](https://github.com/ClickHouse/ClickHouse/pull/59852) ([Azat Khuzhin](https://github.com/azat)). +* Do not reinitialize ZooKeeperWithFaultInjection on each chunk [#59854](https://github.com/ClickHouse/ClickHouse/pull/59854) ([Alexander Gololobov](https://github.com/davenger)). +* Fix: check if std::function is set before calling it [#59858](https://github.com/ClickHouse/ClickHouse/pull/59858) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix long shutdown of FileLog storage [#59873](https://github.com/ClickHouse/ClickHouse/pull/59873) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 02322_sql_insert_format flakiness [#59874](https://github.com/ClickHouse/ClickHouse/pull/59874) ([Azat Khuzhin](https://github.com/azat)). +* Follow up for [#58554](https://github.com/ClickHouse/ClickHouse/issues/58554). Cleanup. [#59889](https://github.com/ClickHouse/ClickHouse/pull/59889) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* CI: Fix job failures due to jepsen artifacts [#59890](https://github.com/ClickHouse/ClickHouse/pull/59890) ([Max K.](https://github.com/maxknv)). +* Add test 02988_join_using_prewhere_pushdown [#59892](https://github.com/ClickHouse/ClickHouse/pull/59892) ([vdimir](https://github.com/vdimir)). +* Do not pull mutations if pulling replication log had been stopped [#59895](https://github.com/ClickHouse/ClickHouse/pull/59895) ([Azat Khuzhin](https://github.com/azat)). +* Fix `02982_comments_in_system_tables` [#59896](https://github.com/ClickHouse/ClickHouse/pull/59896) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Refactor Squashing for inserts. [#59899](https://github.com/ClickHouse/ClickHouse/pull/59899) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not rebuild a lambda package if it is updated [#59902](https://github.com/ClickHouse/ClickHouse/pull/59902) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Fix self-extracting: macOS doesn't allow to run renamed executable - copy instead [#59906](https://github.com/ClickHouse/ClickHouse/pull/59906) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update tests with indexHint for analyzer. [#59907](https://github.com/ClickHouse/ClickHouse/pull/59907) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Petite cleanup around macros and ReplicatedMergeTree [#59909](https://github.com/ClickHouse/ClickHouse/pull/59909) ([Azat Khuzhin](https://github.com/azat)). +* Fix: absence of closing record in query_log for failed insert over http [#59910](https://github.com/ClickHouse/ClickHouse/pull/59910) ([Igor Nikonov](https://github.com/devcrafter)). +* Decrease logging level for http retriable errors to Warning (and fix 00157_cache_dictionary flakiness) [#59920](https://github.com/ClickHouse/ClickHouse/pull/59920) ([Azat Khuzhin](https://github.com/azat)). +* Remove `test_distributed_backward_compatability` [#59921](https://github.com/ClickHouse/ClickHouse/pull/59921) ([Dmitry Novik](https://github.com/novikd)). +* Commands node args should add rvalue to push_back to reduce object copy cost [#59922](https://github.com/ClickHouse/ClickHouse/pull/59922) ([xuzifu666](https://github.com/xuzifu666)). +* tests: fix 02981_vertical_merges_memory_usage flakiness [#59923](https://github.com/ClickHouse/ClickHouse/pull/59923) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer: Update broken integration tests list [#59924](https://github.com/ClickHouse/ClickHouse/pull/59924) ([Dmitry Novik](https://github.com/novikd)). +* CI: integration tests to mysql80 [#59939](https://github.com/ClickHouse/ClickHouse/pull/59939) ([Max K.](https://github.com/maxknv)). +* Register StorageMergeTree exception message fix [#59941](https://github.com/ClickHouse/ClickHouse/pull/59941) ([Maksim Kita](https://github.com/kitaisreal)). +* Replace lambdas with pointers to members to simplify stacks [#59944](https://github.com/ClickHouse/ClickHouse/pull/59944) ([Alexander Gololobov](https://github.com/davenger)). +* Analyzer: Fix test_user_defined_object_persistence [#59948](https://github.com/ClickHouse/ClickHouse/pull/59948) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Fix test_mutations_with_merge_tree [#59951](https://github.com/ClickHouse/ClickHouse/pull/59951) ([Dmitry Novik](https://github.com/novikd)). +* Cleanups [#59964](https://github.com/ClickHouse/ClickHouse/pull/59964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v24.1.4.19-stable [#59966](https://github.com/ClickHouse/ClickHouse/pull/59966) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Less conflicts [#59968](https://github.com/ClickHouse/ClickHouse/pull/59968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We don't have external dictionaries from Aerospike [#59969](https://github.com/ClickHouse/ClickHouse/pull/59969) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix max num to warn message [#59972](https://github.com/ClickHouse/ClickHouse/pull/59972) ([Jordi Villar](https://github.com/jrdi)). +* Analyzer: Fix test_settings_profile [#59975](https://github.com/ClickHouse/ClickHouse/pull/59975) ([Dmitry Novik](https://github.com/novikd)). +* Update version_date.tsv and changelogs after v24.1.4.20-stable [#59978](https://github.com/ClickHouse/ClickHouse/pull/59978) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Analyzer: Fix test_storage_rabbitmq [#59981](https://github.com/ClickHouse/ClickHouse/pull/59981) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Fix test_shard_level_const_function [#59983](https://github.com/ClickHouse/ClickHouse/pull/59983) ([Dmitry Novik](https://github.com/novikd)). +* Add newlines to SettingsChangesHistory to maybe have less conflicts [#59984](https://github.com/ClickHouse/ClickHouse/pull/59984) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove context from comparison functions. [#59985](https://github.com/ClickHouse/ClickHouse/pull/59985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version_date.tsv and changelogs after v24.1.5.6-stable [#59993](https://github.com/ClickHouse/ClickHouse/pull/59993) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Insert synchronously if dependent MV deduplication is enabled" [#59998](https://github.com/ClickHouse/ClickHouse/pull/59998) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix obviously wrong (but non significant) error in dictionaries [#60005](https://github.com/ClickHouse/ClickHouse/pull/60005) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Inhibit randomization in some tests [#60009](https://github.com/ClickHouse/ClickHouse/pull/60009) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The code should not be complex [#60010](https://github.com/ClickHouse/ClickHouse/pull/60010) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Exclude test run from a slow build [#60011](https://github.com/ClickHouse/ClickHouse/pull/60011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix broken lambdas formatting [#60012](https://github.com/ClickHouse/ClickHouse/pull/60012) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Verify formatting consistency on the server-side [#60013](https://github.com/ClickHouse/ClickHouse/pull/60013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer: Fix test_sql_user_defined_functions_on_cluster [#60019](https://github.com/ClickHouse/ClickHouse/pull/60019) ([Dmitry Novik](https://github.com/novikd)). +* Fix 02981_vertical_merges_memory_usage with SharedMergeTree [#60028](https://github.com/ClickHouse/ClickHouse/pull/60028) ([Raúl Marín](https://github.com/Algunenano)). +* Fix 01656_test_query_log_factories_info with analyzer. [#60037](https://github.com/ClickHouse/ClickHouse/pull/60037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable tests with coverage [#60047](https://github.com/ClickHouse/ClickHouse/pull/60047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print CPU flags at startup [#60075](https://github.com/ClickHouse/ClickHouse/pull/60075) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Cleanup: less confusion between config priority and balancing priority in connection pools [#60077](https://github.com/ClickHouse/ClickHouse/pull/60077) ([Igor Nikonov](https://github.com/devcrafter)). +* Temporary table already exists exception message fix [#60080](https://github.com/ClickHouse/ClickHouse/pull/60080) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Refactor prewhere and primary key optimization [#60082](https://github.com/ClickHouse/ClickHouse/pull/60082) ([Amos Bird](https://github.com/amosbird)). +* Bump curl to version 4.6.0 [#60084](https://github.com/ClickHouse/ClickHouse/pull/60084) ([josh-hildred](https://github.com/josh-hildred)). +* Check wrong abbreviations [#60086](https://github.com/ClickHouse/ClickHouse/pull/60086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove the check for formatting consistency from the Fuzzer [#60088](https://github.com/ClickHouse/ClickHouse/pull/60088) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid overflow in settings [#60089](https://github.com/ClickHouse/ClickHouse/pull/60089) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* A small preparation for better handling of primary key in memory [#60092](https://github.com/ClickHouse/ClickHouse/pull/60092) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Move threadPoolCallbackRunner to the "Common" folder [#60097](https://github.com/ClickHouse/ClickHouse/pull/60097) ([Vitaly Baranov](https://github.com/vitlibar)). +* Speed up the CI [#60106](https://github.com/ClickHouse/ClickHouse/pull/60106) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Insignificant changes [#60108](https://github.com/ClickHouse/ClickHouse/pull/60108) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not retry queries if container is down in integration tests [#60109](https://github.com/ClickHouse/ClickHouse/pull/60109) ([Azat Khuzhin](https://github.com/azat)). +* Better check for inconsistent formatting [#60110](https://github.com/ClickHouse/ClickHouse/pull/60110) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* skip printing meaningless log [#60123](https://github.com/ClickHouse/ClickHouse/pull/60123) ([conic](https://github.com/conicl)). +* Implement TODO [#60124](https://github.com/ClickHouse/ClickHouse/pull/60124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad log message [#60125](https://github.com/ClickHouse/ClickHouse/pull/60125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `IMergeTreeDataPart` [#60139](https://github.com/ClickHouse/ClickHouse/pull/60139) ([Antonio Andelic](https://github.com/antonio2368)). +* Add new setting to changes history [#60141](https://github.com/ClickHouse/ClickHouse/pull/60141) ([Antonio Andelic](https://github.com/antonio2368)). +* Analyzer: fix row level filters with PREWHERE + additional filters [#60142](https://github.com/ClickHouse/ClickHouse/pull/60142) ([vdimir](https://github.com/vdimir)). +* Tests: query log for inserts over http [#60143](https://github.com/ClickHouse/ClickHouse/pull/60143) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix build in master [#60151](https://github.com/ClickHouse/ClickHouse/pull/60151) ([Raúl Marín](https://github.com/Algunenano)). +* Add setting history check to stateless tests [#60154](https://github.com/ClickHouse/ClickHouse/pull/60154) ([Raúl Marín](https://github.com/Algunenano)). +* Mini cleanup of CPUID.h [#60155](https://github.com/ClickHouse/ClickHouse/pull/60155) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix: custom key failover test flakiness [#60158](https://github.com/ClickHouse/ClickHouse/pull/60158) ([Igor Nikonov](https://github.com/devcrafter)). 
+* Skip sanity checks on secondary CREATE query [#60159](https://github.com/ClickHouse/ClickHouse/pull/60159) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove extensively aggressive check [#60162](https://github.com/ClickHouse/ClickHouse/pull/60162) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong message during compilation [#60178](https://github.com/ClickHouse/ClickHouse/pull/60178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#44318](https://github.com/ClickHouse/ClickHouse/issues/44318) [#60179](https://github.com/ClickHouse/ClickHouse/pull/60179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add test for 59437 [#60191](https://github.com/ClickHouse/ClickHouse/pull/60191) ([Raúl Marín](https://github.com/Algunenano)). +* CI: hot fix for gh statuses [#60201](https://github.com/ClickHouse/ClickHouse/pull/60201) ([Max K.](https://github.com/maxknv)). +* Limit libarchive format to what we use [#60203](https://github.com/ClickHouse/ClickHouse/pull/60203) ([San](https://github.com/santrancisco)). +* Fix bucket region discovery [#60204](https://github.com/ClickHouse/ClickHouse/pull/60204) ([Nikita Taranov](https://github.com/nickitat)). +* Fix `test_backup_restore_s3/test.py::test_user_specific_auth` [#60210](https://github.com/ClickHouse/ClickHouse/pull/60210) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: combine analyzer, s3, dbreplicated into one job [#60224](https://github.com/ClickHouse/ClickHouse/pull/60224) ([Max K.](https://github.com/maxknv)). +* Slightly better Keeper loading from snapshot [#60226](https://github.com/ClickHouse/ClickHouse/pull/60226) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: IAST::clone() for RENAME [#60227](https://github.com/ClickHouse/ClickHouse/pull/60227) ([Igor Nikonov](https://github.com/devcrafter)). +* Treat 2+ in allow_experimental_parallel_reading_from_replicas as 2 [#60228](https://github.com/ClickHouse/ClickHouse/pull/60228) ([Raúl Marín](https://github.com/Algunenano)). +* CI: random job pick support [#60229](https://github.com/ClickHouse/ClickHouse/pull/60229) ([Max K.](https://github.com/maxknv)). +* Fix analyzer - hide arguments for secret functions [#60230](https://github.com/ClickHouse/ClickHouse/pull/60230) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backups delete suspicious file [#60231](https://github.com/ClickHouse/ClickHouse/pull/60231) ([Maksim Kita](https://github.com/kitaisreal)). +* CI: random sanitizer for parallel repl in PR wf [#60234](https://github.com/ClickHouse/ClickHouse/pull/60234) ([Max K.](https://github.com/maxknv)). +* CI: use aarch runner for runconfig job [#60236](https://github.com/ClickHouse/ClickHouse/pull/60236) ([Max K.](https://github.com/maxknv)). +* Add test for 60232 [#60244](https://github.com/ClickHouse/ClickHouse/pull/60244) ([Raúl Marín](https://github.com/Algunenano)). +* Make cloud sync required [#60245](https://github.com/ClickHouse/ClickHouse/pull/60245) ([Raúl Marín](https://github.com/Algunenano)). +* Tests from [#60094](https://github.com/ClickHouse/ClickHouse/issues/60094) [#60256](https://github.com/ClickHouse/ClickHouse/pull/60256) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove bad check in Keeper [#60266](https://github.com/ClickHouse/ClickHouse/pull/60266) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix integration `test_backup_restore_s3` [#60269](https://github.com/ClickHouse/ClickHouse/pull/60269) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ignore valid 'No such key errors' in stress tests [#60270](https://github.com/ClickHouse/ClickHouse/pull/60270) ([Raúl Marín](https://github.com/Algunenano)). +* Stress test: Include the first sanitizer block message in the report [#60283](https://github.com/ClickHouse/ClickHouse/pull/60283) ([Raúl Marín](https://github.com/Algunenano)). +* Update analyzer_tech_debt.txt [#60303](https://github.com/ClickHouse/ClickHouse/pull/60303) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Minor fixes for hashed dictionary [#60310](https://github.com/ClickHouse/ClickHouse/pull/60310) ([vdimir](https://github.com/vdimir)). +* Install tailscale during AMI build and set it up on runners [#60316](https://github.com/ClickHouse/ClickHouse/pull/60316) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: remove Integration tests asan and release from PR wf [#60327](https://github.com/ClickHouse/ClickHouse/pull/60327) ([Max K.](https://github.com/maxknv)). +* Fix - analyzer related - "executable" function subquery arguments. [#60339](https://github.com/ClickHouse/ClickHouse/pull/60339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update settings.md to correct the description for setting `max_concurrent_queries_for_user` [#60343](https://github.com/ClickHouse/ClickHouse/pull/60343) ([Alex Cheng](https://github.com/Alex-Cheng)). +* Fix rapidjson submodule [#60346](https://github.com/ClickHouse/ClickHouse/pull/60346) ([Raúl Marín](https://github.com/Algunenano)). +* Validate experimental and suspicious types inside nested types under a setting [#60353](https://github.com/ClickHouse/ClickHouse/pull/60353) ([Kruglov Pavel](https://github.com/Avogar)). +* Update 01158_zookeeper_log_long.sql [#60357](https://github.com/ClickHouse/ClickHouse/pull/60357) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add missed #include [#60358](https://github.com/ClickHouse/ClickHouse/pull/60358) ([Mikhnenko Sasha](https://github.com/4JustMe4)). +* Follow up [#60082](https://github.com/ClickHouse/ClickHouse/issues/60082) [#60360](https://github.com/ClickHouse/ClickHouse/pull/60360) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove ALTER LIVE VIEW [#60370](https://github.com/ClickHouse/ClickHouse/pull/60370) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Expose fatal.log separately for fuzzer [#60374](https://github.com/ClickHouse/ClickHouse/pull/60374) ([Azat Khuzhin](https://github.com/azat)). +* Minor changes for dashboard [#60387](https://github.com/ClickHouse/ClickHouse/pull/60387) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove unused method [#60388](https://github.com/ClickHouse/ClickHouse/pull/60388) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to map UI handlers to different paths [#60389](https://github.com/ClickHouse/ClickHouse/pull/60389) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove old tags from integration tests [#60407](https://github.com/ClickHouse/ClickHouse/pull/60407) ([Raúl Marín](https://github.com/Algunenano)). +* Update `liburing` to 2.5 [#60409](https://github.com/ClickHouse/ClickHouse/pull/60409) ([Nikita Taranov](https://github.com/nickitat)). 
+* Fix undefined-behavior in case of too big max_execution_time setting [#60419](https://github.com/ClickHouse/ClickHouse/pull/60419) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong log message in Fuzzer [#60425](https://github.com/ClickHouse/ClickHouse/pull/60425) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix unrestricted reads from keeper [#60429](https://github.com/ClickHouse/ClickHouse/pull/60429) ([Raúl Marín](https://github.com/Algunenano)). +* Split update_mergeable_check into two functions to force trigger the status [#60431](https://github.com/ClickHouse/ClickHouse/pull/60431) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Revert "Add table function `mergeTreeIndex`"" [#60435](https://github.com/ClickHouse/ClickHouse/pull/60435) ([Anton Popov](https://github.com/CurtizJ)). +* Revert "Merge pull request [#56864](https://github.com/ClickHouse/ClickHouse/issues/56864) from ClickHouse/broken-projections-better-handling" [#60436](https://github.com/ClickHouse/ClickHouse/pull/60436) ([Nikita Taranov](https://github.com/nickitat)). +* Keeper: fix moving changelog files between disks [#60442](https://github.com/ClickHouse/ClickHouse/pull/60442) ([Antonio Andelic](https://github.com/antonio2368)). +* Replace deprecated distutils by vendored packaging [#60444](https://github.com/ClickHouse/ClickHouse/pull/60444) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Do not fail the build if ci-logs is not healthy [#60445](https://github.com/ClickHouse/ClickHouse/pull/60445) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Move setting `enable_order_by_all` out of the experimental setting section [#60449](https://github.com/ClickHouse/ClickHouse/pull/60449) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor: Replace `boost::algorithm::starts_with()` by `std::string::starts_with()` [#60450](https://github.com/ClickHouse/ClickHouse/pull/60450) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor: Replace boost::algorithm::ends_with() by std::string::ends_with() [#60454](https://github.com/ClickHouse/ClickHouse/pull/60454) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: remove input params for job scripts [#60455](https://github.com/ClickHouse/ClickHouse/pull/60455) ([Max K.](https://github.com/maxknv)). +* Fix: 02496_remove_redundant_sorting_analyzer [#60456](https://github.com/ClickHouse/ClickHouse/pull/60456) ([Igor Nikonov](https://github.com/devcrafter)). +* PR template fix to include ci fix category [#60461](https://github.com/ClickHouse/ClickHouse/pull/60461) ([Max K.](https://github.com/maxknv)). +* Reduce iterations in 01383_log_broken_table [#60465](https://github.com/ClickHouse/ClickHouse/pull/60465) ([Raúl Marín](https://github.com/Algunenano)). +* Merge [#57434](https://github.com/ClickHouse/ClickHouse/issues/57434) [#60466](https://github.com/ClickHouse/ClickHouse/pull/60466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test: looks like an obvious race condition, but I didn't check in detail. [#60471](https://github.com/ClickHouse/ClickHouse/pull/60471) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make test slower [#60472](https://github.com/ClickHouse/ClickHouse/pull/60472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cgroups v1 rss parsing in CgroupsMemoryUsageObserver [#60481](https://github.com/ClickHouse/ClickHouse/pull/60481) ([Maksim Kita](https://github.com/kitaisreal)). 
+* CI: fix pr check status to not fail mergeable check [#60483](https://github.com/ClickHouse/ClickHouse/pull/60483) ([Max K.](https://github.com/maxknv)). +* Report respects skipped builds [#60488](https://github.com/ClickHouse/ClickHouse/pull/60488) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: quick style fix [#60490](https://github.com/ClickHouse/ClickHouse/pull/60490) ([Max K.](https://github.com/maxknv)). +* Decrease logging level for http retriable errors to Info [#60508](https://github.com/ClickHouse/ClickHouse/pull/60508) ([Raúl Marín](https://github.com/Algunenano)). +* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)). + diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index cfdd2bbcc41..6428c0e90d5 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -166,11 +166,11 @@ For most external applications, we recommend using the HTTP interface because it ## Configuration {#configuration} -ClickHouse Server is based on POCO C++ Libraries and uses `Poco::Util::AbstractConfiguration` to represent it's configuration. Configuration is held by `Poco::Util::ServerApplication` class inherited by `DaemonBase` class, which in turn is inherited by `DB::Server` class, implementing clickhouse-server itself. So config can be accessed by `ServerApplication::config()` method. +ClickHouse Server is based on POCO C++ Libraries and uses `Poco::Util::AbstractConfiguration` to represent its configuration. Configuration is held by `Poco::Util::ServerApplication` class inherited by `DaemonBase` class, which in turn is inherited by `DB::Server` class, implementing clickhouse-server itself. So config can be accessed by `ServerApplication::config()` method. Config is read from multiple files (in XML or YAML format) and merged into single `AbstractConfiguration` by `ConfigProcessor` class. Configuration is loaded at server startup and can be reloaded later if one of config files is updated, removed or added. `ConfigReloader` class is responsible for periodic monitoring of these changes and reload procedure as well. `SYSTEM RELOAD CONFIG` query also triggers config to be reloaded. -For queries and subsystems other than `Server` config is accessible using `Context::getConfigRef()` method. Every subsystem that is capable of reloading it's config without server restart should register itself in reload callback in `Server::main()` method. Note that if newer config has an error, most subsystems will ignore new config, log warning messages and keep working with previously loaded config. Due to the nature of `AbstractConfiguration` it is not possible to pass reference to specific section, so `String config_prefix` is usually used instead. +For queries and subsystems other than `Server` config is accessible using `Context::getConfigRef()` method. Every subsystem that is capable of reloading its config without server restart should register itself in reload callback in `Server::main()` method. Note that if newer config has an error, most subsystems will ignore new config, log warning messages and keep working with previously loaded config. Due to the nature of `AbstractConfiguration` it is not possible to pass reference to specific section, so `String config_prefix` is usually used instead. 
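As a brief aside to the configuration description above, here is a minimal C++ sketch (not taken from the ClickHouse sources; the `ExampleSettings` struct, the key names, and the `example_subsystem` prefix are hypothetical) of how a subsystem might read its own section from the merged configuration using a `config_prefix` string:

```cpp
// Minimal sketch, assuming only the Poco::Util::AbstractConfiguration API mentioned above.
// The struct, keys, and prefix below are illustrative, not actual ClickHouse names.
#include <Poco/Util/AbstractConfiguration.h>
#include <string>

struct ExampleSettings
{
    bool enabled = true;
    std::string path = "/var/lib/example";

    // AbstractConfiguration cannot hand out a reference to a sub-section,
    // so the caller passes a dotted prefix such as "example_subsystem".
    static ExampleSettings load(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
    {
        ExampleSettings settings;
        settings.enabled = config.getBool(config_prefix + ".enabled", settings.enabled);
        settings.path = config.getString(config_prefix + ".path", settings.path);
        return settings;
    }
};
```

A subsystem structured like this could re-run `load()` from the reload callback registered in `Server::main()` and keep its previous values if the new configuration turns out to be malformed.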
## Threads and jobs {#threads-and-jobs} @@ -255,7 +255,7 @@ When we are going to read something from a part in `MergeTree`, we look at `prim When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. That’s why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make a recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts. -`MergeTree` is not an LSM tree because it does not contain MEMTABLE and LOG: inserted data is written directly to the filesystem. This behavior makes MergeTree much more suitable to insert data in batches. Therefore frequently inserting small amounts of rows is not ideal for MergeTree. For example, a couple of rows per second is OK, but doing it a thousand times a second is not optimal for MergeTree. However, there is an async insert mode for small inserts to overcome this limitation. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications +`MergeTree` is not an LSM tree because it does not contain MEMTABLE and LOG: inserted data is written directly to the filesystem. This behavior makes MergeTree much more suitable to insert data in batches. Therefore, frequently inserting small amounts of rows is not ideal for MergeTree. For example, a couple of rows per second is OK, but doing it a thousand times a second is not optimal for MergeTree. However, there is an async insert mode for small inserts to overcome this limitation. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications There are MergeTree engines that are doing additional work during background merges. Examples are `CollapsingMergeTree` and `AggregatingMergeTree`. This could be treated as special support for updates. Keep in mind that these are not real updates because users usually have no control over the time when background merges are executed, and data in a `MergeTree` table is almost always stored in more than one part, not in completely merged form. @@ -276,5 +276,3 @@ Besides, each replica stores its state in ZooKeeper as the set of parts and its :::note The ClickHouse cluster consists of independent shards, and each shard consists of replicas. The cluster is **not elastic**, so after adding a new shard, data is not rebalanced between shards automatically. Instead, the cluster load is supposed to be adjusted to be uneven. This implementation gives you more control, and it is ok for relatively small clusters, such as tens of nodes. But for clusters with hundreds of nodes that we are using in production, this approach becomes a significant drawback. We should implement a table engine that spans across the cluster with dynamically replicated regions that could be split and balanced between clusters automatically.
::: - -[Original article](https://clickhouse.com/docs/en/development/architecture/) diff --git a/docs/en/development/build-cross-s390x.md b/docs/en/development/build-cross-s390x.md index b7cda515d77..a4a83c7989b 100644 --- a/docs/en/development/build-cross-s390x.md +++ b/docs/en/development/build-cross-s390x.md @@ -38,7 +38,7 @@ ninja ## Running -Once built, the binary can be run with, eg.: +Once built, the binary can be run with, e.g.: ```bash qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 39ccc9a78c3..21b9446aa66 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -37,7 +37,7 @@ sudo xcode-select --install ``` bash brew update -brew install ccache cmake ninja libtool gettext llvm gcc binutils grep findutils +brew install ccache cmake ninja libtool gettext llvm gcc binutils grep findutils nasm ``` ## Checkout ClickHouse Sources {#checkout-clickhouse-sources} diff --git a/docs/en/development/build.md b/docs/en/development/build.md index b474c445604..5cbf851b785 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -14,20 +14,6 @@ Supported platforms: - PowerPC 64 LE (experimental) - RISC-V 64 (experimental) -## Building in docker -We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage: - -```bash -# define a directory for the output artifacts -output_dir="build_results" -# a simplest build -./docker/packager/packager --package-type=binary --output-dir "$output_dir" -# build debian packages -./docker/packager/packager --package-type=deb --output-dir "$output_dir" -# by default, debian packages use thin LTO, so we can override it to speed up the build -CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results" -``` - ## Building on Ubuntu The following tutorial is based on Ubuntu Linux. @@ -37,6 +23,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS. ### Install Prerequisites {#install-prerequisites} ``` bash +sudo apt-get update sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg ``` @@ -57,7 +44,7 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test For other Linux distributions - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html). -As of August 2023, clang-16 or higher will work. +As of March 2024, clang-17 or higher will work. GCC as a compiler is not supported. To build with a specific Clang version: @@ -67,8 +54,8 @@ to see what version you have installed before setting this environment variable. ::: ``` bash -export CC=clang-17 -export CXX=clang++-17 +export CC=clang-18 +export CXX=clang++-18 ``` ### Checkout ClickHouse Sources {#checkout-clickhouse-sources} @@ -133,3 +120,17 @@ mkdir build cmake -S . -B build cmake --build build ``` + +## Building in docker +We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. 
There is a script `docker/packager/packager` to ease the image usage: + +```bash +# define a directory for the output artifacts +output_dir="build_results" +# a simplest build +./docker/packager/packager --package-type=binary --output-dir "$output_dir" +# build debian packages +./docker/packager/packager --package-type=deb --output-dir "$output_dir" +# by default, debian packages use thin LTO, so we can override it to speed up the build +CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results" +``` diff --git a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md index 4e01b41ab3c..b9d39b8cc2d 100644 --- a/docs/en/development/building_and_benchmarking_deflate_qpl.md +++ b/docs/en/development/building_and_benchmarking_deflate_qpl.md @@ -95,7 +95,7 @@ Complete below three steps mentioned in [Star Schema Benchmark](https://clickhou - Inserting data. Here should use `./benchmark_sample/rawdata_dir/ssb-dbgen/*.tbl` as input data. - Converting “star schema†to de-normalized “flat schema†-Set up database with with IAA Deflate codec +Set up database with IAA Deflate codec ``` bash $ cd ./database_dir/deflate @@ -104,7 +104,7 @@ $ [CLICKHOUSE_EXE] client ``` Complete three steps same as lz4 above -Set up database with with ZSTD codec +Set up database with ZSTD codec ``` bash $ cd ./database_dir/zstd diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 4b296c43db4..bbc5fbeebcb 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -13,7 +13,7 @@ ClickHouse utilizes third-party libraries for different purposes, e.g., to conne SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; ``` -(Note that the listed libraries are the ones located in the `contrib/` directory of the ClickHouse repository. Depending on the build options, some of of the libraries may have not been compiled, and as a result, their functionality may not be available at runtime. +Note that the listed libraries are the ones located in the `contrib/` directory of the ClickHouse repository. Depending on the build options, some of the libraries may have not been compiled, and as a result, their functionality may not be available at runtime. [Example](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 303e0e315d5..e08096d8042 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -7,13 +7,13 @@ description: Prerequisites and an overview of how to build ClickHouse # Getting Started Guide for Building ClickHouse -ClickHouse can be build on Linux, FreeBSD and macOS. If you use Windows, you can still build ClickHouse in a virtual machine running Linux, e.g. [VirtualBox](https://www.virtualbox.org/) with Ubuntu. +ClickHouse can be built on Linux, FreeBSD and macOS. If you use Windows, you can still build ClickHouse in a virtual machine running Linux, e.g. [VirtualBox](https://www.virtualbox.org/) with Ubuntu. ClickHouse requires a 64-bit system to compile and run, 32-bit systems do not work. 
## Creating a Repository on GitHub {#creating-a-repository-on-github} -To start developing for ClickHouse you will need a [GitHub](https://www.virtualbox.org/) account. Please also generate a SSH key locally (if you don't have one already) and upload the public key to GitHub as this is a prerequisite for contributing patches. +To start developing for ClickHouse you will need a [GitHub](https://www.virtualbox.org/) account. Please also generate an SSH key locally (if you don't have one already) and upload the public key to GitHub as this is a prerequisite for contributing patches. Next, create a fork of the [ClickHouse repository](https://github.com/ClickHouse/ClickHouse/) in your personal account by clicking the "fork" button in the upper right corner. @@ -37,7 +37,7 @@ git clone git@github.com:your_github_username/ClickHouse.git # replace placehol cd ClickHouse ``` -This command creates a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory after the URL but it is important that this path does not contain whitespaces as it may lead to problems with the build later on. +This command creates a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory after the URL, but it is important that this path does not contain whitespaces as it may lead to problems with the build later on. The ClickHouse repository uses Git submodules, i.e. references to external repositories (usually 3rd party libraries used by ClickHouse). These are not checked out by default. To do so, you can either @@ -45,7 +45,7 @@ The ClickHouse repository uses Git submodules, i.e. references to external repos - if `git clone` did not check out submodules, run `git submodule update --init --jobs ` (e.g. ` = 12` to parallelize the checkout) to achieve the same as the previous alternative, or -- if `git clone` did not check out submodules and you like to use [sparse](https://github.blog/2020-01-17-bring-your-monorepo-down-to-size-with-sparse-checkout/) and [shallow](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/) submodule checkout to omit unneeded files and history in submodules to save space (ca. 5 GB instead of ca. 15 GB), run `./contrib/update-submodules.sh`. Not really recommended as it generally makes working with submodules less convenient and slower. +- if `git clone` did not check out submodules, and you like to use [sparse](https://github.blog/2020-01-17-bring-your-monorepo-down-to-size-with-sparse-checkout/) and [shallow](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/) submodule checkout to omit unneeded files and history in submodules to save space (ca. 5 GB instead of ca. 15 GB), run `./contrib/update-submodules.sh`. Not really recommended as it generally makes working with submodules less convenient and slower. You can check the Git status with the command: `git submodule status`. @@ -143,7 +143,7 @@ When a large amount of RAM is available on build machine you should limit the nu On machines with 4GB of RAM, it is recommended to specify 1, for 8GB of RAM `-j 2` is recommended. -If you get the message: `ninja: error: loading 'build.ninja': No such file or directory`, it means that generating a build configuration has failed and you need to inspect the message above. 
+If you get the message: `ninja: error: loading 'build.ninja': No such file or directory`, it means that generating a build configuration has failed, and you need to inspect the message above. Upon the successful start of the building process, you’ll see the build progress - the number of processed tasks and the total number of tasks. @@ -184,7 +184,7 @@ You can also run your custom-built ClickHouse binary with the config file from t **CLion (recommended)** -If you do not know which IDE to use, we recommend that you use [CLion](https://www.jetbrains.com/clion/). CLion is commercial software but it offers a 30 day free trial. It is also free of charge for students. CLion can be used on both Linux and macOS. +If you do not know which IDE to use, we recommend that you use [CLion](https://www.jetbrains.com/clion/). CLion is commercial software, but it offers a 30 day free trial. It is also free of charge for students. CLion can be used on both Linux and macOS. A few things to know when using CLion to develop ClickHouse: diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md index 294d1202bdd..ae323680688 100644 --- a/docs/en/engines/database-engines/postgresql.md +++ b/docs/en/engines/database-engines/postgresql.md @@ -10,7 +10,7 @@ Allows to connect to databases on a remote [PostgreSQL](https://www.postgresql.o Gives the real-time access to table list and table structure from remote PostgreSQL with the help of `SHOW TABLES` and `DESCRIBE TABLE` queries. -Supports table structure modifications (`ALTER TABLE ... ADD|DROP COLUMN`). If `use_table_cache` parameter (see the Engine Parameters below) it set to `1`, the table structure is cached and not checked for being modified, but can be updated with `DETACH` and `ATTACH` queries. +Supports table structure modifications (`ALTER TABLE ... ADD|DROP COLUMN`). If `use_table_cache` parameter (see the Engine Parameters below) is set to `1`, the table structure is cached and not checked for being modified, but can be updated with `DETACH` and `ATTACH` queries. ## Creating a Database {#creating-a-database} diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md index c6525121667..0843ff1ac47 100644 --- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md +++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md @@ -19,6 +19,8 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32) ### Engine parameters +- `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (http://azurite1:{port}/[account_name]{container_name}/{data_prefix}) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used. +- `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. 
(Default : true) - `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) - `container_name` - Container name - `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 0f3fef3d6fb..a4d0cf78066 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -18,8 +18,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + name1 [type1], + name2 [type2], ... ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'], @@ -198,6 +198,10 @@ Additional virtual columns when `kafka_handle_error_mode='stream'`: Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully. +## Caveats {#caveats} + +Even though you may specify [default column expressions](/docs/en/sql-reference/statements/create/table.md/#default_values) (such as `DEFAULT`, `MATERIALIZED`, `ALIAS`) in the table definition, these will be ignored. Instead, the columns will be filled with their respective default values for their types. + ## Data formats support {#data-formats-support} RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse. diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index be588f1764d..78a27d3ff86 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -2,7 +2,7 @@ Nearest neighborhood search is the problem of finding the M closest points for a given point in an N-dimensional vector space. The most straightforward approach to solve this problem is a brute force search where the distance between all points in the vector space and the -reference point is computed. This method guarantees perfect accuracy but it is usually too slow for practical applications. Thus, nearest +reference point is computed. This method guarantees perfect accuracy, but it is usually too slow for practical applications. Thus, nearest neighborhood search problems are often solved with [approximative algorithms](https://github.com/erikbern/ann-benchmarks). 
Approximative nearest neighborhood search techniques, in conjunction with [embedding methods](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning) allow to search huge @@ -24,7 +24,7 @@ LIMIT N `vectors` contains N-dimensional values of type [Array](../../../sql-reference/data-types/array.md) or [Tuple](../../../sql-reference/data-types/tuple.md), for example embeddings. Function `Distance` computes the distance between two vectors. -Often, the the Euclidean (L2) distance is chosen as distance function but [other +Often, the Euclidean (L2) distance is chosen as distance function but [other distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17, 0.33, ...)`, and `N` limits the number of search results. @@ -109,7 +109,7 @@ clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_inde **Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)` clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries -without `LIMIT` clause cannot utilize ANN indexes. Also ANN indexes are only used if the query has a `LIMIT` value smaller than setting +without `LIMIT` clause cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting `max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries for approximate neighbor search. @@ -120,9 +120,9 @@ then each indexed block will contain 16384 rows. However, data structures and al provided by external libraries) are inherently row-oriented. They store a compact representation of a set of rows and also return rows for ANN queries. This causes some rather unintuitive differences in the way ANN indexes behave compared to normal skip indexes. -When a user defines a ANN index on a column, ClickHouse internally creates a ANN "sub-index" for each index block. The sub-index is "local" +When a user defines an ANN index on a column, ClickHouse internally creates an ANN "sub-index" for each index block. The sub-index is "local" in the sense that it only knows about the rows of its containing index block. In the previous example and assuming that a column has 65536 -rows, we obtain four index blocks (spanning eight granules) and a ANN sub-index for each index block. A sub-index is theoretically able to +rows, we obtain four index blocks (spanning eight granules) and an ANN sub-index for each index block. A sub-index is theoretically able to return the rows with the N closest points within its index block directly. However, since ClickHouse loads data from disk to memory at the granularity of granules, sub-indexes extrapolate matching rows to granule granularity. This is different from regular skip indexes which skip data at the granularity of index blocks. 
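To make the index-block and granule discussion above concrete, here is a hedged sketch of a table with an ANN index and a query shape that can use it. The Annoy index syntax and the `allow_experimental_annoy_index` setting are assumptions based on the Annoy variant described on this page and may change between releases.

```sql
-- Assumed syntax: an Annoy index over an Array(Float32) column
SET allow_experimental_annoy_index = 1;

CREATE TABLE ann_demo
(
    id UInt64,
    vectors Array(Float32),
    INDEX ann_idx vectors TYPE annoy('L2Distance') GRANULARITY 4
)
ENGINE = MergeTree
ORDER BY id;

-- A LIMIT is required for the ANN index to be considered (see max_limit_for_ann_queries)
SELECT id
FROM ann_demo
ORDER BY L2Distance(vectors, [0.17, 0.33, 0.25])
LIMIT 10;
```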
@@ -231,7 +231,7 @@ The Annoy index currently does not work with per-table, non-default `index_granu ## USearch {#usearch} -This type of ANN index is based on the [the USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW +This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors, diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 0043e1b6748..ba4021d8422 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -125,7 +125,7 @@ For each resulting data part ClickHouse saves: 3. The first “cancel†row, if there are more “cancel†rows than “state†rows. 4. None of the rows, in all other cases. -Also when there are at least 2 more “state†rows than “cancel†rows, or at least 2 more “cancel†rows then “state†rows, the merge continues, but ClickHouse treats this situation as a logical error and records it in the server log. This error can occur if the same data were inserted more than once. +Also, when there are at least 2 more “state†rows than “cancel†rows, or at least 2 more “cancel†rows then “state†rows, the merge continues, but ClickHouse treats this situation as a logical error and records it in the server log. This error can occur if the same data were inserted more than once. Thus, collapsing should not change the results of calculating statistics. Changes gradually collapsed so that in the end only the last state of almost every object left. @@ -196,7 +196,7 @@ What do we see and where is collapsing? With two `INSERT` queries, we created 2 data parts. The `SELECT` query was performed in 2 threads, and we got a random order of rows. Collapsing not occurred because there was no merge of the data parts yet. ClickHouse merges data part in an unknown moment which we can not predict. -Thus we need aggregation: +Thus, we need aggregation: ``` sql SELECT diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index f23b251f3a1..29672541d9d 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -870,6 +870,11 @@ Tags: - `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`. - `least_used_ttl_ms` - Configure timeout (in milliseconds) for the updating available space on all disks (`0` - update always, `-1` - never update, default is `60000`). Note, if the disk can be used by ClickHouse only and is not subject to a online filesystem resize/shrink you can use `-1`, in all other cases it is not recommended, since eventually it will lead to incorrect space distribution. - `prefer_not_to_merge` — You should not use this setting. Disables merging of data parts on this volume (this is harmful and leads to performance degradation). When this setting is enabled (don't do it), merging data on this volume is not allowed (which is bad). 
This allows (but you don't need it) controlling (if you want to control something, you're making a mistake) how ClickHouse works with slow disks (but ClickHouse knows better, so please don't use this setting). +- `volume_priority` — Defines the priority (order) in which volumes are filled. Lower value means higher priority. The parameter values should be natural numbers and collectively cover the range from 1 to N (lowest priority given) without skipping any numbers. + * If _all_ volumes are tagged, they are prioritized in given order. + * If only _some_ volumes are tagged, those without the tag have the lowest priority, and they are prioritized in the order they are defined in config. + * If _no_ volumes are tagged, their priority is set correspondingly to their order they are declared in configuration. + * Two volumes cannot have the same priority value. Configuration examples: @@ -919,7 +924,8 @@ In given example, the `hdd_in_order` policy implements the [round-robin](https:/ If there are different kinds of disks available in the system, `moving_from_ssd_to_hdd` policy can be used instead. The volume `hot` consists of an SSD disk (`fast_ssd`), and the maximum size of a part that can be stored on this volume is 1GB. All the parts with the size larger than 1GB will be stored directly on the `cold` volume, which contains an HDD disk `disk1`. Also, once the disk `fast_ssd` gets filled by more than 80%, data will be transferred to the `disk1` by a background process. -The order of volume enumeration within a storage policy is important. Once a volume is overfilled, data are moved to the next one. The order of disk enumeration is important as well because data are stored on them in turns. +The order of volume enumeration within a storage policy is important in case at least one of the volumes listed has no explicit `volume_priority` parameter. +Once a volume is overfilled, data are moved to the next one. The order of disk enumeration is important as well because data are stored on them in turns. When creating a table, one can apply one of the configured storage policies to it: diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 01782ac25bd..f70e275fd4e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -304,6 +304,24 @@ We use the term `MergeTree` to refer to all table engines in the `MergeTree fami If you had a `MergeTree` table that was manually replicated, you can convert it to a replicated table. You might need to do this if you have already collected a large amount of data in a `MergeTree` table and now you want to enable replication. +`MergeTree` table can be automatically converted on server restart if `convert_to_replicated` flag is set at the table's data directory (`/var/lib/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/` for `Atomic` database). +Create empty `convert_to_replicated` file and the table will be loaded as replicated on next server restart. + +This query can be used to get the table's data path. + +```sql +SELECT data_paths FROM system.tables WHERE table = 'table_name' AND database = 'database_name'; +``` + +Note that ReplicatedMergeTree table will be created with values of `default_replica_path` and `default_replica_name` settings. 
+To create a converted table on other replicas, you will need to explicitly specify its path in the first argument of the `ReplicatedMergeTree` engine. The following query can be used to get its path. + +```sql +SELECT zookeeper_path FROM system.replicas WHERE table = 'table_name'; +``` + +There is also a manual way to do this without server restart. + If the data differs on various replicas, first sync it, or delete this data on all the replicas except one. Rename the existing MergeTree table, then create a `ReplicatedMergeTree` table with the old name. diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index de8ae0357dc..4e0ee9bfcc9 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -72,7 +72,11 @@ Specifying the `sharding_key` is necessary for the following: #### fsync_directories -`fsync_directories` - do the `fsync` for directories. Guarantees that the OS refreshed directory metadata after operations related to background inserts on Distributed table (after insert, after sending the data to shard, etc). +`fsync_directories` - do the `fsync` for directories. Guarantees that the OS refreshed directory metadata after operations related to background inserts on Distributed table (after insert, after sending the data to shard, etc.). + +#### skip_unavailable_shards + +`skip_unavailable_shards` - If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard. Default false. #### bytes_to_throw_insert @@ -102,6 +106,10 @@ Specifying the `sharding_key` is necessary for the following: `background_insert_max_sleep_time_ms` - same as [distributed_background_insert_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_background_insert_max_sleep_time_ms) +#### flush_on_detach + +`flush_on_detach` - Flush data to remote nodes on DETACH/DROP/server shutdown. Default true. + :::note **Durability settings** (`fsync_...`): @@ -220,7 +228,7 @@ Second, you can perform `INSERT` statements on a `Distributed` table. In this ca Each shard can have a `` defined in the config file. By default, the weight is `1`. Data is distributed across shards in the amount proportional to the shard weight. All shard weights are summed up, then each shard's weight is divided by the total to determine each shard's proportion. For example, if there are two shards and the first has a weight of 1 while the second has a weight of 2, the first will be sent one third (1 / 3) of inserted rows and the second will be sent two thirds (2 / 3). -Each shard can have the `internal_replication` parameter defined in the config file. If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this if the tables underlying the `Distributed` table are replicated tables (e.g. any of the `Replicated*MergeTree` table engines). One of the table replicas will receive the write and it will be replicated to the other replicas automatically. +Each shard can have the `internal_replication` parameter defined in the config file. If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this if the tables underlying the `Distributed` table are replicated tables (e.g. any of the `Replicated*MergeTree` table engines). 
One of the table replicas will receive the write, and it will be replicated to the other replicas automatically. If `internal_replication` is set to `false` (the default), data is written to all replicas. In this case, the `Distributed` table replicates data itself. This is worse than using replicated tables because the consistency of replicas is not checked and, over time, they will contain slightly different data. diff --git a/docs/en/getting-started/example-datasets/amazon-reviews.md b/docs/en/getting-started/example-datasets/amazon-reviews.md index 00dc553782c..c07ffa86dd9 100644 --- a/docs/en/getting-started/example-datasets/amazon-reviews.md +++ b/docs/en/getting-started/example-datasets/amazon-reviews.md @@ -12,7 +12,7 @@ The queries below were executed on a **Production** instance of [ClickHouse Clou ::: -1. Without inserting the data into ClickHouse, we can query it in place. Let's grab some rows so we can see what they look like: +1. Without inserting the data into ClickHouse, we can query it in place. Let's grab some rows, so we can see what they look like: ```sql SELECT * diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index a84eb5d561f..090de1b32fd 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -29,7 +29,7 @@ Here is a preview of the dashboard created in this guide: This dataset is from [OpenCelliD](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. -As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc). +As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc.). OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License, and we redistribute a snapshot of this dataset under the terms of the same license. The up-to-date version of the dataset is available to download after sign in. @@ -355,7 +355,7 @@ Click on **UPDATE CHART** to render the visualization. ### Add the charts to a **dashboard** -This screenshot shows cell tower locations with LTE, UMTS, and GSM radios. The charts are all created in the same way and they are added to a dashboard. +This screenshot shows cell tower locations with LTE, UMTS, and GSM radios. The charts are all created in the same way, and they are added to a dashboard. ![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png) diff --git a/docs/en/getting-started/example-datasets/covid19.md b/docs/en/getting-started/example-datasets/covid19.md index 3a7fae89ae0..7dc4cea9be4 100644 --- a/docs/en/getting-started/example-datasets/covid19.md +++ b/docs/en/getting-started/example-datasets/covid19.md @@ -28,7 +28,7 @@ The CSV file has 10 columns: ```response ┌─name─────────────────┬─type─────────────┠-│ date │ Nullable(String) │ +│ date │ Nullable(Date) │ │ location_key │ Nullable(String) │ │ new_confirmed │ Nullable(Int64) │ │ new_deceased │ Nullable(Int64) │ @@ -132,7 +132,7 @@ FROM covid19; └────────────────────────────────────────────┘ ``` -7. You will notice the data has a lot of 0's for dates - either weekends or days where numbers were not reported each day. 
We can use a window function to smooth out the daily averages of new cases: +7. You will notice the data has a lot of 0's for dates - either weekends or days when numbers were not reported each day. We can use a window function to smooth out the daily averages of new cases: ```sql SELECT @@ -262,4 +262,4 @@ The results look like :::note As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022. -::: \ No newline at end of file +::: diff --git a/docs/en/getting-started/example-datasets/github.md b/docs/en/getting-started/example-datasets/github.md index 9ed8782e512..e5ffb15bb9a 100644 --- a/docs/en/getting-started/example-datasets/github.md +++ b/docs/en/getting-started/example-datasets/github.md @@ -23,7 +23,6 @@ As of November 8th, 2022, each TSV is approximately the following size and numbe # Table of Contents -- [ClickHouse GitHub data](#clickhouse-github-data) - [Table of Contents](#table-of-contents) - [Generating the data](#generating-the-data) - [Downloading and inserting the data](#downloading-and-inserting-the-data) diff --git a/docs/en/getting-started/example-datasets/laion.md b/docs/en/getting-started/example-datasets/laion.md index 0dbaceffc13..327c1796d11 100644 --- a/docs/en/getting-started/example-datasets/laion.md +++ b/docs/en/getting-started/example-datasets/laion.md @@ -10,10 +10,14 @@ The embeddings and the metadata are stored in separate files in the raw data. A converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that: ```bash -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata -python3 process.py ${1} # merge files and convert to CSV +number=${1} +if [[ $number == '' ]]; then + number=1 +fi; +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${number}.npy # download image embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${number}.npy # download text embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${number}.parquet # download metadata +python3 process.py $number # merge files and convert to CSV ``` Script `process.py` is defined as follows: diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index e8662ec16fa..ca689ef7995 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -78,11 +78,8 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun #### Setup the Debian repository ``` bash -sudo apt-get install -y apt-transport-https ca-certificates dirmngr -GNUPGHOME=$(mktemp -d) -sudo GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754 -sudo rm -rf "$GNUPGHOME" -sudo chmod +r /usr/share/keyrings/clickhouse-keyring.gpg +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg 
--dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 518037a2c7c..2b90d684c13 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -243,7 +243,7 @@ If no database is specified, the `default` database will be used. If the user name, password or database was specified in the connection string, it cannot be specified using `--user`, `--password` or `--database` (and vice versa). -The host component can either be an a host name and IP address. Put an IPv6 address in square brackets to specify it: +The host component can either be a host name and IP address. Put an IPv6 address in square brackets to specify it: ```text clickhouse://[2001:db8::1234] diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 0f597282f9e..a76bb01ce9e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -7,6 +7,7 @@ title: Formats for Input and Output Data ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read a dictionary. A format supported for output can be used to arrange the results of a `SELECT`, and to perform `INSERT`s into a file-backed table. +All format names are case insensitive. The supported formats are: @@ -33,7 +34,7 @@ The supported formats are: | [JSONAsString](#jsonasstring) | ✔ | ✗ | | [JSONStrings](#jsonstrings) | ✔ | ✔ | | [JSONColumns](#jsoncolumns) | ✔ | ✔ | -| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ | +| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock) | ✔ | ✔ | | [JSONCompact](#jsoncompact) | ✔ | ✔ | | [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | | [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | diff --git a/docs/en/interfaces/postgresql.md b/docs/en/interfaces/postgresql.md index 1146274b012..7306575a4d3 100644 --- a/docs/en/interfaces/postgresql.md +++ b/docs/en/interfaces/postgresql.md @@ -69,5 +69,3 @@ psql "port=9005 host=127.0.0.1 user=alice dbname=default sslcert=/path/to/certif ``` View the [PostgreSQL docs](https://jdbc.postgresql.org/documentation/head/ssl-client.html) for more details on their SSL settings. - -[Original article](https://clickhouse.com/docs/en/interfaces/postgresql) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 4db1d53987a..05fae994cbe 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -13,7 +13,7 @@ can control it. Schema inference is used when ClickHouse needs to read the data in a specific data format and the structure is unknown. -## Table functions [file](../sql-reference/table-functions/file.md), [s3](../sql-reference/table-functions/s3.md), [url](../sql-reference/table-functions/url.md), [hdfs](../sql-reference/table-functions/hdfs.md). +## Table functions [file](../sql-reference/table-functions/file.md), [s3](../sql-reference/table-functions/s3.md), [url](../sql-reference/table-functions/url.md), [hdfs](../sql-reference/table-functions/hdfs.md), [azureBlobStorage](../sql-reference/table-functions/azureBlobStorage.md). These table functions have the optional argument `structure` with the structure of input data. 
If this argument is not specified or set to `auto`, the structure will be inferred from the data. @@ -55,7 +55,7 @@ DESCRIBE file('hobbies.jsonl') └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Table engines [File](../engines/table-engines/special/file.md), [S3](../engines/table-engines/integrations/s3.md), [URL](../engines/table-engines/special/url.md), [HDFS](../engines/table-engines/integrations/hdfs.md) +## Table engines [File](../engines/table-engines/special/file.md), [S3](../engines/table-engines/integrations/s3.md), [URL](../engines/table-engines/special/url.md), [HDFS](../engines/table-engines/integrations/hdfs.md), [azureBlobStorage](../engines/table-engines/integrations/azureBlobStorage.md) If the list of columns is not specified in `CREATE TABLE` query, the structure of the table will be inferred automatically from the data. @@ -549,6 +549,48 @@ Result: └───────┴─────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +##### input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects + +Enabling this setting allows to use String type for ambiguous paths during named tuples inference from JSON objects (when `input_format_json_try_infer_named_tuples_from_objects` is enabled) instead of an exception. +It allows to read JSON objects as named Tuples even if there are ambiguous paths. + +Disabled by default. + +**Examples** + +With disabled setting: +```sql +SET input_format_json_try_infer_named_tuples_from_objects = 1; +SET input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = 0; +DESC format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : "Hello"}}}'); +``` +Result: + +```text +Code: 636. DB::Exception: The table structure cannot be extracted from a JSONEachRow format file. Error: +Code: 117. DB::Exception: JSON objects have ambiguous data: in some objects path 'a' has type 'Int64' and in some - 'Tuple(b String)'. You can enable setting input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type for path 'a'. (INCORRECT_DATA) (version 24.3.1.1). +You can specify the structure manually. (CANNOT_EXTRACT_TABLE_STRUCTURE) +``` + +With enabled setting: +```sql +SET input_format_json_try_infer_named_tuples_from_objects = 1; +SET input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = 1; +DESC format(JSONEachRow, '{"obj" : "a" : 42}, {"obj" : {"a" : {"b" : "Hello"}}}'); +SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : "Hello"}}}'); +``` + +Result: +```text +┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┠+│ obj │ Tuple(a Nullable(String)) │ │ │ │ │ │ +└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─obj─────────────────┠+│ ('42') │ +│ ('{"b" : "Hello"}') │ +└─────────────────────┘ +``` + ##### input_format_json_read_objects_as_strings Enabling this setting allows reading nested JSON objects as strings. 
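As a minimal sketch of the setting introduced just above, nested objects can be kept as plain strings during schema inference. The sample JSON and the second setting are illustrative assumptions.

```sql
SET input_format_json_read_objects_as_strings = 1,
    input_format_json_try_infer_named_tuples_from_objects = 0;

-- `obj` is expected to be inferred as Nullable(String) rather than a named Tuple
DESC format(JSONEachRow, '{"id" : 1, "obj" : {"a" : 42, "b" : "Hello"}}');
```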
@@ -1061,7 +1103,7 @@ $$) └──────────────┴───────────────┘ ``` -## Values {#values} +### Values {#values} In Values format ClickHouse extracts column value from the row and then parses it using the recursive parser similar to how literals are parsed. @@ -1554,6 +1596,28 @@ DESC format(JSONEachRow, $$ └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +#### input_format_try_infer_exponent_floats + +If enabled, ClickHouse will try to infer floats in exponential form for text formats (except JSON where numbers in exponential form are always inferred). + +Disabled by default. + +**Example** + +```sql +SET input_format_try_infer_exponent_floats = 1; +DESC format(CSV, +$$1.1E10 +2.3e-12 +42E00 +$$) +``` +```response +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┠+│ c1 │ Nullable(Float64) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + ## Self describing formats {#self-describing-formats} Self-describing formats contain information about the structure of the data in the data itself, @@ -1986,3 +2050,46 @@ Note: - As some of the files may not contain some columns from the resulting schema, union mode is supported only for formats that support reading subset of columns (like JSONEachRow, Parquet, TSVWithNames, etc) and won't work for other formats (like CSV, TSV, JSONCompactEachRow, etc). - If ClickHouse cannot infer the schema from one of the files, the exception will be thrown. - If you have a lot of files, reading schema from all of them can take a lot of time. + + +## Automatic format detection {#automatic-format-detection} + +If data format is not specified and cannot be determined by the file extension, ClickHouse will try to detect the file format by its content. + +**Examples:** + +Let's say we have `data` with the following content: +``` +"a","b" +1,"Data1" +2,"Data2" +3,"Data3" +``` + +We can inspect and query this file without specifying format or structure: +```sql +:) desc file(data); +``` + +```text +┌─name─┬─type─────────────┠+│ a │ Nullable(Int64) │ +│ b │ Nullable(String) │ +└──────┴──────────────────┘ +``` + +```sql +:) select * from file(data); +``` + +```text +┌─a─┬─b─────┠+│ 1 │ Data1 │ +│ 2 │ Data2 │ +│ 3 │ Data3 │ +└───┴───────┘ +``` + +:::note +ClickHouse can detect only some subset of formats and this detection takes some time, it's always better to specify the format explicitly. +::: \ No newline at end of file diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 900764b8128..0b3ca3db3a9 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -306,3 +306,18 @@ License: [commercial](https://tablum.io/pricing) product with 3-month free perio Try it out for free [in the cloud](https://tablum.io/try). Learn more about the product at [TABLUM.IO](https://tablum.io/) + +### CKMAN {#ckman} + +[CKMAN] (https://www.github.com/housepower/ckman) is a tool for managing and monitoring ClickHouse clusters! 
+ +Features: + +- Rapid and convenient automated deployment of clusters through a browser interface +- Clusters can be scaled or scaled +- Load balance the data of the cluster +- Upgrade the cluster online +- Modify the cluster configuration on the page +- Provides cluster node monitoring and zookeeper monitoring +- Monitor the status of tables and partitions, and monitor slow SQL statements +- Provides an easy-to-use SQL execution page diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 44df05a7260..8639af468c2 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -80,6 +80,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - ASYNC: backup or restore asynchronously - PARTITIONS: a list of partitions to restore - SETTINGS: + - `id`: id of backup or restore operation, randomly generated UUID is used, if not specified manually. If there is already running operation with the same `id` exception is thrown. - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` @@ -167,6 +168,28 @@ RESTORE TABLE test.table PARTITIONS '2', '3' FROM Disk('backups', 'filename.zip') ``` +### Backups as tar archives + +Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported. + +Write a backup as a tar: +``` +BACKUP TABLE test.table TO Disk('backups', '1.tar') +``` + +Corresponding restore: +``` +RESTORE TABLE test.table FROM Disk('backups', '1.tar') +``` + +To change the compression method, the correct file suffix should be appended to the backup name. I.E to compress the tar archive using gzip: +``` +BACKUP TABLE test.table TO Disk('backups', '1.tar.gz') +``` + +The supported compression file suffixes are `tar.gz`, `.tgz` `tar.bz2`, `tar.lzma`, `.tar.zst`, `.tzst` and `.tar.xz`. + + ### Check the status of backups The backup command returns an `id` and `status`, and that `id` can be used to get the status of the backup. This is very useful to check the progress of long ASYNC backups. The example below shows a failure that happened when trying to overwrite an existing backup file: @@ -206,7 +229,7 @@ end_time: 2022-08-30 09:21:46 1 row in set. Elapsed: 0.002 sec. ``` -Along with `system.backups` table, all backup and restore operations are also tracked in the system log table [backup_log](../operations/system-tables/backup_log.md): +Along with `system.backups` table, all backup and restore operations are also tracked in the system log table [backup_log](../operations/system-tables/backup_log.md): ``` SELECT * FROM system.backup_log @@ -222,7 +245,7 @@ event_time_microseconds: 2023-08-18 11:13:43.097414 id: 7678b0b3-f519-4e6e-811f-5a0781a4eb52 name: Disk('backups', '1.zip') status: CREATING_BACKUP -error: +error: start_time: 2023-08-18 11:13:43 end_time: 1970-01-01 03:00:00 num_files: 0 @@ -252,7 +275,7 @@ compressed_size: 0 files_read: 0 bytes_read: 0 -2 rows in set. Elapsed: 0.075 sec. +2 rows in set. Elapsed: 0.075 sec. 
``` ## Configuring BACKUP/RESTORE to use an S3 Endpoint @@ -271,7 +294,7 @@ Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk] The destination for a backup will be specified like this: ``` -S3('/', '', ') +S3('/', '', '') ``` ```sql @@ -421,10 +444,6 @@ Often data that is ingested into ClickHouse is delivered through some sort of pe Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective. -### clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters. - For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well. ### Manipulations with Parts {#manipulations-with-parts} diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 005c7818eb1..9f17f4af1e8 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -6,15 +6,66 @@ sidebar_label: Configuration Files # Configuration Files -The ClickHouse server can be configured with configuration files in XML or YAML syntax. In most installation types, the ClickHouse server runs with `/etc/clickhouse-server/config.xml` as default configuration file but it is also possible to specify the location of the configuration file manually at server startup using command line option `--config-file=` or `-C`. Additional configuration files may be placed into directory `config.d/` relative to the main configuration file, for example into directory `/etc/clickhouse-server/config.d/`. Files in this directory and the main configuration are merged in a preprocessing step before the configuration is applied in ClickHouse server. Configuration files are merged in alphabetical order. To simplify updates and improve modularization, it is best practice to keep the default `config.xml` file unmodified and place additional customization into `config.d/`. +The ClickHouse server can be configured with configuration files in XML or YAML syntax. In most installation types, the ClickHouse server runs with `/etc/clickhouse-server/config.xml` as default configuration file, but it is also possible to specify the location of the configuration file manually at server startup using command line option `--config-file=` or `-C`. Additional configuration files may be placed into directory `config.d/` relative to the main configuration file, for example into directory `/etc/clickhouse-server/config.d/`. Files in this directory and the main configuration are merged in a preprocessing step before the configuration is applied in ClickHouse server. Configuration files are merged in alphabetical order. 
To simplify updates and improve modularization, it is best practice to keep the default `config.xml` file unmodified and place additional customization into `config.d/`. It is possible to mix XML and YAML configuration files, for example you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `...` as top-level tag. In YAML configuration files, `clickhouse:` is optional, the parser inserts it implicitly if absent. -## Overriding Configuration {#override} +## Merging Configuration {#merging} -The merge of configuration files behaves as one intuitively expects: The contents of both files are combined recursively, children with the same name are replaced by the element of the more specific configuration file. The merge can be customized using attributes `replace` and `remove`. -- Attribute `replace` means that the element is replaced by the specified one. -- Attribute `remove` means that the element is deleted. +Two configuration files (usually the main configuration file and another configuration files from `config.d/`) are merged as follows: + +- If a node (i.e. a path leading to an element) appears in both files and does not have attributes `replace` or `remove`, it is included in the merged configuration file and children from both nodes are included and merged recursively. +- If one of both nodes contains attribute `replace`, it is included in the merged configuration file but only children from the node with attribute `replace` are included. +- If one of both nodes contains attribute `remove`, the node is not included in the merged configuration file (if it exists already, it is deleted). + +Example: + + +```xml + + + + 1 + + + 2 + + + 3 + + +``` + +and + +```xml + + + + 4 + + + 5 + + + 6 + + +``` + +generates merged configuration file: + +```xml + + + 1 + 4 + + + 5 + + +``` To specify that a value of an element should be replaced by the value of an environment variable, you can use attribute `from_env`. @@ -36,7 +87,7 @@ which is equal to - 150000 + 150000 @@ -63,7 +114,7 @@ XML substitution example: ``` -Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. +Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element. ## Encrypting and Hiding Configuration {#encryption} @@ -125,7 +176,7 @@ Users configuration can be split into separate files similar to `config.xml` and Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`. Directory `users.d` is used by default, as `users_config` defaults to `users.xml`. -Note that configuration files are first merged taking into account [Override](#override) settings and includes are processed after that. +Note that configuration files are first [merged](#merging) taking into account settings, and includes are processed after that. 
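A side note on the `from_zk` substitution mechanism mentioned above: the ZooKeeper node that supplies a substituted value can be inspected from SQL. This is only an illustration and the path below is hypothetical.

```sql
-- Hypothetical parent path: list child nodes (and their values) under /clickhouse/config,
-- e.g. a node referenced by from_zk="/clickhouse/config/postgresql_port"
SELECT name, value
FROM system.zookeeper
WHERE path = '/clickhouse/config';
```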
## XML example {#example} diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 1cbf9deccc6..07c9a2b88ab 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -199,6 +199,16 @@ Type: Bool Default: 0 + +## dns_cache_max_entries + +Internal DNS cache max entries. + +Type: UInt64 + +Default: 10000 + + ## dns_cache_update_period Internal DNS cache update period in seconds. @@ -458,6 +468,38 @@ Type: Double Default: 0.9 +## cgroups_memory_usage_observer_wait_time + +Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see +settings `cgroup_memory_watcher_hard_limit_ratio` and `cgroup_memory_watcher_soft_limit_ratio`). + +Type: UInt64 + +Default: 15 + +## cgroup_memory_watcher_hard_limit_ratio + +Specifies the "hard" threshold with regards to the memory consumption of the server process according to cgroups after which the server's +maximum memory consumption is adjusted to the threshold value. + +See settings `cgroups_memory_usage_observer_wait_time` and `cgroup_memory_watcher_soft_limit_ratio` + +Type: Double + +Default: 0.95 + +## cgroup_memory_watcher_soft_limit_ratio + +Specifies the "soft" threshold with regards to the memory consumption of the server process according to cgroups after which arenas in +jemalloc are purged. + + +See settings `cgroups_memory_usage_observer_wait_time` and `cgroup_memory_watcher_hard_limit_ratio` + +Type: Double + +Default: 0.95 + ## max_table_size_to_drop Restriction on deleting tables. @@ -472,10 +514,10 @@ The value 0 means that you can delete all tables without any restrictions. ``` xml 0 ``` - -## max\_database\_num\_to\_warn {#max-database-num-to-warn} -If the number of attached databases exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. + +## max\_database\_num\_to\_warn {#max-database-num-to-warn} +If the number of attached databases exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. Default value: 1000 **Example** @@ -483,10 +525,10 @@ Default value: 1000 ``` xml 50 ``` - -## max\_table\_num\_to\_warn {#max-table-num-to-warn} -If the number of attached tables exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. -Default value: 5000 + +## max\_table\_num\_to\_warn {#max-table-num-to-warn} +If the number of attached tables exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 5000 **Example** @@ -495,9 +537,9 @@ Default value: 5000 ``` -## max\_part\_num\_to\_warn {#max-part-num-to-warn} -If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. -Default value: 100000 +## max\_part\_num\_to\_warn {#max-part-num-to-warn} +If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 100000 **Example** @@ -2873,3 +2915,23 @@ A limit on the number of materialized views attached to a table. Note that only directly dependent views are considered here, and the creation of one view on top of another view is not considered. Default value: `0`. 
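As a practical companion to the `max_database_num_to_warn`, `max_table_num_to_warn` and `max_part_num_to_warn` settings documented above, the warnings they produce can be read back with a simple query. This is an illustrative sketch rather than part of the change:

```sql
-- Warning messages accumulate here when the number of attached databases,
-- tables, or active parts exceeds the configured *_num_to_warn thresholds.
SELECT message
FROM system.warnings;
```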
+ +## format_alter_operations_with_parentheses {#format_alter_operations_with_parentheses} + +If set to true, then alter operations will be surrounded by parentheses in formatted queries. This makes the parsing of formatted alter queries less ambiguous. + +Type: Bool + +Default: 0 + +## ignore_empty_sql_security_in_create_view_query {#ignore_empty_sql_security_in_create_view_query} + +If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. + +:::note +This setting is only necessary for the migration period and will become obsolete in 24.4 +::: + +Type: Bool + +Default: 1 diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 9265fffa323..477fbf94625 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -467,7 +467,7 @@ Enabled by default. Allow to use String type for JSON keys that contain only `Null`/`{}`/`[]` in data sample during schema inference. In JSON formats any value can be read as String, and we can avoid errors like `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference -by using String type for keys with unknown types. +by using String type for keys with unknown types. Example: @@ -891,7 +891,7 @@ Default value: `,`. If it is set to true, allow strings in single quotes. -Enabled by default. +Disabled by default. ### format_csv_allow_double_quotes {#format_csv_allow_double_quotes} @@ -1605,7 +1605,7 @@ possible values: - `1` — Enabled. Pretty formats will use ANSI escape sequences except for `NoEscapes` formats. - `auto` - Enabled if `stdout` is a terminal except for `NoEscapes` formats. -Default value is `auto`. +Default value is `auto`. ### output_format_pretty_grid_charset {#output_format_pretty_grid_charset} @@ -1656,6 +1656,33 @@ Result: └─────────────────────────┴─────────┘ ``` +### output_format_pretty_single_large_number_tip_threshold {#output_format_pretty_single_large_number_tip_threshold} + +Print a readable number tip on the right side of the table if the block consists of a single number which exceeds +this value (except 0). + +Possible values: + +- 0 — The readable number tip will not be printed. +- Positive integer — The readable number tip will be printed if the single number exceeds this value. + +Default value: `1000000`. + +**Example** + +Query: + +```sql +SELECT 1000000000 as a; +``` + +Result: +```text +┌──────────a─┠+│ 1000000000 │ -- 1.00 billion +└────────────┘ +``` + ## Template format settings {#template-format-settings} ### format_template_resultset {#format_template_resultset} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index a275878f32e..3f4dec9dc93 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -755,7 +755,7 @@ By default: 1,000,000. It only works when reading from MergeTree engines. ## max_concurrent_queries_for_user {#max-concurrent-queries-for-user} -The maximum number of simultaneously processed queries related to MergeTree table per user. +The maximum number of simultaneously processed queries per user. Possible values: @@ -1775,6 +1775,10 @@ Default value: 0 (no restriction). 
## insert_quorum {#insert_quorum} +:::note +This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. +::: + Enables the quorum writes. - If `insert_quorum < 2`, the quorum writes are disabled. @@ -1814,6 +1818,10 @@ See also: ## insert_quorum_parallel {#insert_quorum_parallel} +:::note +This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. +::: + Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected. Possible values: @@ -1831,6 +1839,10 @@ See also: ## select_sequential_consistency {#select_sequential_consistency} +:::note +This setting differ in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree. +::: + Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default). Possible values: @@ -2029,7 +2041,7 @@ Possible values: - 0 — Disabled. - 1 — Enabled. -Default value: 1. +Default value: 0. By default, async inserts are inserted into replicated tables by the `INSERT` statement enabling [async_insert](#async-insert) are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)). For the replicated tables, by default, only 10000 of the most recent inserts for each partition are deduplicated (see [replicated_deduplication_window_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-async-inserts), [replicated_deduplication_window_seconds_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-seconds-async-inserts)). @@ -3437,7 +3449,7 @@ Has an effect only when the connection is made through the MySQL wire protocol. - 0 - Use `BLOB`. - 1 - Use `TEXT`. -Default value: `0`. +Default value: `1`. ## mysql_map_fixed_string_to_text_in_show_columns {#mysql_map_fixed_string_to_text_in_show_columns} @@ -3448,7 +3460,7 @@ Has an effect only when the connection is made through the MySQL wire protocol. - 0 - Use `BLOB`. - 1 - Use `TEXT`. -Default value: `0`. +Default value: `1`. ## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold} @@ -3698,7 +3710,7 @@ Default value: `0`. ## allow_experimental_live_view {#allow-experimental-live-view} -Allows creation of experimental [live views](../../sql-reference/statements/create/view.md/#live-view). +Allows creation of a deprecated LIVE VIEW. Possible values: @@ -3709,21 +3721,15 @@ Default value: `0`. ## live_view_heartbeat_interval {#live-view-heartbeat-interval} -Sets the heartbeat interval in seconds to indicate [live view](../../sql-reference/statements/create/view.md/#live-view) is alive . - -Default value: `15`. +Deprecated. ## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh} -Sets the maximum number of inserted blocks after which mergeable blocks are dropped and query for [live view](../../sql-reference/statements/create/view.md/#live-view) is re-executed. - -Default value: `64`. +Deprecated. 
## periodic_live_view_refresh {#periodic-live-view-refresh} -Sets the interval in seconds after which periodically refreshed [live view](../../sql-reference/statements/create/view.md/#live-view) is forced to refresh. - -Default value: `60`. +Deprecated. ## http_connection_timeout {#http_connection_timeout} @@ -3948,6 +3954,7 @@ Possible values: - `none` — Is similar to throw, but distributed DDL query returns no result set. - `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts. - `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts. +- `none_only_active` - similar to `none`, but doesn't wait for inactive replicas of the `Replicated` database. Note: with this mode it's impossible to figure out that the query was not executed on some replica and will be executed in background. - `null_status_on_timeout_only_active` — similar to `null_status_on_timeout`, but doesn't wait for inactive replicas of the `Replicated` database - `throw_only_active` — similar to `throw`, but doesn't wait for inactive replicas of the `Replicated` database @@ -4273,7 +4280,7 @@ Result: ## enable_order_by_all {#enable-order-by-all} -Enables or disables sorting by `ALL` columns, i.e. [ORDER BY](../../sql-reference/statements/select/order-by.md) +Enables or disables sorting with `ORDER BY ALL` syntax, see [ORDER BY](../../sql-reference/statements/select/order-by.md). Possible values: @@ -4293,7 +4300,7 @@ INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20); SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous -SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all; +SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all = 0; ``` Result: @@ -5372,6 +5379,24 @@ SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as Default value: `false`. +## default_normal_view_sql_security {#default_normal_view_sql_security} + +Allows to set default `SQL SECURITY` option while creating a normal view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security). + +The default value is `INVOKER`. + +## default_materialized_view_sql_security {#default_materialized_view_sql_security} + +Allows to set a default value for SQL SECURITY option when creating a materialized view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security). + +The default value is `DEFINER`. + +## default_view_definer {#default_view_definer} + +Allows to set default `DEFINER` option while creating a view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security). + +The default value is `CURRENT_USER`. + ## max_partition_size_to_drop Restriction on dropping partitions in query time. The value 0 means that you can drop partitions without any restrictions. diff --git a/docs/en/operations/system-tables/asynchronous_loader.md b/docs/en/operations/system-tables/asynchronous_loader.md index af9aa4ecd09..75d98e4549d 100644 --- a/docs/en/operations/system-tables/asynchronous_loader.md +++ b/docs/en/operations/system-tables/asynchronous_loader.md @@ -49,6 +49,6 @@ Every job has a pool associated with it and is started in this pool. Each pool h Time instants during job lifetime: - `schedule_time` (`DateTime64`) - Time when job was created and scheduled to be executed (usually with all its dependencies). 
-- `enqueue_time` (`Nullable(DateTime64)`) - Time when job became ready and was enqueued into a ready queue of it's pool. Null if the job is not ready yet. +- `enqueue_time` (`Nullable(DateTime64)`) - Time when job became ready and was enqueued into a ready queue of its pool. Null if the job is not ready yet. - `start_time` (`Nullable(DateTime64)`) - Time when worker dequeues the job from ready queue and start its execution. Null if the job is not started yet. - `finish_time` (`Nullable(DateTime64)`) - Time when job execution is finished. Null if the job is not finished yet. diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index fe8f963b1ec..81725b97e41 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -297,11 +297,11 @@ Total number of databases on the server. ### NumberOfDetachedByUserParts -The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed. +The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts, and they can be removed. ### NumberOfDetachedParts -The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed. +The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself if the part is broken, unexpected or unneeded. The server does not care about detached parts, and they can be removed. ### NumberOfTables @@ -393,7 +393,7 @@ The amount of free memory plus OS page cache memory on the host system, in bytes ### OSMemoryFreeWithoutCached -The amount of free memory on the host system, in bytes. This does not include the memory used by the OS page cache memory, in bytes. The page cache memory is also available for usage by programs, so the value of this metric can be confusing. See the `OSMemoryAvailable` metric instead. For convenience we also provide the `OSMemoryFreePlusCached` metric, that should be somewhat similar to OSMemoryAvailable. See also https://www.linuxatemyram.com/. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. +The amount of free memory on the host system, in bytes. This does not include the memory used by the OS page cache memory, in bytes. The page cache memory is also available for usage by programs, so the value of this metric can be confusing. See the `OSMemoryAvailable` metric instead. For convenience, we also provide the `OSMemoryFreePlusCached` metric, that should be somewhat similar to OSMemoryAvailable. See also https://www.linuxatemyram.com/. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. ### OSMemoryTotal @@ -493,7 +493,7 @@ Number of threads in the server of the PostgreSQL compatibility protocol. ### QueryCacheBytes -Total size of the query cache cache in bytes. +Total size of the query cache in bytes. 
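For illustration (assuming a server where the query cache is in use), the query-cache metrics described here can be read straight from the table this page documents:

```sql
-- Current query cache footprint (QueryCacheBytes) and entry count
-- (QueryCacheEntries) from system.asynchronous_metrics.
SELECT metric, value
FROM system.asynchronous_metrics
WHERE metric LIKE 'QueryCache%';
```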
### QueryCacheEntries @@ -549,7 +549,7 @@ Total amount of bytes (compressed, including data and indices) stored in all tab ### TotalPartsOfMergeTreeTables -Total amount of data parts in all tables of MergeTree family. Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key. +Total amount of data parts in all tables of MergeTree family. Numbers larger than 10 000 will negatively affect the server startup time, and it may indicate unreasonable choice of the partition key. ### TotalPrimaryKeyBytesInMemory diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 63cc083e4bc..7a9f1438b87 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -19,7 +19,7 @@ Columns: - `default_database` ([String](../../sql-reference/data-types/string.md)) — The default database name. - `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times this host failed to reach replica. - `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of slowdowns that led to changing replica when establishing a connection with hedged requests. -- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed and it is considered to be back to normal. +- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed, and it is considered to be back to normal. - `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database). - `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database). - `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown". diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index e83da3624b2..9877f674211 100644 --- a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -49,5 +49,3 @@ build_id: **See also** - [trace_log](../../operations/system-tables/trace_log.md) system table - -[Original article](https://clickhouse.com/docs/en/operations/system-tables/crash-log) diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 8632581144c..c4cf7ba8bfb 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -18,7 +18,7 @@ Columns: - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. - `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. 
-- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory). +- `type` ([String](../../sql-reference/data-types/string.md)) — Type of dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory). - `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-key) provided by the dictionary. - `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-key) provided by the dictionary. - `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes) provided by the dictionary. diff --git a/docs/en/operations/system-tables/dns_cache.md b/docs/en/operations/system-tables/dns_cache.md new file mode 100644 index 00000000000..befeb9298aa --- /dev/null +++ b/docs/en/operations/system-tables/dns_cache.md @@ -0,0 +1,38 @@ +--- +slug: /en/operations/system-tables/dns_cache +--- +# dns_cache + +Contains information about cached DNS records. + +Columns: + +- `hostname` ([String](../../sql-reference/data-types/string.md)) — cached hostname +- `ip_address` ([String](../../sql-reference/data-types/string.md)) — ip address for the hostname +- `ip_family` ([Enum](../../sql-reference/data-types/enum.md)) — family of the ip address, possible values: + - 'IPv4' + - 'IPv6' + - 'UNIX_LOCAL' +- `cached_at` ([DateTime](../../sql-reference/data-types/datetime.md)) - when the record was cached + +**Example** + +Query: + +```sql +SELECT * FROM system.dns_cache; +``` + +Result: + +| hostname | ip\_address | ip\_family | cached\_at | +| :--- | :--- | :--- | :--- | +| localhost | ::1 | IPv6 | 2024-02-11 17:04:40 | +| localhost | 127.0.0.1 | IPv4 | 2024-02-11 17:04:40 | + +**See also** + +- [disable_internal_dns_cache setting](../../operations/server-configuration-parameters/settings.md#disable_internal_dns_cache) +- [dns_cache_max_entries setting](../../operations/server-configuration-parameters/settings.md#dns_cache_max_entries) +- [dns_cache_update_period setting](../../operations/server-configuration-parameters/settings.md#dns_cache_update_period) +- [dns_max_consecutive_failures setting](../../operations/server-configuration-parameters/settings.md#dns_max_consecutive_failures) diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 898e6ae2e2c..83ce817b7db 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -513,10 +513,6 @@ Part was moved to another disk and should be deleted in own destructor. Not active data part with identity refcounter, it is deleting right now by a cleaner. -### PartsInMemory - -In-memory parts. - ### PartsOutdated Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes. 
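As a small usage sketch (not part of the change above), the part-state counters discussed in this file can be inspected at runtime:

```sql
-- Number of data parts currently in each lifecycle state,
-- e.g. PartsActive, PartsOutdated, PartsDeleting.
SELECT metric, value
FROM system.metrics
WHERE metric LIKE 'Parts%'
ORDER BY metric;
```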
diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index 0420a0392f2..a0712c78409 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -21,7 +21,7 @@ Columns: - `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. -- `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. - `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. @@ -32,8 +32,7 @@ Columns: - `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. - `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. - `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread. -- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID. -- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — OS thread ID. - `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread. - `query` ([String](../../sql-reference/data-types/string.md)) — Query string. - `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: diff --git a/docs/en/operations/system-tables/settings_changes.md b/docs/en/operations/system-tables/settings_changes.md new file mode 100644 index 00000000000..c097915d430 --- /dev/null +++ b/docs/en/operations/system-tables/settings_changes.md @@ -0,0 +1,32 @@ +--- +slug: /en/operations/system-tables/settings_changes +--- +# settings_changes + +Contains information about setting changes in previous ClickHouse versions. 
+ +Columns: + +- `version` ([String](../../sql-reference/data-types/string.md)) — The ClickHouse version in which settings were changed +- `changes` ([Array](../../sql-reference/data-types/array.md) of [Tuple](../../sql-reference/data-types/tuple.md)) — A description of the setting changes: (setting name, previous value, new value, reason for the change) + +**Example** + +``` sql +SELECT * +FROM system.settings_changes +WHERE version = '23.5' +FORMAT Vertical +``` + +``` text +Row 1: +────── +version: 23.5 +changes: [('input_format_parquet_preserve_order','1','0','Allow Parquet reader to reorder rows for better parallelism.'),('parallelize_output_from_storages','0','1','Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows.'),('use_with_fill_by_sorting_prefix','0','1','Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently'),('output_format_parquet_compliant_nested_types','0','1','Change an internal field name in output Parquet file schema.')] +``` + +**See also** + +- [Settings](../../operations/settings/index.md#session-settings-intro) +- [system.settings](settings.md) diff --git a/docs/en/operations/system-tables/settings_profile_elements.md b/docs/en/operations/system-tables/settings_profile_elements.md index c1fc562e1e9..8955c84fab2 100644 --- a/docs/en/operations/system-tables/settings_profile_elements.md +++ b/docs/en/operations/system-tables/settings_profile_elements.md @@ -26,6 +26,6 @@ Columns: - `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The maximum value of the setting. NULL if not set. -- `readonly` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges))) — Profile that allows only read queries. +- `writability` ([Nullable](../../sql-reference/data-types/nullable.md)([Enum8](../../sql-reference/data-types/enum.md)('WRITABLE' = 0, 'CONST' = 1, 'CHANGEABLE_IN_READONLY' = 2))) — Sets the settings constraint writability kind. - `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 8049ab091c0..2132f69319e 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -27,6 +27,8 @@ Columns: - `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata. +- `metadata_version` ([Int32](../../sql-reference/data-types/int-uint.md)) - Metadata version for ReplicatedMergeTree table, 0 for non ReplicatedMergeTree table. + - `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies. - `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([materialized views](../../sql-reference/statements/create/view.md#materialized-view) the current table). 
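A possible usage sketch for the new `metadata_version` column described above (illustrative only; the engine filter is an assumption and the column requires a version that includes this change):

```sql
-- metadata_version is meaningful for ReplicatedMergeTree tables
-- and reported as 0 for other engines.
SELECT database, name, engine, metadata_version
FROM system.tables
WHERE engine LIKE 'Replicated%';
```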
diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 757afff599c..119684ba68d 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -111,6 +111,14 @@ On newer Linux kernels transparent huge pages are alright. $ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled ``` +If you want to modify the transparent huge pages setting permanently, edit `/etc/default/grub` and add `transparent_hugepage=madvise` to the `GRUB_CMDLINE_LINUX_DEFAULT` option: + +```bash +$ GRUB_CMDLINE_LINUX_DEFAULT="transparent_hugepage=madvise ..." +``` + +After that, run the `sudo update-grub` command and then reboot for the change to take effect. + ## Hypervisor configuration If you are using OpenStack, set @@ -289,8 +297,6 @@ end script If you use antivirus software configure it to skip folders with ClickHouse datafiles (`/var/lib/clickhouse`) otherwise performance may be reduced and you may experience unexpected errors during data ingestion and background merges. -[Original article](https://clickhouse.com/docs/en/operations/tips/) - ## Related Content - [Getting started with ClickHouse? Here are 13 "Deadly Sins" and how to avoid them](https://clickhouse.com/blog/common-getting-started-issues-with-clickhouse) diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md deleted file mode 100644 index 0d329487504..00000000000 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,187 +0,0 @@ ---- -slug: /en/operations/utilities/clickhouse-copier -sidebar_position: 59 -sidebar_label: clickhouse-copier ---- - -# clickhouse-copier - -Copies data from the tables in one cluster to tables in another (or the same) cluster. - -:::note -To get a consistent copy, the data in the source tables and partitions should not change during the entire process. -::: - -You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ClickHouse Keeper, or ZooKeeper, is used for syncing the processes. - -After starting, `clickhouse-copier`: - -- Connects to ClickHouse Keeper and receives: - - - Copying jobs. - - The state of the copying jobs. - -- It performs the jobs. - - Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary. - -`clickhouse-copier` tracks the changes in ClickHouse Keeper and applies them on the fly. - -To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located. - -## Running Clickhouse-copier {#running-clickhouse-copier} - -The utility should be run manually: - -``` bash -$ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -Parameters: - -- `daemon` — Starts `clickhouse-copier` in daemon mode. - -- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. -- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. -- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper. -- `task-upload-force` — Force upload `task-file` even if node already exists. Default is false. -- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_<PID>` subdirectories in `$base-dir`. 
If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. - -## Format of keeper.xml {#format-of-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## Configuration of Copying Tasks {#configuration-of-copying-tasks} - -``` xml - - - - - - - false - - 127.0.0.1 - 9000 - - - - ... - - - - ... - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... - - -``` - -`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change. diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index c863282efc1..93a3fecf3c6 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -34,7 +34,7 @@ The binary you just downloaded can run all sorts of ClickHouse tools and utiliti A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL. -If the file is sitting on the same machine as `clickhouse-local`, you can simple specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews: +If the file is sitting on the same machine as `clickhouse-local`, you can simply specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews: ```bash ./clickhouse local -q "SELECT * FROM 'reviews.tsv'" @@ -201,12 +201,12 @@ Arguments: - `-S`, `--structure` — table structure for input data. - `--input-format` — input format, `TSV` by default. -- `-f`, `--file` — path to data, `stdin` by default. +- `-F`, `--file` — path to data, `stdin` by default. - `-q`, `--query` — queries to execute with `;` as delimiter. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`. - `--queries-file` - file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--query queries1.sql --query queries2.sql`. Cannot be used simultaneously with `--query`. - `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. - `-N`, `--table` — table name where to put output data, `table` by default. -- `--format`, `--output-format` — output format, `TSV` by default. +- `-f`, `--format`, `--output-format` — output format, `TSV` by default. - `-d`, `--database` — default database, `_local` by default. - `--stacktrace` — whether to dump debug output in case of exception. - `--echo` — print query before execution. @@ -220,7 +220,7 @@ Arguments: - `--help` — arguments references for `clickhouse-local`. - `-V`, `--version` — print version information and exit. 
-Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`. +Also, there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`. ## Examples {#examples} diff --git a/docs/en/operations/utilities/clickhouse-obfuscator.md b/docs/en/operations/utilities/clickhouse-obfuscator.md index ad51e9c7776..f9a94713be7 100644 --- a/docs/en/operations/utilities/clickhouse-obfuscator.md +++ b/docs/en/operations/utilities/clickhouse-obfuscator.md @@ -38,7 +38,7 @@ For example, you have a column `IsMobile` in your table with values 0 and 1. In So, the user will be able to count the exact ratio of mobile traffic. -Let's give another example. When you have some private data in your table, like user email and you don't want to publish any single email address. +Let's give another example. When you have some private data in your table, like user email, and you don't want to publish any single email address. If your table is large enough and contains multiple different emails and no email has a very high frequency than all others, it will anonymize all data. But if you have a small number of different values in a column, it can reproduce some of them. You should look at the working algorithm of this tool works, and fine-tune its command line parameters. diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index 8959073d00e..912a5b9ccb1 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -2,13 +2,11 @@ slug: /en/operations/utilities/ sidebar_position: 56 sidebar_label: List of tools and utilities -pagination_next: 'en/operations/utilities/clickhouse-copier' --- # List of tools and utilities - [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this. -- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. - [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings. - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries. - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data. diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 5d2229fbcce..96bf0c5d93b 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -16,7 +16,9 @@ ClickHouse also supports: ## NULL Processing -During aggregation, all `NULL`s are skipped. If the aggregation has several parameters it will ignore any row in which one or more of the parameters are NULL. +During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL. + +There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`. 
**Examples:** @@ -85,3 +87,50 @@ FROM t_null_big; │ [2,2,3] │ [2,NULL,2,3,NULL] │ └───────────────┴───────────────────────────────────────┘ ``` + +Note that aggregations are skipped when the columns are used as arguments to an aggregated function. For example [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count all rows in the block (independently of the value of the GROUP BY column as it's not an argument), while `count(column)` will only return the number of rows where column is not NULL. + +```sql +SELECT + v, + count(1), + count(v) +FROM +( + SELECT if(number < 10, NULL, number % 3) AS v + FROM numbers(15) +) +GROUP BY v + +┌────v─┬─count()─┬─count(v)─┠+│ á´ºáµá´¸á´¸ │ 10 │ 0 │ +│ 0 │ 1 │ 1 │ +│ 1 │ 2 │ 2 │ +│ 2 │ 2 │ 2 │ +└──────┴─────────┴──────────┘ +``` + +And here is an example of of first_value with `RESPECT NULLS` where we can see that NULL inputs are respected and it will return the first value read, whether it's NULL or not: + +```sql +SELECT + col || '_' || ((col + 1) * 5 - 1) as range, + first_value(odd_or_null) as first, + first_value(odd_or_null) IGNORE NULLS as first_ignore_null, + first_value(odd_or_null) RESPECT NULLS as first_respect_nulls +FROM +( + SELECT + intDiv(number, 5) AS col, + if(number % 2 == 0, NULL, number) as odd_or_null + FROM numbers(15) +) +GROUP BY col +ORDER BY col + +┌─range─┬─first─┬─first_ignore_null─┬─first_respect_nulls─┠+│ 0_4 │ 1 │ 1 │ á´ºáµá´¸á´¸ │ +│ 1_9 │ 5 │ 5 │ 5 │ +│ 2_14 │ 11 │ 11 │ á´ºáµá´¸á´¸ │ +└───────┴───────┴───────────────────┴─────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index a45eb1b409f..4631060f33f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -9,7 +9,7 @@ Selects the first encountered value of a column. By default, it ignores NULL values and returns the first NOT NULL value found in the column. As [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) if supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not. -The return type of the function is the same as the input, except for LowCardinality which is discarded). This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour. +The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour. The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. To get a determinate result, you can use the ‘min’ or ‘max’ function instead of ‘any’. 
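To make the determinism point concrete, here is a small illustrative query (added for clarity; the value picked by `any` is not guaranteed to be stable between runs or replicas), contrasting `any` with `min`/`max`:

```sql
-- min()/max() are deterministic for the same data;
-- any() merely returns some encountered value.
SELECT any(number), min(number), max(number)
FROM numbers(10);
```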
diff --git a/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md b/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md new file mode 100644 index 00000000000..2bb43a9f665 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md @@ -0,0 +1,55 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/approxtopk +sidebar_position: 212 +--- + +# approx_top_k + +Returns an array of the approximately most frequent values and their counts in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves). + + +``` sql +approx_top_k(N)(column) +approx_top_k(N, reserved)(column) +``` + +This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren’t the most frequent values. + +We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. + +**Parameters** + +- `N` — The number of elements to return. Optional. Default value: 10. +- `reserved` — Defines, how many cells reserved for values. If uniq(column) > reserved, result of topK function will be approximate. Optional. Default value: N * 3. + +**Arguments** + +- `column` — The value to calculate frequency. + +**Example** + +Query: + +``` sql +SELECT approx_top_k(2)(k) +FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10)); +``` + +Result: + +``` text +┌─approx_top_k(2)(k)────┠+│ [('y',3,0),('x',1,0)] │ +└───────────────────────┘ +``` + +# approx_top_count + +Is an alias to `approx_top_k` function + +**See Also** + +- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) +- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) +- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md) + diff --git a/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md b/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md new file mode 100644 index 00000000000..aa884b26d8e --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md @@ -0,0 +1,51 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/approxtopsum +sidebar_position: 212 +--- + +# approx_top_sum + +Returns an array of the approximately most frequent values and their counts in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves). Additionally, the weight of the value is taken into account. + +``` sql +approx_top_sum(N)(column, weight) +approx_top_sum(N, reserved)(column, weight) +``` + +This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren’t the most frequent values. + +We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. + +**Parameters** + +- `N` — The number of elements to return. Optional. Default value: 10. +- `reserved` — Defines, how many cells reserved for values. If uniq(column) > reserved, result of topK function will be approximate. Optional. Default value: N * 3. + +**Arguments** + +- `column` — The value to calculate frequency. +- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md). 
+ + +**Example** + +Query: + +``` sql +SELECT approx_top_sum(2)(k, w) +FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10)) +``` + +Result: + +``` text +┌─approx_top_sum(2)(k, w)─┠+│ [('z',10,0),('x',5,0)] │ +└─────────────────────────┘ +``` + +**See Also** + +- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) +- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) +- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/contingency.md b/docs/en/sql-reference/aggregate-functions/reference/contingency.md index 1b53ca1528f..902c1f4af80 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/contingency.md +++ b/docs/en/sql-reference/aggregate-functions/reference/contingency.md @@ -20,7 +20,7 @@ contingency(column1, column2) **Returned value** -- a value between 0 to 1. The larger the result, the closer the association of the two columns. +- a value between 0 and 1. The larger the result, the closer the association of the two columns. **Return type** is always [Float64](../../../sql-reference/data-types/float.md). @@ -48,4 +48,4 @@ Result: ┌──────cramersV(a, b)─┬───contingency(a, b)─┠│ 0.41171788506213564 │ 0.05812725261759165 │ └─────────────────────┴─────────────────────┘ -``` \ No newline at end of file +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md new file mode 100644 index 00000000000..5cac88be073 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md @@ -0,0 +1,50 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/grouparrayintersect +sidebar_position: 115 +--- + +# groupArrayIntersect + +Return an intersection of given arrays (Return all items of arrays, that are in all given arrays). + +**Syntax** + +``` sql +groupArrayIntersect(x) +``` + +**Arguments** + +- `x` — Argument (column name or expression). + +**Returned values** + +- Array that contains elements that are in all arrays. + +Type: [Array](../../data-types/array.md). + +**Examples** + +Consider table `numbers`: + +``` text +┌─a──────────────┠+│ [1,2,4] │ +│ [1,5,2,8,-1,0] │ +│ [1,5,7,5,8,2] │ +└────────────────┘ +``` + +Query with column name as argument: + +``` sql +SELECT groupArrayIntersect(a) as intersection FROM numbers; +``` + +Result: + +```text +┌─intersection──────┠+│ [1, 2] │ +└───────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md index cc601c097fe..9bee0c29e7a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -14,8 +14,6 @@ - `N` – The number of elements to return. - If the parameter is omitted, default value is the size of input. - - `column` – The value (Integer, String, Float and other Generic types). 
**Example** @@ -36,13 +34,12 @@ Gets all the String implementations of all numbers in column: ``` sql -SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5)); +SELECT groupArraySorted(5)(str) FROM (SELECT toString(number) as str FROM numbers(5)); ``` ``` text - ┌─groupArraySorted(str)────────┠- │ ['0','1','2','3','4'] │ - └──────────────────────────────┘ - ``` - \ No newline at end of file +┌─groupArraySorted(5)(str)─┠+│ ['0','1','2','3','4'] │ +└──────────────────────────┘ + ``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 93d4282c32b..b99d4b06d55 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -55,6 +55,7 @@ ClickHouse-specific aggregate functions: - [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) - [groupArraySample](./grouparraysample.md) - [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md) +- [groupArrayIntersect](./grouparrayintersect.md) - [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md index bcff05ada47..ea3dbff8691 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md @@ -13,8 +13,8 @@ simpleLinearRegression(x, y) Parameters: -- `x` — Column with dependent variable values. -- `y` — Column with explanatory variable values. +- `x` — Column with explanatory variable values. +- `y` — Column with dependent variable values. Returned values: diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md index f7615d90790..ddac82a0977 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md @@ -5,25 +5,25 @@ sidebar_position: 221 # stochasticLinearRegression -This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size and has few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). +This function implements stochastic linear regression. 
It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size, and has a few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), and [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). ### Parameters There are 4 customizable parameters. They are passed to the function sequentially, but there is no need to pass all four - default values will be used, however good model required some parameter tuning. ``` text -stochasticLinearRegression(1.0, 1.0, 10, 'SGD') +stochasticLinearRegression(0.00001, 0.1, 15, 'Adam') ``` -1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big learning rate may cause infinite weights of the model. Default is `0.00001`. +1. `learning rate` is the coefficient on step length, when the gradient descent step is performed. A learning rate that is too big may cause infinite weights of the model. Default is `0.00001`. 2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`. -3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however having small batches(about 10 elements) make gradient steps more stable. Default is `15`. -4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods. +3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however, having small batches (about 10 elements) makes gradient steps more stable. Default is `15`. +4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, and `Nesterov`. `Momentum` and `Nesterov` require a little bit more computations and memory, however, they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods. ### Usage -`stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later usage we use `-State` combinator, which basically saves the state (model weights, etc). -To predict we use function [evalMLMethod](../../../sql-reference/functions/machine-learning-functions.md#machine_learning_methods-evalmlmethod), which takes a state as an argument as well as features to predict on. +`stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later usage, we use the `-State` combinator, which saves the state (e.g. model weights). +To predict, we use the function [evalMLMethod](../../../sql-reference/functions/machine-learning-functions.md#machine_learning_methods-evalmlmethod), which takes a state as an argument as well as features to predict on. @@ -44,12 +44,12 @@ stochasticLinearRegressionState(0.1, 0.0, 5, 'SGD')(target, param1, param2) AS state FROM train_data; ``` -Here we also need to insert data into `train_data` table. 
The number of parameters is not fixed, it depends only on number of arguments, passed into `linearRegressionState`. They all must be numeric values. -Note that the column with target value(which we would like to learn to predict) is inserted as the first argument. +Here, we also need to insert data into the `train_data` table. The number of parameters is not fixed, it depends only on the number of arguments passed into `linearRegressionState`. They all must be numeric values. +Note that the column with target value (which we would like to learn to predict) is inserted as the first argument. **2.** Predicting -After saving a state into the table, we may use it multiple times for prediction, or even merge with other states and create new even better models. +After saving a state into the table, we may use it multiple times for prediction or even merge with other states and create new, even better models. ``` sql WITH (SELECT state FROM your_model) AS model SELECT diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index bde29275f79..dd4b2251a8a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -11,21 +11,23 @@ Implements the [Filtered Space-Saving](https://doi.org/10.1016/j.ins.2010.08.024 ``` sql topK(N)(column) +topK(N, load_factor)(column) +topK(N, load_factor, 'counts')(column) ``` This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren’t the most frequent values. We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. +**Parameters** + +- `N` — The number of elements to return. Optional. Default value: 10. +- `load_factor` — Defines, how many cells reserved for values. If uniq(column) > N * load_factor, result of topK function will be approximate. Optional. Default value: 3. +- `counts` — Defines, should result contain approximate count and error value. + **Arguments** -- `N` – The number of elements to return. - -If the parameter is omitted, default value 10 is used. - -**Arguments** - -- `x` – The value to calculate frequency. +- `column` — The value to calculate frequency. **Example** @@ -41,3 +43,9 @@ FROM ontime │ [19393,19790,19805] │ └─────────────────────┘ ``` + +**See Also** + +- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) +- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md) +- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md) \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index 03932e88a6a..d2a469828fc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -10,13 +10,20 @@ Returns an array of the approximately most frequent values in the specified colu **Syntax** ``` sql -topKWeighted(N)(x, weight) +topKWeighted(N)(column, weight) +topKWeighted(N, load_factor)(column, weight) +topKWeighted(N, load_factor, 'counts')(column, weight) ``` +**Parameters** + +- `N` — The number of elements to return. Optional. Default value: 10. +- `load_factor` — Defines, how many cells reserved for values. 
If uniq(column) > N * load_factor, result of topK function will be approximate. Optional. Default value: 3. +- `counts` — Defines, should result contain approximate count and error value. + **Arguments** -- `N` — The number of elements to return. -- `x` — The value. +- `column` — The value. - `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** @@ -40,6 +47,23 @@ Result: └────────────────────────┘ ``` +Query: + +``` sql +SELECT topKWeighted(2, 10, 'counts')(k, w) +FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10)) +``` + +Result: + +``` text +┌─topKWeighted(2, 10, 'counts')(k, w)─┠+│ [('z',10,0),('x',5,0)] │ +└─────────────────────────────────────┘ +``` + **See Also** - [topK](../../../sql-reference/aggregate-functions/reference/topk.md) +- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md) +- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md) \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 751688b0830..76472f62789 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -1,16 +1,99 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/varpop +title: "varPop" +slug: "/en/sql-reference/aggregate-functions/reference/varpop" sidebar_position: 32 --- -# varPop(x) +This page covers the `varPop` and `varPopStable` functions available in ClickHouse. -Calculates the amount `Σ((x - xÌ…)^2) / n`, where `n` is the sample size and `xÌ…`is the average value of `x`. +## varPop -In other words, dispersion for a set of values. Returns `Float64`. +Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - xÌ…)^2) / n`, where `n` is the sample size and `xÌ…`is the average value of `x`. -Alias: `VAR_POP`. +**Syntax** -:::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. -::: \ No newline at end of file +```sql +covarPop(x, y) +``` + +**Parameters** + +- `x`: The first data column. [Numeric](../../../native-protocol/columns.md) +- `y`: The second data column. [Numeric](../../../native-protocol/columns.md) + +**Returned value** + +Returns an integer of type `Float64`. + +**Implementation details** + +This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). + +**Example** + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Int32, + y Int32 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8); + +SELECT + covarPop(x, y) AS covar_pop +FROM test_data; +``` + +```response +3 +``` + +## varPopStable + +Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations. 
+ +**Syntax** + +```sql +covarPopStable(x, y) +``` + +**Parameters** + +- `x`: The first data column. [String literal](../syntax#syntax-string-literal) +- `y`: The second data column. [Expression](../syntax#syntax-expressions) + +**Returned value** + +Returns an integer of type `Float64`. + +**Implementation details** + +Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations. + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Int32, + y Int32 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8); + +SELECT + covarPopStable(x, y) AS covar_pop_stable +FROM test_data; +``` + +```response +0.5999999999999999 +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index 9b2b94936ec..e75cb075ff8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -1,18 +1,128 @@ --- +title: "varSamp" slug: /en/sql-reference/aggregate-functions/reference/varsamp sidebar_position: 33 --- -# varSamp +This page contains information on the `varSamp` and `varSampStable` ClickHouse functions. -Calculates the amount `Σ((x - xÌ…)^2) / (n - 1)`, where `n` is the sample size and `xÌ…`is the average value of `x`. +## varSamp -It represents an unbiased estimate of the variance of a random variable if passed values from its sample. +Calculate the sample variance of a data set. -Returns `Float64`. When `n <= 1`, returns `+∞`. +**Syntax** -Alias: `VAR_SAMP`. +```sql +varSamp(expr) +``` -:::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error. -::: +**Parameters** + +- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions) + +**Returned value** + +Returns a Float64 value representing the sample variance of the input data set. + +**Implementation details** + +The `varSamp()` function calculates the sample variance using the following formula: + +```plaintext +∑(x - mean(x))^2 / (n - 1) +``` + +Where: + +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. + +The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead. + +This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable). 
+ +**Example** + +Query: + +```sql +CREATE TABLE example_table +( + id UInt64, + value Float64 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); + +SELECT varSamp(value) FROM example_table; +``` + +Response: + +```response +0.8650000000000091 +``` + +## varSampStable + +Calculate the sample variance of a data set using a numerically stable algorithm. + +**Syntax** + +```sql +varSampStable(expr) +``` + +**Parameters** + +- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions) + +**Returned value** + +The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set. + +**Implementation details** + +The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function): + +```plaintext +∑(x - mean(x))^2 / (n - 1) +``` + +Where: +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. + +The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values. + +Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead. + +**Example** + +Query: + +```sql +CREATE TABLE example_table +( + id UInt64, + value Float64 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); + +SELECT varSampStable(value) FROM example_table; +``` + +Response: + +```response +0.865 +``` + +This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic. diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 8c7fa17ae92..504d0e2b0a6 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -9,7 +9,7 @@ sidebar_label: DateTime64 Allows to store an instant in time, that can be expressed as a calendar date and a time of a day, with defined sub-second precision Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. -Typically are used - 3 (milliseconds), 6 (microseconds), 9 (nanoseconds). +Typically, are used - 3 (milliseconds), 6 (microseconds), 9 (nanoseconds). **Syntax:** diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index e082eb29fbd..2b32e72a28f 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -10,7 +10,7 @@ Signed fixed-point numbers that keep precision during add, subtract and multiply ## Parameters -- P - precision. 
Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). By default the precision is 10. +- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). By default, the precision is 10. - S - scale. Valid range: \[ 0 : P \]. Determines how many decimal digits fraction can have. Decimal(P) is equivalent to Decimal(P, 0). Similarly, the syntax Decimal is equivalent to Decimal(10, 0). diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index f027e3fe343..7d10d4b0e97 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -12,6 +12,11 @@ has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` v The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1). Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types. +:::note +It's not recommended to use similar types as variants (for example different numeric types like `Variant(UInt32, Int64)` or different date types like `Variant(Date, DateTime)`), +because working with values of such types can lead to ambiguity. By default, creating such `Variant` type will lead to an exception, but can be enabled using setting `allow_suspicious_variant_types` +::: + :::note The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`. ::: @@ -272,3 +277,121 @@ $$) │ [1,2,3] │ á´ºáµá´¸á´¸ │ á´ºáµá´¸á´¸ │ á´ºáµá´¸á´¸ │ á´ºáµá´¸á´¸ │ [1,2,3] │ └─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘ ``` + + +## Comparing values of Variant type + +Values of a `Variant` type can be compared only with values with the same `Variant` type. + +The result of operator `<` for values `v1` with underlying type `T1` and `v2` with underlying type `T2` of a type `Variant(..., T1, ... T2, ...)` is defined as follows: +- If `T1 = T2 = T`, the result will be `v1.T < v2.T` (underlying values will be compared). +- If `T1 != T2`, the result will be `T1 < T2` (type names will be compared). 
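+
+For instance, comparing two constants cast to the same `Variant` type shows the second rule directly (a minimal sketch, assuming the experimental `Variant` type is enabled); the fuller table-based examples below demonstrate the same rules inside queries:
+
+```sql
+SET allow_experimental_variant_type = 1;
+
+-- 'abc' is stored as String and 42 as UInt64; since the underlying types differ,
+-- the type names are compared: 'String' < 'UInt64'
+SELECT CAST('abc', 'Variant(String, UInt64)') < CAST(42, 'Variant(String, UInt64)') AS str_is_less;
+```
+
+```response
+1
+```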
+ +Examples: +```sql +CREATE TABLE test (v1 Variant(String, UInt64, Array(UInt32)), v2 Variant(String, UInt64, Array(UInt32))) ENGINE=Memory; +INSERT INTO test VALUES (42, 42), (42, 43), (42, 'abc'), (42, [1, 2, 3]), (42, []), (42, NULL); +``` + +```sql +SELECT v2, variantType(v2) as v2_type from test order by v2; +``` + +```text +┌─v2──────┬─v2_type───────┠+│ [] │ Array(UInt32) │ +│ [1,2,3] │ Array(UInt32) │ +│ abc │ String │ +│ 42 │ UInt64 │ +│ 43 │ UInt64 │ +│ á´ºáµá´¸á´¸ │ None │ +└─────────┴───────────────┘ +``` + +```sql +SELECT v1, variantType(v1) as v1_type, v2, variantType(v2) as v2_type, v1 = v2, v1 < v2, v1 > v2 from test; +``` + +```text +┌─v1─┬─v1_type─┬─v2──────┬─v2_type───────┬─equals(v1, v2)─┬─less(v1, v2)─┬─greater(v1, v2)─┠+│ 42 │ UInt64 │ 42 │ UInt64 │ 1 │ 0 │ 0 │ +│ 42 │ UInt64 │ 43 │ UInt64 │ 0 │ 1 │ 0 │ +│ 42 │ UInt64 │ abc │ String │ 0 │ 0 │ 1 │ +│ 42 │ UInt64 │ [1,2,3] │ Array(UInt32) │ 0 │ 0 │ 1 │ +│ 42 │ UInt64 │ [] │ Array(UInt32) │ 0 │ 0 │ 1 │ +│ 42 │ UInt64 │ á´ºáµá´¸á´¸ │ None │ 0 │ 1 │ 0 │ +└────┴─────────┴─────────┴───────────────┴────────────────┴──────────────┴─────────────────┘ + +``` + +If you need to find the row with specific `Variant` value, you can do one of the following: + +- Cast value to the corresponding `Variant` type: + +```sql +SELECT * FROM test WHERE v2 == [1,2,3]::Array(UInt32)::Variant(String, UInt64, Array(UInt32)); +``` + +```text +┌─v1─┬─v2──────┠+│ 42 │ [1,2,3] │ +└────┴─────────┘ +``` + +- Compare `Variant` subcolumn with required type: + +```sql +SELECT * FROM test WHERE v2.`Array(UInt32)` == [1,2,3] -- or using variantElement(v2, 'Array(UInt32)') +``` + +```text +┌─v1─┬─v2──────┠+│ 42 │ [1,2,3] │ +└────┴─────────┘ +``` + +Sometimes it can be useful to make additional check on variant type as subcolumns with complex types like `Array/Map/Tuple` cannot be inside `Nullable` and will have default values instead of `NULL` on rows with different types: + +```sql +SELECT v2, v2.`Array(UInt32)`, variantType(v2) FROM test WHERE v2.`Array(UInt32)` == []; +``` + +```text +┌─v2───┬─v2.Array(UInt32)─┬─variantType(v2)─┠+│ 42 │ [] │ UInt64 │ +│ 43 │ [] │ UInt64 │ +│ abc │ [] │ String │ +│ [] │ [] │ Array(UInt32) │ +│ á´ºáµá´¸á´¸ │ [] │ None │ +└──────┴──────────────────┴─────────────────┘ +``` + +```sql +SELECT v2, v2.`Array(UInt32)`, variantType(v2) FROM test WHERE variantType(v2) == 'Array(UInt32)' AND v2.`Array(UInt32)` == []; +``` + +```text +┌─v2─┬─v2.Array(UInt32)─┬─variantType(v2)─┠+│ [] │ [] │ Array(UInt32) │ +└────┴──────────────────┴─────────────────┘ +``` + +**Note:** values of variants with different numeric types are considered as different variants and not compared between each other, their type names are compared instead. 
+ +Example: + +```sql +SET allow_suspicious_variant_types = 1; +CREATE TABLE test (v Variant(UInt32, Int64)) ENGINE=Memory; +INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64); +SELECT v, variantType(v) FROM test ORDER by v; +``` + +```text +┌─v───┬─variantType(v)─┠+│ 1 │ Int64 │ +│ 100 │ Int64 │ +│ 1 │ UInt32 │ +│ 100 │ UInt32 │ +└─────┴────────────────┘ +``` diff --git a/docs/en/sql-reference/distributed-ddl.md b/docs/en/sql-reference/distributed-ddl.md index d170f3765c2..7952792cbf4 100644 --- a/docs/en/sql-reference/distributed-ddl.md +++ b/docs/en/sql-reference/distributed-ddl.md @@ -6,7 +6,7 @@ sidebar_label: Distributed DDL # Distributed DDL Queries (ON CLUSTER Clause) -By default the `CREATE`, `DROP`, `ALTER`, and `RENAME` queries affect only the current server where they are executed. In a cluster setup, it is possible to run such queries in a distributed manner with the `ON CLUSTER` clause. +By default, the `CREATE`, `DROP`, `ALTER`, and `RENAME` queries affect only the current server where they are executed. In a cluster setup, it is possible to run such queries in a distributed manner with the `ON CLUSTER` clause. For example, the following query creates the `all_hits` `Distributed` table on each host in `cluster`: diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 2120b675c73..67a4c026851 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -19,7 +19,7 @@ empty([x]) An array is considered empty if it does not contain any elements. :::note -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. +Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. ::: The function also works for [strings](string-functions.md#empty) or [UUID](uuid-functions.md#empty). @@ -104,17 +104,416 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat Alias: `OCTET_LENGTH` -## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 +## emptyArrayUInt8 -## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 +Returns an empty UInt8 array. -## emptyArrayFloat32, emptyArrayFloat64 +**Syntax** -## emptyArrayDate, emptyArrayDateTime +```sql +emptyArrayUInt8() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayUInt8(); +``` + +Result: + +```response +[] +``` + +## emptyArrayUInt16 + +Returns an empty UInt16 array. + +**Syntax** + +```sql +emptyArrayUInt16() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. 
+ +**Examples** + +Query: + +```sql +SELECT emptyArrayUInt16(); + +``` + +Result: + +```response +[] +``` + +## emptyArrayUInt32 + +Returns an empty UInt32 array. + +**Syntax** + +```sql +emptyArrayUInt32() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayUInt32(); +``` + +Result: + +```response +[] +``` + +## emptyArrayUInt64 + +Returns an empty UInt64 array. + +**Syntax** + +```sql +emptyArrayUInt64() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayUInt64(); +``` + +Result: + +```response +[] +``` + +## emptyArrayInt8 + +Returns an empty Int8 array. + +**Syntax** + +```sql +emptyArrayInt8() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayInt8(); +``` + +Result: + +```response +[] +``` + +## emptyArrayInt16 + +Returns an empty Int16 array. + +**Syntax** + +```sql +emptyArrayInt16() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayInt16(); +``` + +Result: + +```response +[] +``` + +## emptyArrayInt32 + +Returns an empty Int32 array. + +**Syntax** + +```sql +emptyArrayInt32() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayInt32(); +``` + +Result: + +```response +[] +``` + +## emptyArrayInt64 + +Returns an empty Int64 array. + +**Syntax** + +```sql +emptyArrayInt64() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayInt64(); +``` + +Result: + +```response +[] +``` + +## emptyArrayFloat32 + +Returns an empty Float32 array. + +**Syntax** + +```sql +emptyArrayFloat32() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayFloat32(); +``` + +Result: + +```response +[] +``` + +## emptyArrayFloat64 + +Returns an empty Float64 array. + +**Syntax** + +```sql +emptyArrayFloat64() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayFloat64(); +``` + +Result: + +```response +[] +``` + +## emptyArrayDate + +Returns an empty Date array. + +**Syntax** + +```sql +emptyArrayDate() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayDate(); +``` + +## emptyArrayDateTime + +Returns an empty DateTime array. + +**Syntax** + +```sql +[] +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. + +**Examples** + +Query: + +```sql +SELECT emptyArrayDateTime(); +``` + +Result: + +```response +[] +``` ## emptyArrayString -Accepts zero arguments and returns an empty array of the appropriate type. +Returns an empty String array. + +**Syntax** + +```sql +emptyArrayString() +``` + +**Arguments** + +None. + +**Returned value** + +An empty array. 
+ +**Examples** + +Query: + +```sql +SELECT emptyArrayString(); +``` + +Result: + +```response +[] +``` ## emptyArrayToSingle diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 3c07fe8bcbe..0951c783aae 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -167,6 +167,10 @@ Result: └──────────────────────────────────────────┴───────────────────────────────┘ ``` +## byteSlice(s, offset, length) + +See function [substring](string-functions.md#substring). + ## bitTest Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left. diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index 9b66d00656b..379be302881 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -372,7 +372,7 @@ Result: ## bitmapAnd -Computes the logical conjunction of two two bitmaps. +Computes the logical conjunction of two bitmaps. **Syntax** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 5622097537e..ba7695af3fa 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -394,8 +394,7 @@ Result: ## toYear -Converts a date or date with time to the year number (AD) as `UInt16` value. - +Returns the year component (AD) of a date or date with time. **Syntax** @@ -431,7 +430,7 @@ Result: ## toQuarter -Converts a date or date with time to the quarter number (1-4) as `UInt8` value. +Returns the quarter (1-4) of a date or date with time. **Syntax** @@ -465,10 +464,9 @@ Result: └──────────────────────────────────────────────┘ ``` - ## toMonth -Converts a date or date with time to the month number (1-12) as `UInt8` value. +Returns the month component (1-12) of a date or date with time. **Syntax** @@ -504,7 +502,7 @@ Result: ## toDayOfYear -Converts a date or date with time to the number of the day of the year (1-366) as `UInt16` value. +Returns the number of the day within the year (1-366) of a date or date with time. **Syntax** @@ -540,7 +538,7 @@ Result: ## toDayOfMonth -Converts a date or date with time to the number of the day in the month (1-31) as `UInt8` value. +Returns the number of the day within the month (1-31) of a date or date with time. **Syntax** @@ -576,7 +574,7 @@ Result: ## toDayOfWeek -Converts a date or date with time to the number of the day in the week as `UInt8` value. +Returns the number of the day within the week of a date or date with time. The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument. @@ -627,7 +625,7 @@ Result: ## toHour -Converts a date with time to the number of the hour in 24-hour time (0-23) as `UInt8` value. +Returns the hour component (0-24) of a date with time. Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always exactly when it occurs - it depends on the timezone). 
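+
+For example, with a literal value (an illustrative query):
+
+```sql
+SELECT toHour(toDateTime('2024-04-21 10:20:30')) AS h;
+```
+
+```response
+10
+```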
@@ -641,7 +639,7 @@ Alias: `HOUR` **Arguments** -- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) **Returned value** @@ -665,7 +663,7 @@ Result: ## toMinute -Converts a date with time to the number of the minute of the hour (0-59) as `UInt8` value. +Returns the minute component (0-59) a date with time. **Syntax** @@ -677,7 +675,7 @@ Alias: `MINUTE` **Arguments** -- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) **Returned value** @@ -701,7 +699,7 @@ Result: ## toSecond -Converts a date with time to the second in the minute (0-59) as `UInt8` value. Leap seconds are not considered. +Returns the second component (0-59) of a date with time. Leap seconds are not considered. **Syntax** @@ -713,7 +711,7 @@ Alias: `SECOND` **Arguments** -- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) **Returned value** @@ -735,6 +733,40 @@ Result: └─────────────────────────────────────────────┘ ``` +## toMillisecond + +Returns the millisecond component (0-999) of a date with time. + +**Syntax** + +```sql +toMillisecond(value) +``` + +*Arguments** + +- `value` - [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) + +Alias: `MILLISECOND` + +```sql +SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3)) +``` + +Result: + +```response +┌──toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))─┠+│ 456 │ +└────────────────────────────────────────────────────────────┘ +``` + +**Returned value** + +- The millisecond in the minute (0 - 59) of the given date/time + +Type: `UInt16` + ## toUnixTimestamp Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation. @@ -1564,7 +1596,7 @@ Alias: `TO_DAYS` **Arguments** - `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md) **Returned value** @@ -2218,7 +2250,7 @@ now64([scale], [timezone]) **Arguments** -- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. Typically are used - 3 (default) (milliseconds), 6 (microseconds), 9 (nanoseconds). +- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. Typically, are used - 3 (default) (milliseconds), 6 (microseconds), 9 (nanoseconds). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). 
[String](../../sql-reference/data-types/string.md). **Returned value** @@ -2287,10 +2319,43 @@ Result: ## today {#today} -Accepts zero arguments and returns the current date at one of the moments of query analysis. -The same as ‘toDate(now())’. +Returns the current date at moment of query analysis. It is the same as ‘toDate(now())’ and has aliases: `curdate`, `current_date`. -Aliases: `curdate`, `current_date`. +**Syntax** + +```sql +today() +``` + +**Arguments** + +- None + +**Returned value** + +- Current date + +Type: [DateTime](../../sql-reference/data-types/datetime.md). + +**Example** + +Query: + +```sql +SELECT today() AS today, curdate() AS curdate, current_date() AS current_date FORMAT Pretty +``` + +**Result**: + +Running the query above on the 3rd of March 2024 would have returned the following response: + +```response +â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ today ┃ curdate ┃ current_date ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 2024-03-03 │ 2024-03-03 │ 2024-03-03 │ +└────────────┴────────────┴──────────────┘ +``` ## yesterday {#yesterday} @@ -2305,7 +2370,7 @@ Rounds the time to the half hour. Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. -This functions is the opposite of function `YYYYMMDDToDate()`. +This function is the opposite of function `YYYYMMDDToDate()`. **Example** @@ -2362,7 +2427,7 @@ Result: Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md). -This functions is the opposite of function `toYYYYMMDD()`. +This function is the opposite of function `toYYYYMMDD()`. The output is undefined if the input does not encode a valid Date value. @@ -2406,7 +2471,7 @@ Converts a number containing the year, month, day, hours, minute and second numb The output is undefined if the input does not encode a valid DateTime value. -This functions is the opposite of function `toYYYYMMDDhhmmss()`. +This function is the opposite of function `toYYYYMMDDhhmmss()`. **Syntax** @@ -2981,8 +3046,8 @@ toUTCTimestamp(time_val, time_zone) **Arguments** -- `time_val` — A DateTime/DateTime64 type const value or a expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) -- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md) **Returned value** @@ -3014,8 +3079,8 @@ fromUTCTimestamp(time_val, time_zone) **Arguments** -- `time_val` — A DateTime/DateTime64 type const value or a expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) -- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `time_zone` — A String type const value or an expression represent the time zone. 
[String types](../../sql-reference/data-types/string.md) **Returned value** @@ -3034,6 +3099,40 @@ Result: │ 2023-03-16 18:00:00.000 │ └─────────────────────────────────────────────────────────────────────────┘ ``` +## timeDiff + +Returns the difference between two dates or dates with time values. The difference is calculated in units of seconds. It is same as `dateDiff` and was added only for MySQL support. `dateDiff` is preferred. + +**Syntax** + +```sql +timeDiff(first_datetime, second_datetime) +``` + +*Arguments** + +- `first_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `second_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) + +**Returned value** + +The difference between two dates or dates with time values in seconds. + +**Example** + +Query: + +```sql +timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); +``` + +**Result**: + +```response +┌─timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02'))─┠+│ 86400 │ +└──────────────────────────────────────────────────────────────────────────┘ +``` ## Related content diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 1774c22014d..e20c35c6b6f 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -509,7 +509,7 @@ Result: ## cosineDistance -Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The less the returned value is, the more similar are the vectors. +Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The smaller the returned value is, the more similar are the vectors. **Syntax** diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 618dd3f4b4f..4f6da764b3c 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -433,3 +433,292 @@ Result: │ [0,1,2,3,4,5,6,7] │ └───────────────────┘ ``` + +## mortonEncode + +Calculates the Morton encoding (ZCurve) for a list of unsigned integers. + +The function has two modes of operation: +- Simple +- Expanded + +### Simple mode + +Accepts up to 8 unsigned integers as arguments and produces a UInt64 code. + +**Syntax** + +```sql +mortonEncode(args) +``` + +**Parameters** + +- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. + +**Returned value** + +- A UInt64 code + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + +**Example** + +Query: + +```sql +SELECT mortonEncode(1, 2, 3); +``` +Result: + +```response +53 +``` + +### Expanded mode + +Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. + +Each number in the mask configures the amount of range expansion:
+- 1 - no expansion
+- 2 - 2x expansion
+- 3 - 3x expansion
+- ... up to a maximum of 8x expansion
+ +**Syntax** + +```sql +mortonEncode(range_mask, args) +``` + +**Parameters** +- `range_mask`: 1-8. +- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. + +Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. + +**Returned value** + +- A UInt64 code + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + + +**Example** + +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). + +Query: + +```sql +SELECT mortonEncode((1,2), 1024, 16); +``` + +Result: + +```response +1572864 +``` + +Note: tuple size must be equal to the number of the other arguments. + +**Example** + +Morton encoding for one argument is always the argument itself: + +Query: + +```sql +SELECT mortonEncode(1); +``` + +Result: + +```response +1 +``` + +**Example** + +It is also possible to expand one argument too: + +Query: + +```sql +SELECT mortonEncode(tuple(2), 128); +``` + +Result: + +```response +32768 +``` + +**Example** + +You can also use column names in the function. + +Query: + +First create the table and insert some data. + +```sql +create table morton_numbers( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) +Engine=MergeTree() +ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into morton_numbers (*) values(1,2,3,4,5,6,7,8); +``` +Use column names instead of constants as function arguments to `mortonEncode` + +Query: + +```sql +SELECT mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8) FROM morton_numbers; +``` + +Result: + +```response +2155374165 +``` + +**implementation details** + +Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. + +## mortonDecode + +Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple. + +As with the `mortonEncode` function, this function has two modes of operation: +- Simple +- Expanded + +### Simple mode + +Accepts a resulting tuple size as the first argument and the code as the second argument. + +**Syntax** + +```sql +mortonDecode(tuple_size, code) +``` + +**Parameters** +- `tuple_size`: integer value no more than 8. +- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code. + +**Returned value** + +- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + +**Example** + +Query: + +```sql +SELECT mortonDecode(3, 53); +``` + +Result: + +```response +["1","2","3"] +``` + +### Expanded mode + +Accepts a range mask (tuple) as a first argument and the code as the second argument. +Each number in the mask configures the amount of range shrink:
+- 1 - no shrink
+- 2 - 2x shrink
+- 3 - 3x shrink
+- ... up to a maximum of 8x shrink
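+
+When the same range mask is used for encoding and decoding, a round trip should return the original values (a minimal sketch; expanded-mode behaviour is assumed to mirror the simple-mode round trip shown further below):
+
+```sql
+-- encode two values with a (1,2) mask, then decode the result with the same mask
+SELECT mortonDecode((1, 2), mortonEncode((1, 2), 1024, 16)) AS decoded; -- expected to yield the original pair (1024, 16)
+```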
+ +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). +As with the encode function, this is limited to 8 numbers at most. + +**Example** + +Query: + +```sql +SELECT mortonDecode(1, 1); +``` + +Result: + +```response +["1"] +``` + +**Example** + +It is also possible to shrink one argument: + +Query: + +```sql +SELECT mortonDecode(tuple(2), 32768); +``` + +Result: + +```response +["128"] +``` + +**Example** + +You can also use column names in the function. + +First create the table and insert some data. + +Query: +```sql +create table morton_numbers( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) +Engine=MergeTree() +ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into morton_numbers (*) values(1,2,3,4,5,6,7,8); +``` +Use column names instead of constants as function arguments to `mortonDecode` + +Query: + +```sql +select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) from morton_numbers; +``` + +Result: + +```response +1 2 3 4 5 6 7 8 +``` + + + + diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 91c04cfded3..4dfbf4262ed 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -10,6 +10,8 @@ sidebar_label: Nullable Returns whether the argument is [NULL](../../sql-reference/syntax.md#null). +See also operator [`IS NULL`](../operators/index.md#is_null). + ``` sql isNull(x) ``` @@ -54,6 +56,8 @@ Result: Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal). +See also operator [`IS NOT NULL`](../operators/index.md#is_not_null). + ``` sql isNotNull(x) ``` diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md index 4a8653965c2..35e2280e5cc 100644 --- a/docs/en/sql-reference/functions/geo/polygon.md +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -53,6 +53,62 @@ String starting with `POLYGON` Polygon +## readWKTPoint + +The `readWKTPoint` function in ClickHouse parses a Well-Known Text (WKT) representation of a Point geometry and returns a point in the internal ClickHouse format. + +### Syntax + +```sql +readWKTPoint(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a Point geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the Point geometry. + +### Example + +```sql +SELECT readWKTPoint('POINT (1.2 3.4)'); +``` + +```response +(1.2,3.4) +``` + +## readWKTRing + +Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format. + +### Syntax + +```sql +readWKTRing(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a Polygon geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the ring (closed linestring) geometry. + +### Example + +```sql +SELECT readWKTRing('LINESTRING (1 1, 2 2, 3 3, 1 1)'); +``` + +```response +[(1,1),(2,2),(3,3),(1,1)] +``` + ## polygonsWithinSpherical Returns true or false depending on whether or not one polygon lies completely inside another polygon. 
Reference https://www.boost.org/doc/libs/1_62_0/libs/geometry/doc/html/geometry/reference/algorithms/within/within_2.html diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 2c837ff4a42..e920ab82988 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -5,80 +5,372 @@ sidebar_label: JSON --- There are two sets of functions to parse JSON. - - `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. + - `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. - `JSONExtract*` is made to parse normal JSON. -# visitParam functions +# simpleJSON/visitParam functions ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. The following assumptions are made: 1. The field name (function argument) must be a constant. -2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` +2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` 3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used. 4. The JSON does not have space characters outside of string literals. -## visitParamHas(params, name) +## simpleJSONHas -Checks whether there is a field with the `name` name. +Checks whether there is a field named `field_name`. The result is `UInt8`. -Alias: `simpleJSONHas`. +**Syntax** -## visitParamExtractUInt(params, name) - -Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0. - -Alias: `simpleJSONExtractUInt`. - -## visitParamExtractInt(params, name) - -The same as for Int64. - -Alias: `simpleJSONExtractInt`. - -## visitParamExtractFloat(params, name) - -The same as for Float64. - -Alias: `simpleJSONExtractFloat`. - -## visitParamExtractBool(params, name) - -Parses a true/false value. The result is UInt8. - -Alias: `simpleJSONExtractBool`. - -## visitParamExtractRaw(params, name) - -Returns the value of a field, including separators. - -Alias: `simpleJSONExtractRaw`. - -Examples: - -``` sql -visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'; -visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'; +```sql +simpleJSONHas(json, field_name) ``` -## visitParamExtractString(params, name) +**Parameters** -Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string. +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) -Alias: `simpleJSONExtractString`. +**Returned value** -Examples: +It returns `1` if the field exists, `0` otherwise. 
-``` sql -visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'; -visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'; -visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''; -visitParamExtractString('{"abc":"hello}', 'abc') = ''; +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONHas(json, 'foo') FROM jsons; +SELECT simpleJSONHas(json, 'bar') FROM jsons; ``` +```response +1 +0 +``` +## simpleJSONExtractUInt + +Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractUInt(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"4e3"}'); +INSERT INTO jsons VALUES ('{"foo":3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +4 +0 +3 +5 +``` + +## simpleJSONExtractInt + +Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractInt(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +-4 +0 +-3 +5 +``` + +## simpleJSONExtractFloat + +Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractFloat(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. 
+ +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +-4000 +0 +-3.4 +5 +``` + +## simpleJSONExtractBool + +Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`. + +**Syntax** + +```sql +simpleJSONExtractBool(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` including (and not only) in the following cases: + - If the field doesn't exists. + - If the field contains `true` as a string, e.g.: `{"field":"true"}`. + - If the field contains `1` as a numerical value. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":false,"bar":true}'); +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json; +SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +1 +0 +0 +``` + +## simpleJSONExtractRaw + +Returns the value of the field named `field_name` as a `String`, including separators. + +**Syntax** + +```sql +simpleJSONExtractRaw(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json; +``` + +```response + +"-4e3" +-3.4 +5 +{"def":[1,2,3]} +``` + +## simpleJSONExtractString + +Parses `String` in double quotes from the value of the field named `field_name`. + +**Syntax** + +```sql +simpleJSONExtractString(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. 
+ +**Implementation details** + There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8). +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263a"}'); +INSERT INTO jsons VALUES ('{"foo":"hello}'); + +SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +\n\0 + +☺ + +``` + +## visitParamHas + +This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas). + +## visitParamExtractUInt + +This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint). + +## visitParamExtractInt + +This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint). + +## visitParamExtractFloat + +This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat). + +## visitParamExtractBool + +This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool). + +## visitParamExtractRaw + +This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw). + +## visitParamExtractString + +This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring). + # JSONExtract functions The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements. diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index b27668caf0c..fc659891b5c 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -299,6 +299,18 @@ sin(x) Type: [Float*](../../sql-reference/data-types/float.md). +**Example** + +Query: + +```sql +SELECT sin(1.23); +``` + +```response +0.9424888019316975 +``` + ## cos Returns the cosine of the argument. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index d05e7bbfe51..e7fca31483a 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -17,7 +17,7 @@ Returns a named value from the [macros](../../operations/server-configuration-pa **Syntax** -``` sql +```sql getMacro(name); ``` @@ -35,7 +35,7 @@ Type: [String](../../sql-reference/data-types/string.md). Example `` section in the server configuration file: -``` xml +```xml Value @@ -43,13 +43,13 @@ Example `` section in the server configuration file: Query: -``` sql +```sql SELECT getMacro('test'); ``` Result: -``` text +```text ┌─getMacro('test')─┠│ Value │ └──────────────────┘ @@ -57,12 +57,12 @@ Result: The same value can be retrieved as follows: -``` sql +```sql SELECT * FROM system.macros WHERE macro = 'test'; ``` -``` text +```text ┌─macro─┬─substitution─┠│ test │ Value │ └───────┴──────────────┘ @@ -74,7 +74,7 @@ Returns the fully qualified domain name of the ClickHouse server. **Syntax** -``` sql +```sql fqdn(); ``` @@ -88,13 +88,13 @@ Type: `String`. 
**Example** -``` sql +```sql SELECT FQDN(); ``` Result: -``` text +```text ┌─FQDN()──────────────────────────┠│ clickhouse.ru-central1.internal │ └─────────────────────────────────┘ @@ -104,7 +104,7 @@ Result: Extracts the tail of a string following its last slash or backslash. This function if often used to extract the filename from a path. -``` sql +```sql basename(expr) ``` @@ -123,13 +123,13 @@ A string that contains: Query: -``` sql +```sql SELECT 'some/long/path/to/file' AS a, basename(a) ``` Result: -``` text +```text ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┠│ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -137,13 +137,13 @@ Result: Query: -``` sql +```sql SELECT 'some\\long\\path\\to\\file' AS a, basename(a) ``` Result: -``` text +```text ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┠│ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -151,13 +151,13 @@ Result: Query: -``` sql +```sql SELECT 'some-file-name' AS a, basename(a) ``` Result: -``` text +```text ┌─a──────────────┬─basename('some-file-name')─┠│ some-file-name │ some-file-name │ └────────────────┴────────────────────────────┘ @@ -170,11 +170,11 @@ This function is used by the system to implement Pretty formats. `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. -``` sql +```sql SELECT visibleWidth(NULL) ``` -``` text +```text ┌─visibleWidth(NULL)─┠│ 4 │ └────────────────────┘ @@ -256,7 +256,7 @@ SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16 Result: -``` text +```text Row 1: ────── key: 1 @@ -298,13 +298,99 @@ Full columns and constants are represented differently in memory. Functions usua Accepts any arguments, including `NULL` and does nothing. Always returns 0. The argument is internally still evaluated. Useful e.g. for benchmarks. -## sleep(seconds) +## sleep -Sleeps ‘seconds’ seconds for each data block. The sleep time can be specified as integer or as floating-point number. +Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes. -## sleepEachRow(seconds) +**Syntax** -Sleeps ‘seconds’ seconds for each row. The sleep time can be specified as integer or as floating-point number. +```sql +sleep(seconds) +``` + +**Arguments** + +- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. + +**Returned value** + +This function does not return any value. + +**Example** + +```sql +SELECT sleep(2); +``` + +This function does not return any value. However, if you run the function with `clickhouse client` you will see something similar to: + +```response +SELECT sleep(2) + +Query id: 8aa9943e-a686-45e1-8317-6e8e3a5596ac + +┌─sleep(2)─┠+│ 0 │ +└──────────┘ + +1 row in set. Elapsed: 2.012 sec. +``` + +This query will pause for 2 seconds before completing. During this time, no results will be returned, and the query will appear to be hanging or unresponsive. + +**Implementation details** + +The `sleep()` function is generally not used in production environments, as it can negatively impact query performance and system responsiveness. However, it can be useful in the following scenarios: + +1. 
**Testing**: When testing or benchmarking ClickHouse, you may want to simulate delays or introduce pauses to observe how the system behaves under certain conditions. +2. **Debugging**: If you need to examine the state of the system or the execution of a query at a specific point in time, you can use `sleep()` to introduce a pause, allowing you to inspect or collect relevant information. +3. **Simulation**: In some cases, you may want to simulate real-world scenarios where delays or pauses occur, such as network latency or external system dependencies. + +It's important to use the `sleep()` function judiciously and only when necessary, as it can potentially impact the overall performance and responsiveness of your ClickHouse system. + +## sleepEachRow + +Pauses the execution of a query for a specified number of seconds for each row in the result set. + +**Syntax** + +```sql +sleepEachRow(seconds) +``` + +**Arguments** + +- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. + +**Returned value** + +This function returns the same input values as it receives, without modifying them. + +**Example** + +```sql +SELECT number, sleepEachRow(0.5) FROM system.numbers LIMIT 5; +``` + +```response +┌─number─┬─sleepEachRow(0.5)─┠+│ 0 │ 0 │ +│ 1 │ 0 │ +│ 2 │ 0 │ +│ 3 │ 0 │ +│ 4 │ 0 │ +└────────┴───────────────────┘ +``` + +But the output will be delayed, with a 0.5-second pause between each row. + +The `sleepEachRow()` function is primarily used for testing and debugging purposes, similar to the `sleep()` function. It allows you to simulate delays or introduce pauses in the processing of each row, which can be useful in scenarios such as: + +1. **Testing**: When testing or benchmarking ClickHouse's performance under specific conditions, you can use `sleepEachRow()` to simulate delays or introduce pauses for each row processed. +2. **Debugging**: If you need to examine the state of the system or the execution of a query for each row processed, you can use `sleepEachRow()` to introduce pauses, allowing you to inspect or collect relevant information. +3. **Simulation**: In some cases, you may want to simulate real-world scenarios where delays or pauses occur for each row processed, such as when dealing with external systems or network latencies. + +Like the [`sleep()` function](#sleep), it's important to use `sleepEachRow()` judiciously and only when necessary, as it can significantly impact the overall performance and responsiveness of your ClickHouse system, especially when dealing with large result sets. ## currentDatabase() @@ -315,7 +401,7 @@ Useful in table engine parameters of `CREATE TABLE` queries where you need to sp Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned. -``` sql +```sql SELECT currentUser(); ``` @@ -330,13 +416,13 @@ Type: `String`. **Example** -``` sql +```sql SELECT currentUser(); ``` Result: -``` text +```text ┌─currentUser()─┠│ default │ └───────────────┘ @@ -352,7 +438,7 @@ This function is mostly intended for development, debugging and demonstration. **Syntax** -``` sql +```sql isConstant(x) ``` @@ -371,13 +457,13 @@ Type: [UInt8](../../sql-reference/data-types/int-uint.md). 
Query: -``` sql +```sql SELECT isConstant(x + 1) FROM (SELECT 43 AS x) ``` Result: -``` text +```text ┌─isConstant(plus(x, 1))─┠│ 1 │ └────────────────────────┘ @@ -385,13 +471,13 @@ Result: Query: -``` sql +```sql WITH 3.14 AS pi SELECT isConstant(cos(pi)) ``` Result: -``` text +```text ┌─isConstant(cos(pi))─┠│ 1 │ └─────────────────────┘ @@ -399,13 +485,13 @@ Result: Query: -``` sql +```sql SELECT isConstant(number) FROM numbers(1) ``` Result: -``` text +```text ┌─isConstant(number)─┠│ 0 │ └────────────────────┘ @@ -425,7 +511,7 @@ Checks whether a floating point value is finite. **Syntax** -``` sql +```sql ifNotFinite(x,y) ``` @@ -479,7 +565,7 @@ The band is drawn with accuracy to one eighth of a symbol. Example: -``` sql +```sql SELECT toHour(EventTime) AS h, count() AS c, @@ -489,7 +575,7 @@ GROUP BY h ORDER BY h ASC ``` -``` text +```text ┌──h─┬──────c─┬─bar────────────────┠│ 0 │ 292907 │ █████████▋ │ │ 1 │ 180563 │ ██████ │ @@ -547,7 +633,7 @@ For example, the first argument could have type `Int64`, while the second argume Example: -``` sql +```sql SELECT transform(SearchEngineID, [2, 3], ['Yandex', 'Google'], 'Other') AS title, count() AS c @@ -557,7 +643,7 @@ GROUP BY title ORDER BY c DESC ``` -``` text +```text ┌─title─────┬──────c─┠│ Yandex │ 498635 │ │ Google │ 229872 │ @@ -571,7 +657,7 @@ Similar to the other variation but has no ‘default’ argument. In case no mat Example: -``` sql +```sql SELECT transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s, count() AS c @@ -581,7 +667,7 @@ ORDER BY count() DESC LIMIT 10 ``` -``` text +```text ┌─s──────────────┬───────c─┠│ │ 2906259 │ │ www.yandex │ 867767 │ @@ -601,13 +687,13 @@ Given a size (number of bytes), this function returns a readable, rounded size w Example: -``` sql +```sql SELECT arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes, formatReadableDecimalSize(filesize_bytes) AS filesize ``` -``` text +```text ┌─filesize_bytes─┬─filesize───┠│ 1 │ 1.00 B │ │ 1024 │ 1.02 KB │ @@ -622,7 +708,7 @@ Given a size (number of bytes), this function returns a readable, rounded size w Example: -``` sql +```sql SELECT arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes, formatReadableSize(filesize_bytes) AS filesize @@ -630,7 +716,7 @@ SELECT Alias: `FORMAT_BYTES`. -``` text +```text ┌─filesize_bytes─┬─filesize───┠│ 1 │ 1.00 B │ │ 1024 │ 1.00 KiB │ @@ -645,13 +731,13 @@ Given a number, this function returns a rounded number with suffix (thousand, mi Example: -``` sql +```sql SELECT arrayJoin([1024, 1234 * 1000, (4567 * 1000) * 1000, 98765432101234]) AS number, formatReadableQuantity(number) AS number_for_humans ``` -``` text +```text ┌─────────number─┬─number_for_humans─┠│ 1024 │ 1.02 thousand │ │ 1234000 │ 1.23 million │ @@ -666,7 +752,7 @@ Given a time interval (delta) in seconds, this function returns a time delta wit **Syntax** -``` sql +```sql formatReadableTimeDelta(column[, maximum_unit, minimum_unit]) ``` @@ -674,21 +760,22 @@ formatReadableTimeDelta(column[, maximum_unit, minimum_unit]) - `column` — A column with a numeric time delta. - `maximum_unit` — Optional. Maximum unit to show. - * Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. - * Default value: `years`. + - Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. + - Default value: `years`. - `minimum_unit` — Optional. Minimum unit to show. 
All smaller units are truncated. - * Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. - * If explicitly specified value is bigger than `maximum_unit`, an exception will be thrown. - * Default value: `seconds` if `maximum_unit` is `seconds` or bigger, `nanoseconds` otherwise. + - Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. + - If explicitly specified value is bigger than `maximum_unit`, an exception will be thrown. + - Default value: `seconds` if `maximum_unit` is `seconds` or bigger, `nanoseconds` otherwise. **Example** -``` sql + +```sql SELECT arrayJoin([100, 12345, 432546534]) AS elapsed, formatReadableTimeDelta(elapsed) AS time_delta ``` -``` text +```text ┌────elapsed─┬─time_delta ─────────────────────────────────────────────────────┠│ 100 │ 1 minute and 40 seconds │ │ 12345 │ 3 hours, 25 minutes and 45 seconds │ @@ -696,13 +783,13 @@ SELECT └────────────┴─────────────────────────────────────────────────────────────────┘ ``` -``` sql +```sql SELECT arrayJoin([100, 12345, 432546534]) AS elapsed, formatReadableTimeDelta(elapsed, 'minutes') AS time_delta ``` -``` text +```text ┌────elapsed─┬─time_delta ─────────────────────────────────────────────────────┠│ 100 │ 1 minute and 40 seconds │ │ 12345 │ 205 minutes and 45 seconds │ @@ -738,7 +825,6 @@ parseTimeDelta(timestr) - `timestr` — A sequence of numbers followed by something resembling a time unit. - **Returned value** - A floating-point number with the number of seconds. @@ -780,8 +866,52 @@ If executed in the context of a distributed table, this function generates a nor ## version() -Returns the server version as a string. -If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +Returns the current version of ClickHouse as a string in the form of: + +- Major version +- Minor version +- Patch version +- Number of commits since the previous stable release. + +```plaintext +major_version.minor_version.patch_version.number_of_commits_since_the_previous_stable_release +``` + +If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise, it produces a constant value. + +**Syntax** + +```sql +version() +``` + +**Arguments** + +None. + +**Returned value** + +Type: [String](../data-types/string) + +**Implementation details** + +None. + +**Example** + +Query: + +```sql +SELECT version() +``` + +**Result**: + +```response +┌─version()─┠+│ 24.2.1.1 │ +└───────────┘ +``` ## buildId() @@ -806,7 +936,7 @@ The window function that provides access to a row at a specified offset before o **Syntax** -``` sql +```sql neighbor(column, offset[, default_value]) ``` @@ -836,13 +966,13 @@ Type: type of data blocks affected or default value type. 
Query: -``` sql +```sql SELECT number, neighbor(number, 2) FROM system.numbers LIMIT 10; ``` Result: -``` text +```text ┌─number─┬─neighbor(number, 2)─┠│ 0 │ 2 │ │ 1 │ 3 │ @@ -859,13 +989,13 @@ Result: Query: -``` sql +```sql SELECT number, neighbor(number, 2, 999) FROM system.numbers LIMIT 10; ``` Result: -``` text +```text ┌─number─┬─neighbor(number, 2, 999)─┠│ 0 │ 2 │ │ 1 │ 3 │ @@ -884,7 +1014,7 @@ This function can be used to compute year-over-year metric value: Query: -``` sql +```sql WITH toDate('2018-01-01') AS start_date SELECT toStartOfMonth(start_date + (number * 32)) AS month, @@ -896,7 +1026,7 @@ FROM numbers(16) Result: -``` text +```text ┌──────month─┬─money─┬─prev_year─┬─year_over_year─┠│ 2018-01-01 │ 32 │ 0 │ 0 │ │ 2018-02-01 │ 63 │ 0 │ 0 │ @@ -933,7 +1063,7 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st Example: -``` sql +```sql SELECT EventID, EventTime, @@ -950,7 +1080,7 @@ FROM ) ``` -``` text +```text ┌─EventID─┬───────────EventTime─┬─delta─┠│ 1106 │ 2016-11-24 00:00:04 │ 0 │ │ 1107 │ 2016-11-24 00:00:05 │ 1 │ @@ -962,7 +1092,7 @@ FROM Please note that the block size affects the result. The internal state of `runningDifference` state is reset for each new block. -``` sql +```sql SELECT number, runningDifference(number + 1) AS diff @@ -970,7 +1100,7 @@ FROM numbers(100000) WHERE diff != 1 ``` -``` text +```text ┌─number─┬─diff─┠│ 0 │ 0 │ └────────┴──────┘ @@ -979,7 +1109,7 @@ WHERE diff != 1 └────────┴──────┘ ``` -``` sql +```sql set max_block_size=100000 -- default value is 65536! SELECT @@ -989,7 +1119,7 @@ FROM numbers(100000) WHERE diff != 1 ``` -``` text +```text ┌─number─┬─diff─┠│ 0 │ 0 │ └────────┴──────┘ @@ -1005,21 +1135,20 @@ Calculates the number of concurrent events. Each event has a start time and an end time. The start time is included in the event, while the end time is excluded. Columns with a start time and an end time must be of the same data type. The function calculates the total number of active (concurrent) events for each event start time. - :::tip Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. ::: **Syntax** -``` sql +```sql runningConcurrency(start, end) ``` **Arguments** - `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). 
**Returned values** @@ -1031,7 +1160,7 @@ Type: [UInt32](../../sql-reference/data-types/int-uint.md) Consider the table: -``` text +```text ┌──────start─┬────────end─┠│ 2021-03-03 │ 2021-03-11 │ │ 2021-03-06 │ 2021-03-12 │ @@ -1042,13 +1171,13 @@ Consider the table: Query: -``` sql +```sql SELECT start, runningConcurrency(start, end) FROM example_table; ``` Result: -``` text +```text ┌──────start─┬─runningConcurrency(start, end)─┠│ 2021-03-03 │ 1 │ │ 2021-03-06 │ 2 │ @@ -1074,7 +1203,7 @@ Given a MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexa Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). An exception is thrown if the type is not `Enum`. -``` sql +```sql getSizeOfEnumType(value) ``` @@ -1088,11 +1217,11 @@ getSizeOfEnumType(value) **Example** -``` sql +```sql SELECT getSizeOfEnumType( CAST('a' AS Enum8('a' = 1, 'b' = 2) ) ) AS x ``` -``` text +```text ┌─x─┠│ 2 │ └───┘ @@ -1102,7 +1231,7 @@ SELECT getSizeOfEnumType( CAST('a' AS Enum8('a' = 1, 'b' = 2) ) ) AS x Returns the size on disk without considering compression. -``` sql +```sql blockSerializedSize(value[, value[, ...]]) ``` @@ -1118,13 +1247,13 @@ blockSerializedSize(value[, value[, ...]]) Query: -``` sql +```sql SELECT blockSerializedSize(maxState(1)) as x ``` Result: -``` text +```text ┌─x─┠│ 2 │ └───┘ @@ -1134,7 +1263,7 @@ Result: Returns the internal name of the data type that represents the value. -``` sql +```sql toColumnTypeName(value) ``` @@ -1150,13 +1279,13 @@ toColumnTypeName(value) Difference between `toTypeName ' and ' toColumnTypeName`: -``` sql +```sql SELECT toTypeName(CAST('2018-01-01 01:02:03' AS DateTime)) ``` Result: -``` text +```text ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┠│ DateTime │ └─────────────────────────────────────────────────────┘ @@ -1164,13 +1293,13 @@ Result: Query: -``` sql +```sql SELECT toColumnTypeName(CAST('2018-01-01 01:02:03' AS DateTime)) ``` Result: -``` text +```text ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┠│ Const(UInt32) │ └───────────────────────────────────────────────────────────┘ @@ -1182,7 +1311,7 @@ The example shows that the `DateTime` data type is internally stored as `Const(U Outputs a detailed description of data structures in RAM -``` sql +```sql dumpColumnStructure(value) ``` @@ -1196,11 +1325,11 @@ dumpColumnStructure(value) **Example** -``` sql +```sql SELECT dumpColumnStructure(CAST('2018-01-01 01:02:03', 'DateTime')) ``` -``` text +```text ┌─dumpColumnStructure(CAST('2018-01-01 01:02:03', 'DateTime'))─┠│ DateTime, Const(size = 1, UInt32(size = 1)) │ └──────────────────────────────────────────────────────────────┘ @@ -1212,7 +1341,7 @@ Returns the default value for the given data type. Does not include default values for custom columns set by the user. -``` sql +```sql defaultValueOfArgumentType(expression) ``` @@ -1230,13 +1359,13 @@ defaultValueOfArgumentType(expression) Query: -``` sql +```sql SELECT defaultValueOfArgumentType( CAST(1 AS Int8) ) ``` Result: -``` text +```text ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┠│ 0 │ └─────────────────────────────────────────────┘ @@ -1244,13 +1373,13 @@ Result: Query: -``` sql +```sql SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) ``` Result: -``` text +```text ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┠│ á´ºáµá´¸á´¸ │ └───────────────────────────────────────────────────────┘ @@ -1262,7 +1391,7 @@ Returns the default value for the given type name. 
Does not include default values for custom columns set by the user. -``` sql +```sql defaultValueOfTypeName(type) ``` @@ -1280,13 +1409,13 @@ defaultValueOfTypeName(type) Query: -``` sql +```sql SELECT defaultValueOfTypeName('Int8') ``` Result: -``` text +```text ┌─defaultValueOfTypeName('Int8')─┠│ 0 │ └────────────────────────────────┘ @@ -1294,13 +1423,13 @@ Result: Query: -``` sql +```sql SELECT defaultValueOfTypeName('Nullable(Int8)') ``` Result: -``` text +```text ┌─defaultValueOfTypeName('Nullable(Int8)')─┠│ á´ºáµá´¸á´¸ │ └──────────────────────────────────────────┘ @@ -1412,7 +1541,7 @@ Creates an array with a single value. Used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). -``` sql +```sql SELECT replicate(x, arr); ``` @@ -1431,13 +1560,13 @@ Type: `Array`. Query: -``` sql +```sql SELECT replicate(1, ['a', 'b', 'c']) ``` Result: -``` text +```text ┌─replicate(1, ['a', 'b', 'c'])─┠│ [1,1,1] │ └───────────────────────────────┘ @@ -1449,7 +1578,7 @@ Returns the amount of free space in the filesystem hosting the database persiste **Syntax** -``` sql +```sql filesystemAvailable() ``` @@ -1463,13 +1592,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT formatReadableSize(filesystemAvailable()) AS "Available space"; ``` Result: -``` text +```text ┌─Available space─┠│ 30.75 GiB │ └─────────────────┘ @@ -1481,7 +1610,7 @@ Returns the total amount of the free space on the filesystem hosting the databas **Syntax** -``` sql +```sql filesystemFree() ``` @@ -1495,13 +1624,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT formatReadableSize(filesystemFree()) AS "Free space"; ``` Result: -``` text +```text ┌─Free space─┠│ 32.39 GiB │ └────────────┘ @@ -1513,7 +1642,7 @@ Returns the capacity of the filesystem in bytes. Needs the [path](../../operatio **Syntax** -``` sql +```sql filesystemCapacity() ``` @@ -1527,13 +1656,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT formatReadableSize(filesystemCapacity()) AS "Capacity"; ``` Result: -``` text +```text ┌─Capacity──┠│ 39.32 GiB │ └───────────┘ @@ -1545,7 +1674,7 @@ Calculates the result of an aggregate function based on a single value. This fun **Syntax** -``` sql +```sql initializeAggregation (aggregate_function, arg1, arg2, ..., argN) ``` @@ -1567,6 +1696,7 @@ Query: ```sql SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(10000)); ``` + Result: ```text @@ -1619,7 +1749,7 @@ Given a state of aggregate function, this function returns the result of aggrega **Syntax** -``` sql +```sql finalizeAggregation(state) ``` @@ -1724,7 +1854,7 @@ The state is reset for each new block of data. 
**Syntax** -``` sql +```sql runningAccumulate(agg_state[, grouping]); ``` @@ -1745,13 +1875,13 @@ Consider how you can use `runningAccumulate` to find the cumulative sum of numbe Query: -``` sql +```sql SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k) AS sum_k FROM numbers(10) GROUP BY k ORDER BY k); ``` Result: -``` text +```text ┌─k─┬─res─┠│ 0 │ 0 │ │ 1 │ 1 │ @@ -1779,7 +1909,7 @@ The following example shows the `groupping` parameter usage: Query: -``` sql +```sql SELECT grouping, item, @@ -1798,7 +1928,7 @@ FROM Result: -``` text +```text ┌─grouping─┬─item─┬─res─┠│ 0 │ 0 │ 0 │ │ 0 │ 1 │ 1 │ @@ -1830,7 +1960,7 @@ Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` st **Syntax** -``` sql +```sql joinGet(join_storage_table_name, `value_column`, join_keys) ``` @@ -1852,13 +1982,13 @@ More info about `join_use_nulls` in [Join operation](../../engines/table-engines Input table: -``` sql +```sql CREATE DATABASE db_test CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1 INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) ``` -``` text +```text ┌─id─┬─val─┠│ 4 │ 13 │ │ 2 │ 12 │ @@ -1868,13 +1998,13 @@ INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) Query: -``` sql +```sql SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1 ``` Result: -``` text +```text ┌─joinGet(db_test.id_val, 'val', toUInt32(number))─┠│ 0 │ │ 11 │ @@ -1892,7 +2022,7 @@ This function is not available in ClickHouse Cloud. Evaluate an external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning. Accepts a path to a catboost model and model arguments (features). Returns Float64. -``` sql +```sql SELECT feat1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction FROM data_table ``` @@ -1905,7 +2035,7 @@ Before evaluating catboost models, the `libcatboostmodel.` library mus Next, specify the path to `libcatboostmodel.` in the clickhouse configuration: -``` xml +```xml ... /path/to/libcatboostmodel.so @@ -1918,7 +2048,7 @@ At the first execution of `catboostEvaluate()`, the server starts the library br communicate using a HTTP interface. By default, port `9012` is used. A different port can be specified as follows - this is useful if port `9012` is already assigned to a different service. -``` xml +```xml 9019 @@ -1942,13 +2072,13 @@ To use the `error_code` argument, configuration parameter `allow_custom_error_co **Example** -``` sql +```sql SELECT throwIf(number = 3, 'Too many') FROM numbers(10); ``` Result: -``` text +```text ↙ Progress: 0.00 rows, 0.00 B (0.00 rows/s., 0.00 B/s.) Received exception from server (version 19.14.1): Code: 395. DB::Exception: Received from localhost:9000. DB::Exception: Too many. ``` @@ -1959,7 +2089,7 @@ Returns its argument. Intended for debugging and testing. 
Allows to cancel using **Syntax** -``` sql +```sql identity(x) ``` @@ -1967,13 +2097,13 @@ identity(x) Query: -``` sql +```sql SELECT identity(42); ``` Result: -``` text +```text ┌─identity(42)─┠│ 42 │ └──────────────┘ @@ -2020,7 +2150,7 @@ Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is **Syntax** -``` sql +```sql isDecimalOverflow(d, [p]) ``` @@ -2038,7 +2168,7 @@ isDecimalOverflow(d, [p]) Query: -``` sql +```sql SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9), isDecimalOverflow(toDecimal32(1000000000, 0)), isDecimalOverflow(toDecimal32(-1000000000, 0), 9), @@ -2047,7 +2177,7 @@ SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9), Result: -``` text +```text 1 1 1 1 ``` @@ -2057,7 +2187,7 @@ Returns number of decimal digits need to represent a value. **Syntax** -``` sql +```sql countDigits(x) ``` @@ -2079,7 +2209,7 @@ For `Decimal` values takes into account their scales: calculates result over und Query: -``` sql +```sql SELECT countDigits(toDecimal32(1, 9)), countDigits(toDecimal32(-1, 9)), countDigits(toDecimal64(1, 18)), countDigits(toDecimal64(-1, 18)), countDigits(toDecimal128(1, 38)), countDigits(toDecimal128(-1, 38)); @@ -2087,7 +2217,7 @@ SELECT countDigits(toDecimal32(1, 9)), countDigits(toDecimal32(-1, 9)), Result: -``` text +```text 10 10 19 19 39 39 ``` @@ -2099,13 +2229,13 @@ Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md) **Syntax** -``` sql +```sql errorCodeToName(1) ``` Result: -``` text +```text UNSUPPORTED_METHOD ``` @@ -2116,7 +2246,7 @@ If executed in the context of a distributed table, this function generates a nor **Syntax** -``` sql +```sql tcpPort() ``` @@ -2134,13 +2264,13 @@ Type: [UInt16](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT tcpPort(); ``` Result: -``` text +```text ┌─tcpPort()─┠│ 9000 │ └───────────┘ @@ -2158,7 +2288,7 @@ The command [SET PROFILE](../../sql-reference/statements/set.md#query-set) could **Syntax** -``` sql +```sql currentProfiles() ``` @@ -2170,11 +2300,11 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere ## enabledProfiles - Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file). +Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file). **Syntax** -``` sql +```sql enabledProfiles() ``` @@ -2190,7 +2320,7 @@ Returns all the profiles specified at the current user's definition (see [CREATE **Syntax** -``` sql +```sql defaultProfiles() ``` @@ -2206,7 +2336,7 @@ Returns the roles assigned to the current user. 
The roles can be changed by the **Syntax** -``` sql +```sql currentRoles() ``` @@ -2222,7 +2352,7 @@ Returns the names of the current roles and the roles, granted to some of the cur **Syntax** -``` sql +```sql enabledRoles() ``` @@ -2238,7 +2368,7 @@ Returns the roles which are enabled by default for the current user when he logs **Syntax** -``` sql +```sql defaultRoles() ``` @@ -2254,7 +2384,7 @@ Returns the server port number. When the port is not used by the server, throws **Syntax** -``` sql +```sql getServerPort(port_name) ``` @@ -2262,16 +2392,16 @@ getServerPort(port_name) - `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: - - 'tcp_port' - - 'tcp_port_secure' - - 'http_port' - - 'https_port' - - 'interserver_http_port' - - 'interserver_https_port' - - 'mysql_port' - - 'postgresql_port' - - 'grpc_port' - - 'prometheus.port' + - 'tcp_port' + - 'tcp_port_secure' + - 'http_port' + - 'https_port' + - 'interserver_http_port' + - 'interserver_https_port' + - 'mysql_port' + - 'postgresql_port' + - 'grpc_port' + - 'prometheus.port' **Returned value** @@ -2283,13 +2413,13 @@ Type: [UInt16](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT getServerPort('tcp_port'); ``` Result: -``` text +```text ┌─getServerPort('tcp_port')─┠│ 9000 │ └───────────────────────────┘ @@ -2303,7 +2433,7 @@ In contrast to [initialQueryID](#initial-query-id) function, `queryID` can retur **Syntax** -``` sql +```sql queryID() ``` @@ -2317,7 +2447,7 @@ Type: [String](../../sql-reference/data-types/string.md) Query: -``` sql +```sql CREATE TABLE tmp (str String) ENGINE = Log; INSERT INTO tmp (*) VALUES ('a'); SELECT count(DISTINCT t) FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); @@ -2325,7 +2455,7 @@ SELECT count(DISTINCT t) FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3} Result: -``` text +```text ┌─count()─┠│ 3 │ └─────────┘ @@ -2339,7 +2469,7 @@ In contrast to [queryID](#query-id) function, `initialQueryID` returns the same **Syntax** -``` sql +```sql initialQueryID() ``` @@ -2353,7 +2483,7 @@ Type: [String](../../sql-reference/data-types/string.md) Query: -``` sql +```sql CREATE TABLE tmp (str String) ENGINE = Log; INSERT INTO tmp (*) VALUES ('a'); SELECT count(DISTINCT t) FROM (SELECT initialQueryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); @@ -2361,7 +2491,7 @@ SELECT count(DISTINCT t) FROM (SELECT initialQueryID() AS t FROM remote('127.0.0 Result: -``` text +```text ┌─count()─┠│ 1 │ └─────────┘ @@ -2374,7 +2504,7 @@ If a query is not distributed then constant value `0` is returned. **Syntax** -``` sql +```sql shardNum() ``` @@ -2390,7 +2520,7 @@ In the following example a configuration with two shards is used. The query is e Query: -``` sql +```sql CREATE TABLE shard_num_example (dummy UInt8) ENGINE=Distributed(test_cluster_two_shards_localhost, system, one, dummy); SELECT dummy, shardNum(), shardCount() FROM shard_num_example; @@ -2398,7 +2528,7 @@ SELECT dummy, shardNum(), shardCount() FROM shard_num_example; Result: -``` text +```text ┌─dummy─┬─shardNum()─┬─shardCount()─┠│ 0 │ 2 │ 2 │ │ 0 │ 1 │ 2 │ @@ -2416,7 +2546,7 @@ If a query is not distributed then constant value `0` is returned. **Syntax** -``` sql +```sql shardCount() ``` @@ -2436,7 +2566,7 @@ Returns a string with the current OS kernel version. 
**Syntax** -``` sql +```sql getOSKernelVersion() ``` @@ -2454,13 +2584,13 @@ Type: [String](../../sql-reference/data-types/string.md). Query: -``` sql +```sql SELECT getOSKernelVersion(); ``` Result: -``` text +```text ┌─getOSKernelVersion()────┠│ Linux 4.15.0-55-generic │ └─────────────────────────┘ @@ -2472,7 +2602,7 @@ Returns the uptime of the current ZooKeeper session in seconds. **Syntax** -``` sql +```sql zookeeperSessionUptime() ``` @@ -2490,13 +2620,13 @@ Type: [UInt32](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT zookeeperSessionUptime(); ``` Result: -``` text +```text ┌─zookeeperSessionUptime()─┠│ 286 │ └──────────────────────────┘ @@ -2508,7 +2638,7 @@ Generates random table structure in a format `column1_name column1_type, column2 **Syntax** -``` sql +```sql generateRandomStructure([number_of_columns, seed]) ``` @@ -2529,13 +2659,13 @@ Type: [String](../../sql-reference/data-types/string.md). Query: -``` sql +```sql SELECT generateRandomStructure() ``` Result: -``` text +```text ┌─generateRandomStructure()─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┠│ c1 Decimal32(5), c2 Date, c3 Tuple(LowCardinality(String), Int128, UInt64, UInt16, UInt8, IPv6), c4 Array(UInt128), c5 UInt32, c6 IPv4, c7 Decimal256(64), c8 Decimal128(3), c9 UInt256, c10 UInt64, c11 DateTime │ └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ @@ -2543,13 +2673,13 @@ Result: Query: -``` sql +```sql SELECT generateRandomStructure(1) ``` Result: -``` text +```text ┌─generateRandomStructure(1)─┠│ c1 Map(UInt256, UInt16) │ └────────────────────────────┘ @@ -2557,13 +2687,13 @@ Result: Query: -``` sql +```sql SELECT generateRandomStructure(NULL, 33) ``` Result: -``` text +```text ┌─generateRandomStructure(NULL, 33)─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┠│ c1 DateTime, c2 Enum8('c2V0' = 0, 'c2V1' = 1, 'c2V2' = 2, 'c2V3' = 3), c3 LowCardinality(Nullable(FixedString(30))), c4 Int16, c5 Enum8('c5V0' = 0, 'c5V1' = 1, 'c5V2' = 2, 'c5V3' = 3), c6 Nullable(UInt8), c7 String, c8 Nested(e1 IPv4, e2 UInt8, e3 UInt16, e4 UInt16, e5 Int32, e6 Map(Date, Decimal256(70))) │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ @@ -2579,7 +2709,7 @@ Converts ClickHouse table structure to CapnProto schema. **Syntax** -``` sql +```sql structureToCapnProtoSchema(structure) ``` @@ -2590,7 +2720,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema +- CapnProto schema Type: [String](../../sql-reference/data-types/string.md). @@ -2598,13 +2728,13 @@ Type: [String](../../sql-reference/data-types/string.md). 
Query: -``` sql +```sql SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB ``` Result: -``` text +```text @0xf96402dd754d0eb7; struct Message @@ -2617,13 +2747,13 @@ struct Message Query: -``` sql +```sql SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB ``` Result: -``` text +```text @0xd1c8320fecad2b7f; struct Message @@ -2658,13 +2788,13 @@ struct Message Query: -``` sql +```sql SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB ``` Result: -``` text +```text @0x96ab2d4ab133c6e1; struct Root @@ -2680,7 +2810,7 @@ Converts ClickHouse table structure to Protobuf schema. **Syntax** -``` sql +```sql structureToProtobufSchema(structure) ``` @@ -2699,13 +2829,13 @@ Type: [String](../../sql-reference/data-types/string.md). Query: -``` sql +```sql SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB ``` Result: -``` text +```text syntax = "proto3"; message Message @@ -2718,13 +2848,13 @@ message Message Query: -``` sql +```sql SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB ``` Result: -``` text +```text syntax = "proto3"; message Message @@ -2742,13 +2872,13 @@ message Message Query: -``` sql +```sql SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB ``` Result: -``` text +```text syntax = "proto3"; message Root @@ -2920,7 +3050,7 @@ Calculates minimum required sample size for an A/B test comparing conversions (p **Syntax** -``` sql +```sql minSampleSizeConversion(baseline, mde, power, alpha) ``` @@ -2945,13 +3075,13 @@ A named [Tuple](../data-types/tuple.md) with 3 elements: The following query calculates the required sample size for an A/B test with baseline conversion of 25%, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: -``` sql +```sql SELECT minSampleSizeConversion(0.25, 0.03, 0.80, 0.05) AS sample_size; ``` Result: -``` text +```text ┌─sample_size───────────────────┠│ (3396.077603219163,0.22,0.28) │ └───────────────────────────────┘ @@ -2963,7 +3093,7 @@ Calculates minimum required sample size for an A/B test comparing means of a con **Syntax** -``` sql +```sql minSampleSizeContinous(baseline, sigma, mde, power, alpha) ``` @@ -2975,7 +3105,7 @@ Uses the formula described in [this article](https://towardsdatascience.com/requ - `baseline` — Baseline value of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). - `sigma` — Baseline standard deviation of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). -- `mde` — Minimum detectable effect (MDE) as percentage of the baseline value (e.g. for a baseline value 112.25 the MDE 0.03 means an expected change to 112.25 ± 112.25*0.03). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `mde` — Minimum detectable effect (MDE) as percentage of the baseline value (e.g. for a baseline value 112.25 the MDE 0.03 means an expected change to 112.25 ± 112.25\*0.03). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). - `power` — Required statistical power of a test (1 - probability of Type II error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). 
- `alpha` — Required significance level of a test (probability of Type I error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). @@ -2991,13 +3121,13 @@ A named [Tuple](../data-types/tuple.md) with 3 elements: The following query calculates the required sample size for an A/B test on a metric with baseline value of 112.25, standard deviation of 21.1, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: -``` sql +```sql SELECT minSampleSizeContinous(112.25, 21.1, 0.03, 0.80, 0.05) AS sample_size; ``` Result: -``` text +```text ┌─sample_size───────────────────────────┠│ (616.2931945826209,108.8825,115.6175) │ └───────────────────────────────────────┘ diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 6fd31e8d25c..2d7752ed022 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -11,79 +11,173 @@ elimination](../../sql-reference/functions/index.md#common-subexpression-elimina function return different random values. Related content + - Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) :::note The random numbers are generated by non-cryptographic algorithms. ::: -## rand, rand32 +## rand -Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers. +Returns a random UInt32 number with uniform distribution. -Uses a linear congruential generator. +Uses a linear congruential generator with an initial state obtained from the system, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries. + +### Syntax + +```sql +rand() +``` + +Alias: `rand32` + +### Arguments + +None. + +### Returned value + +Returns a number of type UInt32. + +### Example + +```sql +SELECT rand(); +``` + +```response +1569354847 -- Note: The actual output will be a random number, not the specific number shown in the example +``` ## rand64 -Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers. +Returns a random UInt64 integer (UInt64) number -Uses a linear congruential generator. +### Syntax + +```sql +rand64() +``` + +### Arguments + +None. + +### Returned value + +Returns a number UInt64 number with uniform distribution. + +Uses a linear congruential generator with an initial state obtained from the system, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries. + +### Example + +```sql +SELECT rand64(); +``` + +```response +15030268859237645412 -- Note: The actual output will be a random number, not the specific number shown in the example. +``` ## randCanonical -Returns a random Float64 value, evenly distributed in interval [0, 1). +Returns a random Float64 number. + +### Syntax + +```sql +randCanonical() +``` + +### Arguments + +None. + +### Returned value + +Returns a Float64 value between 0 (inclusive) and 1 (exclusive). 
+ +### Example + +```sql +SELECT randCanonical(); +``` + +```response +0.3452178901234567 - Note: The actual output will be a random Float64 number between 0 and 1, not the specific number shown in the example. +``` ## randConstant -Like `rand` but produces a constant column with a random value. +Generates a single constant column filled with a random value. Unlike `rand`, this function ensures the same random value appears in every row of the generated column, making it useful for scenarios requiring a consistent random seed across rows in a single query. -**Example** +### Syntax -``` sql -SELECT rand(), rand(1), rand(number), randConstant(), randConstant(1), randConstant(number) -FROM numbers(3) +```sql +randConstant([x]); ``` -Result: +### Arguments -``` result -┌─────rand()─┬────rand(1)─┬─rand(number)─┬─randConstant()─┬─randConstant(1)─┬─randConstant(number)─┠-│ 3047369878 │ 4132449925 │ 4044508545 │ 2740811946 │ 4229401477 │ 1924032898 │ -│ 2938880146 │ 1267722397 │ 4154983056 │ 2740811946 │ 4229401477 │ 1924032898 │ -│ 956619638 │ 4238287282 │ 1104342490 │ 2740811946 │ 4229401477 │ 1924032898 │ -└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ +- **[x] (Optional):** An optional expression that influences the generated random value. Even if provided, the resulting value will still be constant within the same query execution. Different queries using the same expression will likely generate different constant values. + +### Returned value + +Returns a column of type UInt32 containing the same random value in each row. + +### Implementation details + +The actual output will be different for each query execution, even with the same optional expression. The optional parameter may not significantly change the generated value compared to using `randConstant` alone. + +### Examples + +```sql +SELECT randConstant() AS random_value; +``` + +```response +| random_value | +|--------------| +| 1234567890 | +``` + +```sql +SELECT randConstant(10) AS random_value; +``` + +```response +| random_value | +|--------------| +| 9876543210 | ``` ## randUniform -Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)). +Returns a random Float64 drawn uniformly from interval [`min`, `max`]. -**Syntax** +### Syntax -``` sql +```sql randUniform(min, max) ``` -**Arguments** +### Arguments - `min` - `Float64` - left boundary of the range, - `max` - `Float64` - right boundary of the range. -**Returned value** +### Returned value -- Random number. +A random number of type [Float64](/docs/en/sql-reference/data-types/float.md). -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +### Example -**Example** - -``` sql +```sql SELECT randUniform(5.5, 10) FROM numbers(5) ``` -Result: - -``` result +```response ┌─randUniform(5.5, 10)─┠│ 8.094978491443102 │ │ 7.3181248914450885 │ @@ -99,7 +193,7 @@ Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia **Syntax** -``` sql +```sql randNormal(mean, variance) ``` @@ -116,13 +210,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
**Example** -``` sql +```sql SELECT randNormal(10, 2) FROM numbers(5) ``` Result: -``` result +```result ┌──randNormal(10, 2)─┠│ 13.389228911709653 │ │ 8.622949707401295 │ @@ -138,7 +232,7 @@ Returns a random Float64 drawn from a [log-normal distribution](https://en.wikip **Syntax** -``` sql +```sql randLogNormal(mean, variance) ``` @@ -155,13 +249,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randLogNormal(100, 5) FROM numbers(5) ``` Result: -``` result +```result ┌─randLogNormal(100, 5)─┠│ 1.295699673937363e48 │ │ 9.719869109186684e39 │ @@ -177,7 +271,7 @@ Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedi **Syntax** -``` sql +```sql randBinomial(experiments, probability) ``` @@ -194,13 +288,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randBinomial(100, .75) FROM numbers(5) ``` Result: -``` result +```result ┌─randBinomial(100, 0.75)─┠│ 74 │ │ 78 │ @@ -216,7 +310,7 @@ Returns a random UInt64 drawn from a [negative binomial distribution](https://en **Syntax** -``` sql +```sql randNegativeBinomial(experiments, probability) ``` @@ -233,13 +327,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randNegativeBinomial(100, .75) FROM numbers(5) ``` Result: -``` result +```result ┌─randNegativeBinomial(100, 0.75)─┠│ 33 │ │ 32 │ @@ -255,7 +349,7 @@ Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia **Syntax** -``` sql +```sql randPoisson(n) ``` @@ -271,13 +365,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randPoisson(10) FROM numbers(5) ``` Result: -``` result +```result ┌─randPoisson(10)─┠│ 8 │ │ 8 │ @@ -293,7 +387,7 @@ Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikiped **Syntax** -``` sql +```sql randBernoulli(probability) ``` @@ -309,13 +403,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randBernoulli(.75) FROM numbers(5) ``` Result: -``` result +```result ┌─randBernoulli(0.75)─┠│ 1 │ │ 1 │ @@ -331,7 +425,7 @@ Returns a random Float64 drawn from a [exponential distribution](https://en.wiki **Syntax** -``` sql +```sql randExponential(lambda) ``` @@ -347,13 +441,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randExponential(1/10) FROM numbers(5) ``` Result: -``` result +```result ┌─randExponential(divide(1, 10))─┠│ 44.71628934340778 │ │ 4.211013337903262 │ @@ -369,7 +463,7 @@ Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikip **Syntax** -``` sql +```sql randChiSquared(degree_of_freedom) ``` @@ -385,13 +479,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randChiSquared(10) FROM numbers(5) ``` Result: -``` result +```result ┌─randChiSquared(10)─┠│ 10.015463656521543 │ │ 9.621799919882768 │ @@ -407,7 +501,7 @@ Returns a random Float64 drawn from a [Student's t-distribution](https://en.wiki **Syntax** -``` sql +```sql randStudentT(degree_of_freedom) ``` @@ -423,13 +517,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
**Example** -``` sql +```sql SELECT randStudentT(10) FROM numbers(5) ``` Result: -``` result +```result ┌─────randStudentT(10)─┠│ 1.2217309938538725 │ │ 1.7941971681200541 │ @@ -445,7 +539,7 @@ Returns a random Float64 drawn from a [F-distribution](https://en.wikipedia.org/ **Syntax** -``` sql +```sql randFisherF(d1, d2) ``` @@ -462,13 +556,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randFisherF(10, 3) FROM numbers(5) ``` Result: -``` result +```result ┌──randFisherF(10, 3)─┠│ 7.286287504216609 │ │ 0.26590779413050386 │ @@ -484,7 +578,7 @@ Generates a string of the specified length filled with random bytes (including z **Syntax** -``` sql +```sql randomString(length) ``` @@ -502,13 +596,13 @@ Type: [String](../../sql-reference/data-types/string.md). Query: -``` sql +```sql SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; ``` Result: -``` text +```text Row 1: ────── str: 3 G : pT ?w Ñ‚i k aV f6 @@ -526,7 +620,7 @@ Generates a binary string of the specified length filled with random bytes (incl **Syntax** -``` sql +```sql randomFixedString(length); ``` @@ -563,7 +657,7 @@ If you pass `length < 0`, the behavior of the function is undefined. **Syntax** -``` sql +```sql randomPrintableASCII(length) ``` @@ -579,11 +673,11 @@ Type: [String](../../sql-reference/data-types/string.md) **Example** -``` sql +```sql SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3 ``` -``` text +```text ┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┠│ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │ │ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │ @@ -597,7 +691,7 @@ Generates a random string of a specified length. Result string contains valid UT **Syntax** -``` sql +```sql randomStringUTF8(length); ``` @@ -635,11 +729,12 @@ Flips the bits of String or FixedString `s`, each with probability `prob`. **Syntax** -``` sql +```sql fuzzBits(s, prob) ``` **Arguments** + - `s` - `String` or `FixedString`, - `prob` - constant `Float32/64` between 0.0 and 1.0. @@ -649,14 +744,14 @@ Fuzzed string with same type as `s`. **Example** -``` sql +```sql SELECT fuzzBits(materialize('abacaba'), 0.1) FROM numbers(3) ``` Result: -``` result +```result ┌─fuzzBits(materialize('abacaba'), 0.1)─┠│ abaaaja │ │ a*cjab+ │ diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 60cb3ac4ac4..b4e2adbed3c 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -4,6 +4,8 @@ sidebar_position: 170 sidebar_label: Strings --- +import VersionBadge from '@theme/badges/VersionBadge'; + # Functions for Working with Strings Functions for [searching](string-search-functions.md) in strings and for [replacing](string-replace-functions.md) in strings are described separately. @@ -556,6 +558,7 @@ substring(s, offset[, length]) Alias: - `substr` - `mid` +- `byteSlice` **Arguments** @@ -585,8 +588,41 @@ Result: ## substringUTF8 -Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +Returns the substring of a string `s` which starts at the specified byte index `offset` for Unicode code points. Byte counting starts from `1`. If `offset` is `0`, an empty string is returned. 
If `offset` is negative, the substring starts `offset` characters from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of bytes the returned substring may have. +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Syntax** + +```sql +substringUTF8(s, offset[, length]) +``` + +**Arguments** + +- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) +- `offset`: The starting position of the substring in `s`. [(U)Int*](../../sql-reference/data-types/int-uint.md). +- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. + +**Returned value** + +A substring of `s` with at most `length` bytes, starting at index `offset`. + +**Implementation details** + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Example** + +```sql +SELECT 'Täglich grüßt das Murmeltier.' AS str, + substringUTF8(str, 9), + substringUTF8(str, 9, 5) +``` + +```response +Täglich grüßt das Murmeltier. grüßt das Murmeltier. grüßt +``` ## substringIndex @@ -621,7 +657,39 @@ Result: ## substringIndexUTF8 -Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +Returns the substring of `s` before `count` occurrences of the delimiter `delim`, operating on Unicode code points. + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Syntax** + +```sql +substringIndexUTF8(s, delim, count) +``` + +**Arguments** + +- `s`: The string to extract the substring from. [String](../../sql-reference/data-types/string.md). +- `delim`: The character to split on. [String](../../sql-reference/data-types/string.md). +- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) + +**Returned value** + +A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`. + +**Implementation details** + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Example** + +```sql +SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2) +``` + +```response +www.straßen-in-europa +``` ## appendTrailingCharIfAbsent @@ -783,6 +851,8 @@ SELECT startsWith('Spider-Man', 'Spi'); ## startsWithUTF8 + + Returns whether string `str` starts with `prefix`. The difference between `startsWithUTF8` and `startsWith` is that `startsWithUTF8` matches `str` and `prefix` by UTF-8 characters.
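The snippet below is not part of the patch above; it is a minimal, hedged sketch of how the byte-oriented and UTF-8-oriented prefix checks can differ. It assumes the literal `'\xe4'` is only the first byte of the multi-byte UTF-8 encoding of `'中'`, so the expected outputs (`0` for `startsWithUTF8`, `1` for `startsWith`) are indicative rather than authoritative.

```sql
-- Hedged sketch: '\xe4' is a single byte, not a complete UTF-8 code point.
-- startsWithUTF8 compares whole code points, while startsWith compares raw bytes,
-- so the two calls are expected to disagree on this input.
SELECT
    startsWithUTF8('中国', '\xe4') AS utf8_aware,  -- expected: 0
    startsWith('中国', '\xe4') AS byte_based;      -- expected: 1
```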
diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index ce36c89f473..e80a3fa9860 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -22,8 +22,8 @@ seriesOutliersDetectTukey(series, min_percentile, max_percentile, K); **Arguments** - `series` - An array of numeric values. -- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25. -- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75. +- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [0.02,0.98]. The default is 0.25. +- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [0.02,0.98]. The default is 0.75. - `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5. At least four data points are required in `series` to detect outliers. @@ -53,7 +53,7 @@ Result: Query: ``` sql -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0; +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 0.2, 0.8, 1.5) AS print_0; ``` Result: diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 5930239dc56..b089de67e98 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -542,7 +542,7 @@ Alias: `scalarProduct`. - Scalar product. -Type: [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +Type: [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). **Example** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 37d4ac30648..ea08ffa50e7 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -968,7 +968,7 @@ Converts a numeric value to String with the number of fractional digits in the o toDecimalString(number, scale) ``` -**Parameters** +**Arguments** - `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), - `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). @@ -1261,7 +1261,7 @@ Converts input value `x` to the specified data type `T`. Always returns [Nullabl accurateCastOrNull(x, T) ``` -**Parameters** +**Arguments** - `x` — Input value. - `T` — The name of the returned data type. @@ -1314,7 +1314,7 @@ Converts input value `x` to the specified data type `T`. Returns default type va accurateCastOrDefault(x, T) ``` -**Parameters** +**Arguments** - `x` — Input value. - `T` — The name of the returned data type. 
@@ -1675,7 +1675,7 @@ Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also pa parseDateTime64BestEffort(time_string [, precision [, time_zone]]) ``` -**Parameters** +**Arguments** - `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). - `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). @@ -1990,7 +1990,7 @@ Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wi snowflakeToDateTime(value[, time_zone]) ``` -**Parameters** +**Arguments** - `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). - `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). @@ -2026,7 +2026,7 @@ Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wi snowflakeToDateTime64(value[, time_zone]) ``` -**Parameters** +**Arguments** - `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). - `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). @@ -2062,7 +2062,7 @@ Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to th dateTimeToSnowflake(value) ``` -**Parameters** +**Arguments** - `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). @@ -2096,7 +2096,7 @@ Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the f dateTime64ToSnowflake(value) ``` -**Parameters** +**Arguments** - `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index f6871c86c4f..ac81815b47f 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -155,7 +155,7 @@ Configuration example: cutToFirstSignificantSubdomain(URL, TLD) ``` -**Parameters** +**Arguments** - `URL` — URL. [String](../../sql-reference/data-types/string.md). - `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). @@ -209,7 +209,7 @@ Configuration example: cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) ``` -**Parameters** +**Arguments** - `URL` — URL. [String](../../sql-reference/data-types/string.md). - `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). @@ -263,7 +263,7 @@ Configuration example: firstSignificantSubdomainCustom(URL, TLD) ``` -**Parameters** +**Arguments** - `URL` — URL. [String](../../sql-reference/data-types/string.md). - `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 120e464e009..31bf43e8b35 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -353,7 +353,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. 
-### IS NULL +### IS NULL {#is_null} - For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns: - `1`, if the value is `NULL`. @@ -374,7 +374,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL └──────────────┘ ``` -### IS NOT NULL +### IS NOT NULL {#is_not_null} - For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns: - `0`, if the value is `NULL`. diff --git a/docs/en/sql-reference/statements/alter/apply-deleted-mask.md b/docs/en/sql-reference/statements/alter/apply-deleted-mask.md index 7a11d66e739..1afc2a0ff5a 100644 --- a/docs/en/sql-reference/statements/alter/apply-deleted-mask.md +++ b/docs/en/sql-reference/statements/alter/apply-deleted-mask.md @@ -10,7 +10,7 @@ sidebar_label: APPLY DELETED MASK ALTER TABLE [db].name [ON CLUSTER cluster] APPLY DELETED MASK [IN PARTITION partition_id] ``` -The command applies mask created by [lightweight delete](/docs/en/sql-reference/statements/delete) and forcefully removes rows marked as deleted from disk. This command is a heavyweight mutation and it semantically equals to query ```ALTER TABLE [db].name DELETE WHERE _row_exists = 0```. +The command applies mask created by [lightweight delete](/docs/en/sql-reference/statements/delete) and forcefully removes rows marked as deleted from disk. This command is a heavyweight mutation, and it semantically equals to query ```ALTER TABLE [db].name DELETE WHERE _row_exists = 0```. :::note It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index f6d9668e628..a23710b12bd 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -272,10 +272,16 @@ ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING max_compress_bloc ## MATERIALIZE COLUMN -Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`). -It is used if it is necessary to add or update a column with a complicated expression, because evaluating such an expression directly on `SELECT` executing turns out to be expensive. +Materializes a column with a `DEFAULT` or `MATERIALIZED` value expression. +This statement can be used to rewrite existing column data after a `DEFAULT` or `MATERIALIZED` expression has been added or updated (which only updates the metadata but does not change existing data). Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). +For columns with a new or updated `MATERIALIZED` value expression, all existing rows are rewritten. + +For columns with a new or updated `DEFAULT` value expression, the behavior depends on the ClickHouse version: +- In ClickHouse < v24.2, all existing rows are rewritten. +- ClickHouse >= v24.2 distinguishes if a row value in a column with `DEFAULT` value expression was explicitly specified when it was inserted, or not, i.e. calculated from the `DEFAULT` value expression. If the value was explicitly specified, ClickHouse keeps it as is. If the value was was calculated, ClickHouse changes it to the new or updated `MATERIALIZED` value expression. 
+ Syntax: ```sql @@ -329,7 +335,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). -If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index 7a8f5809320..29675f704b5 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -15,7 +15,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name; See more on [constraints](../../../sql-reference/statements/create/table.md#constraints). -Queries will add or remove metadata about constraints from table so they are processed immediately. +Queries will add or remove metadata about constraints from table, so they are processed immediately. :::tip Constraint check **will not be executed** on existing data if it was added. diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 114b8d5ffe3..a21ef4f4af5 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -9,6 +9,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget - [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forget it. - [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part. +- [FORGET PARTITION](#forget-partition) — Deletes a partition metadata from zookeeper if it's empty. - [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. - [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds. - [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces. 
@@ -73,6 +74,22 @@ ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partiti Removes the specified part or all parts of the specified partition from `detached`. Read more about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression). +## FORGET PARTITION + +``` sql +ALTER TABLE table_name FORGET PARTITION partition_expr +``` + +Removes all metadata about an empty partition from ZooKeeper. Query fails if partition is not empty or unknown. Make sure to execute only for partitions that will never be used again. + +Read about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression). + +Example: + +``` sql +ALTER TABLE mt FORGET PARTITION '20201121'; +``` + ## ATTACH PARTITION\|PART ``` sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 028d0b09a1a..073a3c0d246 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -13,7 +13,9 @@ Creates a new view. Views can be [normal](#normal-view), [materialized](#materia Syntax: ``` sql -CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ... +CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] +[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] +AS SELECT ... ``` Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause. @@ -52,7 +54,9 @@ SELECT * FROM view(column1=value1, column2=value2 ...) ## Materialized View ``` sql -CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ... +CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] +[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] +AS SELECT ... ``` :::tip @@ -91,6 +95,49 @@ Views look the same as normal tables. For example, they are listed in the result To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Although `DROP TABLE` works for VIEWs as well. +## SQL security {#sql_security} + +`DEFINER` and `SQL SECURITY` allow you to specify which ClickHouse user to use when executing the view's underlying query. +`SQL SECURITY` has three legal values: `DEFINER`, `INVOKER`, or `NONE`. You can specify any existing user or `CURRENT_USER` in the `DEFINER` clause. + +The following table will explain which rights are required for which user in order to select from view. +Note that regardless of the SQL security option, in every case it is still required to have `GRANT SELECT ON ` in order to read from it. + +| SQL security option | View | Materialized View | +|---------------------|-----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------| +| `DEFINER alice` | `alice` must have a `SELECT` grant for the view's source table. | `alice` must have a `SELECT` grant for the view's source table and an `INSERT` grant for the view's target table. 
| `INVOKER` | User must have a `SELECT` grant for the view's source table. | `SQL SECURITY INVOKER` can't be specified for materialized views. | +| `NONE` | - | - | + +:::note +`SQL SECURITY NONE` is a deprecated option. Any user with the rights to create views with `SQL SECURITY NONE` will be able to execute any arbitrary query. +Thus, it is required to have `GRANT ALLOW SQL SECURITY NONE TO ` in order to create a view with this option. +::: + +If `DEFINER`/`SQL SECURITY` aren't specified, the default values are used: +- `SQL SECURITY`: `INVOKER` for normal views and `DEFINER` for materialized views ([configurable by settings](../../../operations/settings/settings.md#default_normal_view_sql_security)) +- `DEFINER`: `CURRENT_USER` ([configurable by settings](../../../operations/settings/settings.md#default_view_definer)) + +If a view is attached without `DEFINER`/`SQL SECURITY` specified, the default value is `SQL SECURITY NONE` for the materialized view and `SQL SECURITY INVOKER` for the normal view. + +To change SQL security for an existing view, use: +```sql +ALTER TABLE MODIFY SQL SECURITY { DEFINER | INVOKER | NONE } [DEFINER = { user | CURRENT_USER }] +``` + +### SQL security examples +```sql +CREATE VIEW test_view +DEFINER = alice SQL SECURITY DEFINER +AS SELECT ... +``` + +```sql +CREATE VIEW test_view +SQL SECURITY INVOKER +AS SELECT ... +``` + ## Live View [Deprecated] This feature is deprecated and will be removed in the future. diff --git a/docs/en/sql-reference/statements/detach.md b/docs/en/sql-reference/statements/detach.md index 938a5f9c3cb..e88e625aed1 100644 --- a/docs/en/sql-reference/statements/detach.md +++ b/docs/en/sql-reference/statements/detach.md @@ -16,13 +16,13 @@ DETACH TABLE|VIEW|DICTIONARY|DATABASE [IF EXISTS] [db.]name [ON CLUSTER cluster] Detaching does not delete the data or metadata of a table, a materialized view, a dictionary or a database. If an entity was not detached `PERMANENTLY`, on the next server launch the server will read the metadata and recall the table/view/dictionary/database again. If an entity was detached `PERMANENTLY`, there will be no automatic recall. Whether a table, a dictionary or a database was detached permanently or not, in both cases you can reattach them using the [ATTACH](../../sql-reference/statements/attach.md) query. -System log tables can be also attached back (e.g. `query_log`, `text_log`, etc). Other system tables can't be reattached. On the next server launch the server will recall those tables again. +System log tables can be also attached back (e.g. `query_log`, `text_log`, etc.). Other system tables can't be reattached. On the next server launch the server will recall those tables again. `ATTACH MATERIALIZED VIEW` does not work with short syntax (without `SELECT`), but you can attach it using the `ATTACH TABLE` query. Note that you can not detach permanently the table which is already detached (temporary). But you can attach it back and then detach permanently again. -Also you can not [DROP](../../sql-reference/statements/drop.md#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query.
+Also, you can not [DROP](../../sql-reference/statements/drop.md#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query. The `SYNC` modifier executes the action without delay. diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index e6073f3523a..a93db29e82c 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -114,6 +114,7 @@ Hierarchy of privileges: - `ALTER VIEW` - `ALTER VIEW REFRESH` - `ALTER VIEW MODIFY QUERY` + - `ALTER VIEW MODIFY SQL SECURITY` - [CREATE](#grant-create) - `CREATE DATABASE` - `CREATE TABLE` @@ -201,6 +202,13 @@ Hierarchy of privileges: - `S3` - [dictGet](#grant-dictget) - [displaySecretsInShowAndSelect](#grant-display-secrets) +- [NAMED COLLECTION ADMIN](#grant-named-collection-admin) + - `CREATE NAMED COLLECTION` + - `DROP NAMED COLLECTION` + - `ALTER NAMED COLLECTION` + - `SHOW NAMED COLLECTIONS` + - `SHOW NAMED COLLECTIONS SECRETS` + - `NAMED COLLECTION` Examples of how this hierarchy is treated: @@ -307,6 +315,7 @@ Allows executing [ALTER](../../sql-reference/statements/alter/index.md) queries - `ALTER VIEW` Level: `GROUP` - `ALTER VIEW REFRESH`. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` - `ALTER VIEW MODIFY QUERY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY QUERY` + - `ALTER VIEW MODIFY SQL SECURITY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY SQL SECURITY` Examples of how this hierarchy is treated: @@ -409,6 +418,7 @@ Allows a user to execute queries that manage users, roles and row policies. - `SHOW_ROW_POLICIES`. Level: `GLOBAL`. Aliases: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY` - `SHOW_QUOTAS`. Level: `GLOBAL`. Aliases: `SHOW CREATE QUOTA` - `SHOW_SETTINGS_PROFILES`. Level: `GLOBAL`. Aliases: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE` + - `ALLOW SQL SECURITY NONE`. Level: `GLOBAL`. Aliases: `CREATE SQL SECURITY NONE`, `SQL SECURITY NONE`, `SECURITY NONE` The `ROLE ADMIN` privilege allows a user to assign and revoke any roles including those which are not assigned to the user with the admin option. @@ -495,6 +505,25 @@ and [`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select) are turned on. +### NAMED COLLECTION ADMIN + +Allows a certain operation on a specified named collection. Before version 23.7 it was called NAMED COLLECTION CONTROL, and after 23.7 NAMED COLLECTION ADMIN was added and NAMED COLLECTION CONTROL is preserved as an alias. + +- `NAMED COLLECTION ADMIN`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION CONTROL` + - `CREATE NAMED COLLECTION`. Level: `NAMED_COLLECTION` + - `DROP NAMED COLLECTION`. Level: `NAMED_COLLECTION` + - `ALTER NAMED COLLECTION`. Level: `NAMED_COLLECTION` + - `SHOW NAMED COLLECTIONS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS` + - `SHOW NAMED COLLECTIONS SECRETS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS SECRETS` + - `NAMED COLLECTION`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION USAGE, USE NAMED COLLECTION` + +Unlike all other grants (CREATE, DROP, ALTER, SHOW) grant NAMED COLLECTION was added only in 23.7, while all others were added earlier - in 22.12. 
+ +**Examples** + +Assuming a named collection is called abc, we grant privilege CREATE NAMED COLLECTION to user john. +- `GRANT CREATE NAMED COLLECTION ON abc TO john` + ### ALL Grants all the privileges on regulated entity to a user account or a role. diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index f9d93305071..a76692cf291 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -176,7 +176,7 @@ INSERT INTO infile_globs FROM INFILE 'input_?.csv' FORMAT CSV; ``` ::: -## Inserting into Table Function +## Inserting using a Table Function Data can be inserted into tables referenced by [table functions](../../sql-reference/table-functions/index.md). @@ -204,6 +204,26 @@ Result: └─────┴───────────────────────┘ ``` +## Inserting into ClickHouse Cloud + +By default, services on ClickHouse Cloud provide multiple replicas for high availability. When you connect to a service, a connection is established to one of these replicas. + +After an `INSERT` succeeds, data is written to the underlying storage. However, it may take some time for replicas to receive these updates. Therefore, if you use a different connection that executes a `SELECT` query on one of these other replicas, the updated data may not yet be reflected. + +It is possible to use the `select_sequential_consistency` to force the replica to receive the latest updates. Here is an example of a SELECT query using this setting: + +```sql +SELECT .... SETTINGS select_sequential_consistency = 1; +``` + +Note that using `select_sequential_consistency` will increase the load on ClickHouse Keeper (used by ClickHouse Cloud internally) and may result in slower performance depending on the load on the service. We recommend against enabling this setting unless necessary. The recommended approach is to execute read/writes in the same session or to use a client driver that uses the native protocol (and thus supports sticky connections). + +## Inserting into a replicated setup + +In a replicated setup, data will be visible on other replicas after it has been replicated. Data begins being replicated (downloaded on other replicas) immediately after an `INSERT`. This differs from ClickHouse Cloud, where data is immediately written to shared storage and replicas subscribe to metadata changes. + +Note that for replicated setups, `INSERTs` can sometimes take a considerable amount of time (in the order of one second) as it requires committing to ClickHouse Keeper for distributed consensus. Using S3 for storage also adds additional latency. + ## Performance Considerations `INSERT` sorts the input data by primary key and splits them into partitions by a partition key. If you insert data into several partitions at once, it can significantly reduce the performance of the `INSERT` query. To avoid this: @@ -216,7 +236,15 @@ Performance will not decrease if: - Data is added in real time. - You upload data that is usually sorted by time. -It's also possible to asynchronously insert data in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To enable the asynchronous mode, switch on the [async_insert](../../operations/settings/settings.md#async-insert) setting. Note that asynchronous insertions are supported only over HTTP protocol, and deduplication is not supported for them. 
+### Asynchronous inserts + +It is possible to asynchronously insert data in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To use asynchronous inserts, enable the [`async_insert`](../../operations/settings/settings.md#async-insert) setting. + +Using `async_insert` or the [`Buffer` table engine](/en/engines/table-engines/special/buffer) results in additional buffering. + +### Large or long-running inserts + +When you are inserting large amounts of data, ClickHouse will optimize write performance through a process called "squashing". Small blocks of inserted data in memory are merged and squashed into larger blocks before being written to disk. Squashing reduces the overhead associated with each write operation. In this process, inserted data will be available to query after ClickHouse completes writing each [`max_insert_block_size`](/en/operations/settings/settings#max_insert_block_size) rows. **See Also** diff --git a/docs/en/sql-reference/statements/select/distinct.md b/docs/en/sql-reference/statements/select/distinct.md index 10326b0ef8f..08359b035ae 100644 --- a/docs/en/sql-reference/statements/select/distinct.md +++ b/docs/en/sql-reference/statements/select/distinct.md @@ -5,7 +5,7 @@ sidebar_label: DISTINCT # DISTINCT Clause -If `SELECT DISTINCT` is specified, only unique rows will remain in a query result. Thus only a single row will remain out of all the sets of fully matching rows in the result. +If `SELECT DISTINCT` is specified, only unique rows will remain in a query result. Thus, only a single row will remain out of all the sets of fully matching rows in the result. You can specify the list of columns that must have unique values: `SELECT DISTINCT ON (column1, column2,...)`. If the columns are not specified, all of them are taken into consideration. diff --git a/docs/en/sql-reference/statements/select/union.md b/docs/en/sql-reference/statements/select/union.md index 92a4ed1bb20..39ed3f2aceb 100644 --- a/docs/en/sql-reference/statements/select/union.md +++ b/docs/en/sql-reference/statements/select/union.md @@ -83,6 +83,3 @@ Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneo - [insert_null_as_default](../../../operations/settings/settings.md#insert_null_as_default) setting. - [union_default_mode](../../../operations/settings/settings.md#union-default-mode) setting. - - -[Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/union/) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 5d416dfffb3..a128814f072 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -68,7 +68,7 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name Clears ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). -For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters. +For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_max_entries, dns_cache_update_period parameters. 
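A minimal usage example of the statement described above (no arguments are needed; the comment describes the intended effect):

```sql
-- Clears the internal DNS cache manually, e.g. after a remote server's IP address has changed
SYSTEM DROP DNS CACHE;
```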
## DROP MARK CACHE diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index ad92ab39183..136ff72e4a9 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -5,7 +5,7 @@ sidebar_label: cluster title: "cluster, clusterAllReplicas" --- -Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried. +Allows to access all shards (configured in the `remote_servers` section) of a cluster without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. Only one replica of each shard is queried. `clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection. diff --git a/docs/en/sql-reference/table-functions/fileCluster.md b/docs/en/sql-reference/table-functions/fileCluster.md index 2646250311c..4677d2883a7 100644 --- a/docs/en/sql-reference/table-functions/fileCluster.md +++ b/docs/en/sql-reference/table-functions/fileCluster.md @@ -59,9 +59,7 @@ INSERT INTO TABLE FUNCTION file('file2.csv', 'CSV', 'i UInt32, s String') VALUES Now, read data contents of `test1.csv` and `test2.csv` via `fileCluster` table function: ```sql -SELECT * from fileCluster( - 'my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String') ORDER BY (i, s)""" -) +SELECT * FROM fileCluster('my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String') ORDER BY i, s ``` ``` diff --git a/docs/en/sql-reference/table-functions/merge.md b/docs/en/sql-reference/table-functions/merge.md index a1f376ba0eb..1d21bd504af 100644 --- a/docs/en/sql-reference/table-functions/merge.md +++ b/docs/en/sql-reference/table-functions/merge.md @@ -11,11 +11,11 @@ Creates a temporary [Merge](../../engines/table-engines/special/merge.md) table. **Syntax** ```sql -merge('db_name', 'tables_regexp') +merge(['db_name',] 'tables_regexp') ``` **Arguments** -- `db_name` — Possible values: +- `db_name` — Possible values (optional, default is `currentDatabase()`): - database name, - constant expression that returns a string with a database name, for example, `currentDatabase()`, - `REGEXP(expression)`, where `expression` is a regular expression to match the DB names. diff --git a/docs/en/sql-reference/table-functions/mergeTreeIndex.md b/docs/en/sql-reference/table-functions/mergeTreeIndex.md new file mode 100644 index 00000000000..dccfd1cfc97 --- /dev/null +++ b/docs/en/sql-reference/table-functions/mergeTreeIndex.md @@ -0,0 +1,83 @@ +--- +slug: /en/sql-reference/table-functions/mergeTreeIndex +sidebar_position: 77 +sidebar_label: mergeTreeIndex +--- + +# mergeTreeIndex + +Represents the contents of index and marks files of MergeTree tables. It can be used for introspection + +``` sql +mergeTreeIndex(database, table, [with_marks = true]) +``` + +**Arguments** + +- `database`- The database name to read index and marks from. +- `table`- The table name to read index and marks from. +- `with_marks` - Whether include columns with marks to the result. + +**Returned Value** + +A table object with columns with values of primary index of source table, columns with values of marks (if enabled) for all possible files in data parts of source table and virtual columns: + +- `part_name` - The name of data part. +- `mark_number` - The number of current mark in data part. 
+- `rows_in_granule` - The number of rows in current granule. + +Marks column may contain `(NULL, NULL)` value in case when column is absent in data part or marks for one of its substreams are not written (e.g. in compact parts). + +## Usage Example + +```sql +CREATE TABLE test_table +( + `id` UInt64, + `n` UInt64, + `arr` Array(UInt64) +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 3, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 8; + +INSERT INTO test_table SELECT number, number, range(number % 5) FROM numbers(5); + +INSERT INTO test_table SELECT number, number, range(number % 5) FROM numbers(10, 10); +``` + +```sql +SELECT * FROM mergeTreeIndex(currentDatabase(), test_table, with_marks = true); +``` + +```text +┌─part_name─┬─mark_number─┬─rows_in_granule─┬─id─┬─id.mark─┬─n.mark──┬─arr.size0.mark─┬─arr.mark─┠+│ all_1_1_0 │ 0 │ 3 │ 0 │ (0,0) │ (42,0) │ (NULL,NULL) │ (84,0) │ +│ all_1_1_0 │ 1 │ 2 │ 3 │ (133,0) │ (172,0) │ (NULL,NULL) │ (211,0) │ +│ all_1_1_0 │ 2 │ 0 │ 4 │ (271,0) │ (271,0) │ (NULL,NULL) │ (271,0) │ +└───────────┴─────────────┴─────────────────┴────┴─────────┴─────────┴────────────────┴──────────┘ +┌─part_name─┬─mark_number─┬─rows_in_granule─┬─id─┬─id.mark─┬─n.mark─┬─arr.size0.mark─┬─arr.mark─┠+│ all_2_2_0 │ 0 │ 3 │ 10 │ (0,0) │ (0,0) │ (0,0) │ (0,0) │ +│ all_2_2_0 │ 1 │ 3 │ 13 │ (0,24) │ (0,24) │ (0,24) │ (0,24) │ +│ all_2_2_0 │ 2 │ 3 │ 16 │ (0,48) │ (0,48) │ (0,48) │ (0,80) │ +│ all_2_2_0 │ 3 │ 1 │ 19 │ (0,72) │ (0,72) │ (0,72) │ (0,128) │ +│ all_2_2_0 │ 4 │ 0 │ 19 │ (0,80) │ (0,80) │ (0,80) │ (0,160) │ +└───────────┴─────────────┴─────────────────┴────┴─────────┴────────┴────────────────┴──────────┘ +``` + +```sql +DESCRIBE mergeTreeIndex(currentDatabase(), test_table, with_marks = true) SETTINGS describe_compact_output = 1; +``` + +```text +┌─name────────────┬─type─────────────────────────────────────────────────────────────────────────────────────────────┠+│ part_name │ String │ +│ mark_number │ UInt64 │ +│ rows_in_granule │ UInt64 │ +│ id │ UInt64 │ +│ id.mark │ Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) │ +│ n.mark │ Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) │ +│ arr.size0.mark │ Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) │ +│ arr.mark │ Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) │ +└─────────────────┴──────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 6340c369bff..9b2ded7b6ce 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -5,7 +5,12 @@ sidebar_label: Window Functions title: Window Functions --- -ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported: +Windows functions let you perform calculations across a set of rows that are related to the current row. +Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. + +## Standard Window Functions + +ClickHouse supports the standard grammar for defining windows and window functions. 
The table below indicates whether a feature is currently supported. | Feature | Support or workaround | |------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -25,6 +30,8 @@ ClickHouse supports the standard grammar for defining windows and window functio ## ClickHouse-specific Window Functions +There are also the following window function that's specific to ClickHouse: + ### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS]) Finds non-negative derivative for given `metric_column` by `timestamp_column`. @@ -33,40 +40,6 @@ The computed value is the following for each row: - `0` for 1st row, - ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for $i_th$ row. -## References - -### GitHub Issues - -The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). - -All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. - -### Tests - -These tests contain the examples of the currently supported grammar: - -https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml - -https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql - -### Postgres Docs - -https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW - -https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS - -https://www.postgresql.org/docs/devel/functions-window.html - -https://www.postgresql.org/docs/devel/tutorial-window.html - -### MySQL Docs - -https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html - -https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html - -https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html - ## Syntax ```text @@ -80,20 +53,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] - `PARTITION BY` - defines how to break a resultset into groups. - `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function. - `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame. -- `WINDOW` - allows to reuse a window definition with multiple expressions. - -### Functions - -These functions can be used only as a window function. - -- `row_number()` - Number the current row within its partition starting from 1. -- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. -- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. -- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. -- `rank()` - Rank the current row within its partition with gaps. -- `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `WINDOW` - allows multiple expressions to use the same window definition. 
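For instance, a small sketch combining the clauses listed above (the table and column names are hypothetical); both expressions reuse one named window via the `WINDOW` clause:

```sql
-- Rank rows within each department by salary, reusing a single named window definition
SELECT
    department,
    salary,
    rank()       OVER w AS salary_rank,
    dense_rank() OVER w AS salary_rank_dense
FROM employees
WINDOW w AS (PARTITION BY department ORDER BY salary DESC);
```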
```text PARTITION @@ -112,8 +72,23 @@ These functions can be used only as a window function. └─────────────────┘ <--- UNBOUNDED FOLLOWING (END of the PARTITION) ``` +### Functions + +These functions can be used only as a window function. + +- `row_number()` - Number the current row within its partition starting from 1. +- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. +- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. +- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. +- `rank()` - Rank the current row within its partition with gaps. +- `dense_rank()` - Rank the current row within its partition without gaps. +- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. + ## Examples +Let's have a look at some examples of how window functions can be used. + ```sql CREATE TABLE wf_partition ( @@ -589,6 +564,41 @@ ORDER BY └──────────────┴─────────────────────┴───────┴─────────────────────────┘ ``` +## References + +### GitHub Issues + +The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). + +All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. + +### Tests + +These tests contain the examples of the currently supported grammar: + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql + +### Postgres Docs + +https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW + +https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS + +https://www.postgresql.org/docs/devel/functions-window.html + +https://www.postgresql.org/docs/devel/tutorial-window.html + +### MySQL Docs + +https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html + + ## Related Content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index b2e851a78cd..575799cccc4 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -63,7 +63,7 @@ ClickHouse — Ð¿Ð¾Ð»Ð½Ð¾Ñ†ÐµÐ½Ð½Ð°Ñ ÑÑ‚Ð¾Ð»Ð±Ñ†Ð¾Ð²Ð°Ñ Ð¡Ð£Ð‘Ð”. Данны Ð”Ð»Ñ Ð±Ð°Ð¹Ñ‚-ориентированного ввода-вывода ÑущеÑтвуют абÑтрактные клаÑÑÑ‹ `ReadBuffer` и `WriteBuffer`. Они иÑпользуютÑÑ Ð²Ð¼ÐµÑто `iostream`. Ðе волнуйтеÑÑŒ: каждый зрелый проект C++ иÑпользует что-то другое вмеÑто `iostream` по уважительным причинам. -`ReadBuffer` и `WriteBuffer` — Ñто проÑто непрерывный буфер и курÑор, указывающий на позицию в Ñтом буфере. Реализации могут как владеть так и не владеть памÑтью буфера. СущеÑтвует виртуальный метод Ð·Ð°Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð±ÑƒÑ„ÐµÑ€Ð° Ñледующими данными (Ð´Ð»Ñ `ReadBuffer`) или ÑброÑа буфера куда-нибудь (например `WriteBuffer`). Виртуальные методы редко вызываютÑÑ. 
+`ReadBuffer` и `WriteBuffer` — Ñто проÑто непрерывный буфер и курÑор, указывающий на позицию в Ñтом буфере. Реализации могут как владеть, так и не владеть памÑтью буфера. СущеÑтвует виртуальный метод Ð·Ð°Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð±ÑƒÑ„ÐµÑ€Ð° Ñледующими данными (Ð´Ð»Ñ `ReadBuffer`) или ÑброÑа буфера куда-нибудь (например `WriteBuffer`). Виртуальные методы редко вызываютÑÑ. Реализации `ReadBuffer`/`WriteBuffer` иÑпользуютÑÑ Ð´Ð»Ñ Ñ€Ð°Ð±Ð¾Ñ‚Ñ‹ Ñ Ñ„Ð°Ð¹Ð»Ð°Ð¼Ð¸ и файловыми деÑкрипторами, а также Ñетевыми Ñокетами, Ð´Ð»Ñ Ñ€ÐµÐ°Ð»Ð¸Ð·Ð°Ñ†Ð¸Ð¸ ÑÐ¶Ð°Ñ‚Ð¸Ñ (`CompressedWriteBuffer` инициализируетÑÑ Ð²Ð¼ÐµÑте Ñ Ð´Ñ€ÑƒÐ³Ð¸Ð¼ `WriteBuffer` и оÑущеÑтвлÑет Ñжатие данных перед запиÑью в него), и Ð´Ð»Ñ Ð´Ñ€ÑƒÐ³Ð¸Ñ… целей – Ð½Ð°Ð·Ð²Ð°Ð½Ð¸Ñ `ConcatReadBuffer`, `LimitReadBuffer`, и `HashingWriteBuffer` говорÑÑ‚ Ñами за ÑебÑ. diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index c63622594e4..01ff4dd5f28 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -71,7 +71,7 @@ ClickHouse не работает и не ÑобираетÑÑ Ð½Ð° 32-битны Please make sure you have the correct access rights and the repository exists. -Как правило Ñто означает, что отÑутÑтвуют ssh ключи Ð´Ð»Ñ ÑÐ¾ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ñ GitHub. Ключи раÑположены в директории `~/.ssh`. Ð’ интерфейÑе GitHub, в наÑтройках, необходимо загрузить публичные ключи, чтобы он их понимал. +Как правило, Ñто означает, что отÑутÑтвуют ssh ключи Ð´Ð»Ñ ÑÐ¾ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ñ GitHub. Ключи раÑположены в директории `~/.ssh`. Ð’ интерфейÑе GitHub, в наÑтройках, необходимо загрузить публичные ключи, чтобы он их понимал. Ð’Ñ‹ также можете клонировать репозиторий по протоколу https: @@ -199,7 +199,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" Ð’ Ñлучае уÑпешного запуÑка, вы увидите прогреÑÑ Ñборки - количеÑтво обработанных задач и общее количеÑтво задач. -Ð’ процеÑÑе Ñборки могут поÑвитÑÑ ÑÐ¾Ð¾Ð±Ñ‰ÐµÐ½Ð¸Ñ `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значениÑ. +Ð’ процеÑÑе Ñборки могут поÑвитьÑÑ ÑÐ¾Ð¾Ð±Ñ‰ÐµÐ½Ð¸Ñ `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значениÑ. 
При уÑпешной Ñборке, вы получите готовый иÑполнÑемый файл `ClickHouse/build/programs/clickhouse`: @@ -207,7 +207,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ## ЗапуÑк Ñобранной верÑии ClickHouse {#zapusk-sobrannoi-versii-clickhouse} -Ð”Ð»Ñ Ð·Ð°Ð¿ÑƒÑка Ñервера из под текущего пользователÑ, Ñ Ð²Ñ‹Ð²Ð¾Ð´Ð¾Ð¼ логов в терминал и Ñ Ð¸Ñпользованием примеров конфигурационных файлов, раÑположенных в иÑходниках, перейдите в директорию `ClickHouse/programs/server/` (Ñта Ð´Ð¸Ñ€ÐµÐºÑ‚Ð¾Ñ€Ð¸Ñ Ð½Ð°Ñ…Ð¾Ð´Ð¸Ñ‚ÑÑ Ð½Ðµ в директории build) и выполните: +Ð”Ð»Ñ Ð·Ð°Ð¿ÑƒÑка Ñервера из-под текущего пользователÑ, Ñ Ð²Ñ‹Ð²Ð¾Ð´Ð¾Ð¼ логов в терминал и Ñ Ð¸Ñпользованием примеров конфигурационных файлов, раÑположенных в иÑходниках, перейдите в директорию `ClickHouse/programs/server/` (Ñта Ð´Ð¸Ñ€ÐµÐºÑ‚Ð¾Ñ€Ð¸Ñ Ð½Ð°Ñ…Ð¾Ð´Ð¸Ñ‚ÑÑ Ð½Ðµ в директории build) и выполните: ../../build/programs/clickhouse server diff --git a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md index cfafddf0bc2..4a7d81d38fc 100644 --- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -37,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] **Секции запроÑа** -При Ñоздании таблицы Ñ Ð´Ð²Ð¸Ð¶ÐºÐ¾Ð¼ `CollapsingMergeTree` иÑпользуютÑÑ Ñ‚Ðµ же [Ñекции запроÑа](mergetree.md#table_engine-mergetree-creating-a-table) что и при Ñоздании таблицы Ñ Ð´Ð²Ð¸Ð¶ÐºÐ¾Ð¼ `MergeTree`. +При Ñоздании таблицы Ñ Ð´Ð²Ð¸Ð¶ÐºÐ¾Ð¼ `CollapsingMergeTree` иÑпользуютÑÑ Ñ‚Ðµ же [Ñекции запроÑа](mergetree.md#table_engine-mergetree-creating-a-table), что и при Ñоздании таблицы Ñ Ð´Ð²Ð¸Ð¶ÐºÐ¾Ð¼ `MergeTree`.
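As an illustration of the statement above (that `CollapsingMergeTree` accepts the same query clauses as `MergeTree`), here is a minimal sketch with hypothetical column names; the only addition is the sign column passed to the engine:

```sql
-- Same clauses as MergeTree (ORDER BY, optional PARTITION BY, SETTINGS),
-- plus the Sign column given to the CollapsingMergeTree engine
CREATE TABLE UAct
(
    UserID UInt64,
    PageViews UInt8,
    Duration UInt8,
    Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID;
```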
diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 9f223157ea7..faa492d4d85 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -679,11 +679,20 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); ТÑги: -- `policy_name_N` — название политики. ÐÐ°Ð·Ð²Ð°Ð½Ð¸Ñ Ð¿Ð¾Ð»Ð¸Ñ‚Ð¸Ðº должны быть уникальны. -- `volume_name_N` — название тома. ÐÐ°Ð·Ð²Ð°Ð½Ð¸Ñ Ñ‚Ð¾Ð¼Ð¾Ð² должны быть уникальны. -- `disk` — диÑк, находÑщийÑÑ Ð²Ð½ÑƒÑ‚Ñ€Ð¸ тома. -- `max_data_part_size_bytes` — макÑимальный размер куÑка данных, который может находитьÑÑ Ð½Ð° любом из диÑков Ñтого тома. ЕÑли в результате ÑлиÑÐ½Ð¸Ñ Ñ€Ð°Ð·Ð¼ÐµÑ€ куÑка ожидаетÑÑ Ð±Ð¾Ð»ÑŒÑˆÐµ, чем max_data_part_size_bytes, то Ñтот куÑок будет запиÑан в Ñледующий том. Ð’ оÑновном Ñта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¿Ð¾Ð·Ð²Ð¾Ð»Ñет хранить новые / мелкие куÑки на горÑчем (SSD) томе и перемещать их на холодный (HDD) том, когда они доÑтигают большого размера. Ðе иÑпользуйте Ñтот параметр, еÑли политика имеет только один том. -- `move_factor` — Ð´Ð¾Ð»Ñ Ð´Ð¾Ñтупного Ñвободного меÑта на томе, еÑли меÑта ÑтановитÑÑ Ð¼ÐµÐ½ÑŒÑˆÐµ, то данные начнут перемещение на Ñледующий том, еÑли он еÑÑ‚ÑŒ (по умолчанию 0.1). Ð”Ð»Ñ Ð¿ÐµÑ€ÐµÐ¼ÐµÑ‰ÐµÐ½Ð¸Ñ ÐºÑƒÑки ÑортируютÑÑ Ð¿Ð¾ размеру от большего к меньшему (по убыванию) и выбираютÑÑ ÐºÑƒÑки, Ñовокупный размер которых доÑтаточен Ð´Ð»Ñ ÑÐ¾Ð±Ð»ÑŽÐ´ÐµÐ½Ð¸Ñ ÑƒÑÐ»Ð¾Ð²Ð¸Ñ `move_factor`, еÑли Ñовокупный размер вÑех партов недоÑтаточен, будут перемещены вÑе парты. +- `policy_name_N` — название политики. ÐÐ°Ð·Ð²Ð°Ð½Ð¸Ñ Ð¿Ð¾Ð»Ð¸Ñ‚Ð¸Ðº должны быть уникальны. +- `volume_name_N` — название тома. ÐÐ°Ð·Ð²Ð°Ð½Ð¸Ñ Ñ‚Ð¾Ð¼Ð¾Ð² должны быть уникальны. +- `disk` — диÑк, находÑщийÑÑ Ð²Ð½ÑƒÑ‚Ñ€Ð¸ тома. +- `max_data_part_size_bytes` — макÑимальный размер куÑка данных, который может находитьÑÑ Ð½Ð° любом из диÑков Ñтого тома. ЕÑли в результате ÑлиÑÐ½Ð¸Ñ Ñ€Ð°Ð·Ð¼ÐµÑ€ куÑка ожидаетÑÑ Ð±Ð¾Ð»ÑŒÑˆÐµ, чем max_data_part_size_bytes, то Ñтот куÑок будет запиÑан в Ñледующий том. Ð’ оÑновном Ñта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¿Ð¾Ð·Ð²Ð¾Ð»Ñет хранить новые / мелкие куÑки на горÑчем (SSD) томе и перемещать их на холодный (HDD) том, когда они доÑтигают большого размера. Ðе иÑпользуйте Ñтот параметр, еÑли политика имеет только один том. +- `move_factor` — Ð´Ð¾Ð»Ñ Ð´Ð¾Ñтупного Ñвободного меÑта на томе, еÑли меÑта ÑтановитÑÑ Ð¼ÐµÐ½ÑŒÑˆÐµ, то данные начнут перемещение на Ñледующий том, еÑли он еÑÑ‚ÑŒ (по умолчанию 0.1). Ð”Ð»Ñ Ð¿ÐµÑ€ÐµÐ¼ÐµÑ‰ÐµÐ½Ð¸Ñ ÐºÑƒÑки ÑортируютÑÑ Ð¿Ð¾ размеру от большего к меньшему (по убыванию) и выбираютÑÑ ÐºÑƒÑки, Ñовокупный размер которых доÑтаточен Ð´Ð»Ñ ÑÐ¾Ð±Ð»ÑŽÐ´ÐµÐ½Ð¸Ñ ÑƒÑÐ»Ð¾Ð²Ð¸Ñ `move_factor`, еÑли Ñовокупный размер вÑех партов недоÑтаточен, будут перемещены вÑе парты. +- `perform_ttl_move_on_insert` — отключает перемещение данных Ñ Ð¸Ñтекшим TTL при вÑтавке. По умолчанию (еÑли включено), еÑли мы вÑтавлÑем чаÑÑ‚ÑŒ данных, ÐºÐ¾Ñ‚Ð¾Ñ€Ð°Ñ ÑƒÐ¶Ðµ проÑрочилаÑÑŒ по правилу Ð¿ÐµÑ€ÐµÐ¼ÐµÑ‰ÐµÐ½Ð¸Ñ Ð¿Ð¾ Ñроку жизни, она немедленно перемещаетÑÑ Ð½Ð° том / диÑк, указанный в правиле перемещениÑ. Это может значительно замедлить вÑтавку в Ñлучае, еÑли целевой том / диÑк медленный (например, S3). ЕÑли отключено, то проÑÑ€Ð¾Ñ‡ÐµÐ½Ð½Ð°Ñ Ñ‡Ð°ÑÑ‚ÑŒ данных запиÑываетÑÑ Ð½Ð° том по умолчанию, а затем Ñразу перемещаетÑÑ Ð½Ð° том, указанный в правиле Ð´Ð»Ñ Ð¸Ñтёкшего TTL. +- `load_balancing` - политика баланÑировки диÑков, `round_robin` или `least_used`. 
+- `least_used_ttl_ms` - уÑтанавливает таймаут (в миллиÑекундах) Ð´Ð»Ñ Ð¾Ð±Ð½Ð¾Ð²Ð»ÐµÐ½Ð¸Ñ Ð´Ð¾Ñтупного проÑтранÑтва на вÑех диÑках (`0` - обновлÑÑ‚ÑŒ вÑегда, `-1` - никогда не обновлÑÑ‚ÑŒ, значение по умолчанию - `60000`). Обратите внимание, еÑли диÑк иÑпользуетÑÑ Ñ‚Ð¾Ð»ÑŒÐºÐ¾ ClickHouse и не будет подвергатьÑÑ Ð¸Ð·Ð¼ÐµÐ½ÐµÐ½Ð¸ÑŽ размеров файловой ÑиÑтемы на лету, можно иÑпользовать значение `-1`. Во вÑех оÑтальных ÑлучаÑÑ… Ñто не рекомендуетÑÑ, так как в конечном итоге Ñто приведет к неправильному раÑпределению проÑтранÑтва. +- `prefer_not_to_merge` — Ñту наÑтройку лучше не иÑпользовать. Она отключает ÑлиÑние чаÑтей данных на Ñтом томе (что потенциально вредно и может привеÑти к замедлению). Когда Ñта наÑтройка включена (не делайте Ñтого), объединение данных на Ñтом томе запрещено (что плохо). Это позволÑет (но вам Ñто не нужно) контролировать (еÑли вы хотите что-то контролировать, вы делаете ошибку), как ClickHouse взаимодейÑтвует Ñ Ð¼ÐµÐ´Ð»ÐµÐ½Ð½Ñ‹Ð¼Ð¸ диÑками (но ClickHouse лучше знает, поÑтому, пожалуйÑта, не иÑпользуйте Ñту наÑтройку). +- `volume_priority` — ОпределÑет приоритет (порÑдок), в котором заполнÑÑŽÑ‚ÑÑ Ñ‚Ð¾Ð¼Ð°. Чем меньше значение -- тем выше приоритет. Ð—Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ Ð¿Ð°Ñ€Ð°Ð¼ÐµÑ‚Ñ€Ð° должны быть натуральными чиÑлами и охватывать диапазон от 1 до N (N - наибольшее значение параметра из указанных) без пропуÑков. + * ЕÑли _вÑе_ тома имеют Ñтот параметр, они приоритизируютÑÑ Ð² указанном порÑдке. + * ЕÑли его имеют лишь _некоторые_, то не имеющие Ñтого параметра тома имеют Ñамый низкий приоритет. Те, у которых он указан, приоритизируютÑÑ Ð² ÑоответÑтвии Ñо значением тега, приоритет оÑтальных определÑетÑÑ Ð¿Ð¾Ñ€Ñдком опиÑÐ°Ð½Ð¸Ñ Ð² конфигурационном файле отноÑительно друг друга. + * ЕÑли _ни одному_ тому не приÑвоен Ñтот параметр, их порÑдок определÑетÑÑ Ð¿Ð¾Ñ€Ñдком опиÑÐ°Ð½Ð¸Ñ Ð² конфигурационном файле. + * Приоритет неÑкольких томов не может быть одинаковым. Примеры конфигураций: @@ -733,7 +742,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); ЕÑли ÑиÑтема Ñодержит диÑки различных типов, то может пригодитьÑÑ Ð¿Ð¾Ð»Ð¸Ñ‚Ð¸ÐºÐ° `moving_from_ssd_to_hdd`. Ð’ томе `hot` находитÑÑ Ð¾Ð´Ð¸Ð½ SSD-диÑк (`fast_ssd`), а также задаетÑÑ Ð¾Ð³Ñ€Ð°Ð½Ð¸Ñ‡ÐµÐ½Ð¸Ðµ на макÑимальный размер куÑка, который может хранитьÑÑ Ð½Ð° Ñтом томе (1GB). Ð’Ñе куÑки такой таблицы больше 1GB будут запиÑыватьÑÑ Ñразу на том `cold`, в котором ÑодержитÑÑ Ð¾Ð´Ð¸Ð½ HDD-диÑк `disk1`. Также при заполнении диÑка `fast_ssd` более чем на 80% данные будут переноÑитьÑÑ Ð½Ð° диÑк `disk1` фоновым процеÑÑом. -ПорÑдок томов в политиках Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð²Ð°Ð¶ÐµÐ½, при доÑтижении уÑловий на переполнение тома данные переноÑÑÑ‚ÑÑ Ð½Ð° Ñледующий. ПорÑдок диÑков в томах так же важен, данные пишутÑÑ Ð¿Ð¾ очереди на каждый из них. +ПорÑдок томов в политиках Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð²Ð°Ð¶ÐµÐ½ в Ñлучае, еÑли приоритеты томов (`volume_priority`) не указаны Ñвно: при доÑтижении уÑловий на переполнение тома данные переноÑÑÑ‚ÑÑ Ð½Ð° Ñледующий. ПорÑдок диÑков в томах так же важен, данные пишутÑÑ Ð¿Ð¾ очереди на каждый из них. 
ПоÑле Ð·Ð°Ð´Ð°Ð½Ð¸Ñ ÐºÐ¾Ð½Ñ„Ð¸Ð³ÑƒÑ€Ð°Ñ†Ð¸Ð¸ политик Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð¸Ñ… можно иÑпользовать, как наÑтройку при Ñоздании таблиц: diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md index 1fd8483e54d..3d2f1ee850d 100644 --- a/docs/ru/engines/table-engines/special/buffer.md +++ b/docs/ru/engines/table-engines/special/buffer.md @@ -42,7 +42,7 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10 Ð’ качеÑтве имени базы данных и имени таблицы можно указать пуÑтые Ñтроки в одинарных кавычках. Это обозначает отÑутÑтвие таблицы назначениÑ. Ð’ таком Ñлучае, при доÑтижении уÑловий на ÑÐ±Ñ€Ð¾Ñ Ð´Ð°Ð½Ð½Ñ‹Ñ…, буфер будет проÑто очищатьÑÑ. Это может быть полезным, чтобы хранить в оперативке некоторое окно данных. При чтении из таблицы типа Buffer, будут обработаны данные, как находÑщиеÑÑ Ð² буфере, так и данные из таблицы Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ (еÑли Ñ‚Ð°ÐºÐ°Ñ ÐµÑÑ‚ÑŒ). -Ðо Ñледует иметь ввиду, что таблица Buffer не поддерживает индекÑ. То еÑÑ‚ÑŒ, данные в буфере будут проÑканированы полноÑтью, что может быть медленно Ð´Ð»Ñ Ð±ÑƒÑ„ÐµÑ€Ð¾Ð² большого размера. (Ð”Ð»Ñ Ð´Ð°Ð½Ð½Ñ‹Ñ… в подчинённой таблице, будет иÑпользоватьÑÑ Ñ‚Ð¾Ñ‚ индекÑ, который она поддерживает.) +Ðо Ñледует иметь в виду, что таблица Buffer не поддерживает индекÑ. То еÑÑ‚ÑŒ, данные в буфере будут проÑканированы полноÑтью, что может быть медленно Ð´Ð»Ñ Ð±ÑƒÑ„ÐµÑ€Ð¾Ð² большого размера. (Ð”Ð»Ñ Ð´Ð°Ð½Ð½Ñ‹Ñ… в подчинённой таблице, будет иÑпользоватьÑÑ Ñ‚Ð¾Ñ‚ индекÑ, который она поддерживает.) ЕÑли множеÑтво Ñтолбцов таблицы Buffer не Ñовпадает Ñ Ð¼Ð½Ð¾Ð¶ÐµÑтвом Ñтолбцов подчинённой таблицы, то будут вÑтавлено подмножеÑтво Ñтолбцов, которое приÑутÑтвует в обеих таблицах. @@ -66,4 +66,4 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10 Таблицы типа Buffer иÑпользуютÑÑ Ð² тех ÑлучаÑÑ…, когда от большого количеÑтва Ñерверов поÑтупает Ñлишком много INSERT-ов в единицу времени, и нет возможноÑти заранее ÑамоÑтоÑтельно буферизовать данные перед вÑтавкой, в результате чего, INSERT-Ñ‹ не уÑпевают выполнÑÑ‚ÑŒÑÑ. -Заметим, что даже Ð´Ð»Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ† типа Buffer не имеет ÑмыÑла вÑтавлÑÑ‚ÑŒ данные по одной Ñтроке, так как таким образом будет доÑтигнута ÑкороÑÑ‚ÑŒ вÑего лишь в неÑколько Ñ‚Ñ‹ÑÑч Ñтрок в Ñекунду, тогда как при вÑтавке более крупными блоками, доÑтижимо более миллиона Ñтрок в Ñекунду (Ñмотрите раздел [«ПроизводительноÑть»](../../../introduction/performance.md). +Заметим, что даже Ð´Ð»Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ† типа Buffer не имеет ÑмыÑла вÑтавлÑÑ‚ÑŒ данные по одной Ñтроке, так как таким образом будет доÑтигнута ÑкороÑÑ‚ÑŒ вÑего лишь в неÑколько Ñ‚Ñ‹ÑÑч Ñтрок в Ñекунду, тогда как при вÑтавке более крупными блоками, доÑтижимо более миллиона Ñтрок в Ñекунду (Ñмотрите раздел [«ПроизводительноÑть»](../../../introduction/performance.md)). diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index 34064b6cf2f..8c827137e6d 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -585,10 +585,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; ``` -:::danger Внимание! -Этот подход не годитÑÑ Ð´Ð»Ñ ÑÐµÐ³Ð¼ÐµÐ½Ñ‚Ð¸Ñ€Ð¾Ð²Ð°Ð½Ð¸Ñ Ð±Ð¾Ð»ÑŒÑˆÐ¸Ñ… таблиц. ЕÑÑ‚ÑŒ инÑтрумент [clickhouse-copier](../operations/utilities/clickhouse-copier.md), Ñпециально предназначенный Ð´Ð»Ñ Ð¿ÐµÑ€ÐµÑ€Ð°ÑÐ¿Ñ€ÐµÐ´ÐµÐ»ÐµÐ½Ð¸Ñ Ð»ÑŽÐ±Ñ‹Ñ… больших таблиц. 
-::: - Как и Ñледовало ожидать, вычиÑлительно Ñложные запроÑÑ‹ работают втрое быÑтрее, еÑли они выполнÑÑŽÑ‚ÑÑ Ð½Ð° трёх Ñерверах, а не на одном. Ð’ данном Ñлучае мы иÑпользовали клаÑтер из трёх Ñегментов Ñ Ð¾Ð´Ð½Ð¾Ð¹ репликой Ð´Ð»Ñ ÐºÐ°Ð¶Ð´Ð¾Ð³Ð¾. diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index 8910c258788..4d19cf50ae1 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -177,11 +177,11 @@ URI позволÑет подключатьÑÑ Ðº неÑкольким хоÑÑ‚ -Строка Ð¿Ð¾Ð´ÐºÐ»ÑŽÑ‡ÐµÐ½Ð¸Ñ Ð´Ð¾Ð»Ð¶Ð½Ð° быть указана в первом аргументе clickhouse-client. Строка Ð¿Ð¾Ð´ÐºÐ»ÑŽÑ‡ÐµÐ½Ð¸Ñ Ð¼Ð¾Ð¶ÐµÑ‚ комбинироватьÑÑ Ñ Ð´Ñ€ÑƒÐ³Ð¸Ð¼Ð¸ [параметрами командной Ñтроки] (#command-line-options) кроме `--host/-h` и `--port`. +Строка Ð¿Ð¾Ð´ÐºÐ»ÑŽÑ‡ÐµÐ½Ð¸Ñ Ð´Ð¾Ð»Ð¶Ð½Ð° быть указана в первом аргументе clickhouse-client. Строка Ð¿Ð¾Ð´ÐºÐ»ÑŽÑ‡ÐµÐ½Ð¸Ñ Ð¼Ð¾Ð¶ÐµÑ‚ комбинироватьÑÑ Ñ Ð´Ñ€ÑƒÐ³Ð¸Ð¼Ð¸ [параметрами командной Ñтроки](#command-line-options) кроме `--host/-h` и `--port`. Ð”Ð»Ñ ÐºÐ¾Ð¼Ð¿Ð¾Ð½ÐµÐ½Ñ‚Ð° `query_parameter` разрешены Ñледующие ключи: -- `secure` или Ñокращенно `s` - без значение. ЕÑли параметр указан, то Ñоединение Ñ Ñервером будет оÑущеÑтвлÑÑ‚ÑŒÑÑ Ð¿Ð¾ защищенному каналу (TLS). См. `secure` в [command-line-options](#command-line-options). +- `secure` или Ñокращенно `s` - без значениÑ. ЕÑли параметр указан, то Ñоединение Ñ Ñервером будет оÑущеÑтвлÑÑ‚ÑŒÑÑ Ð¿Ð¾ защищенному каналу (TLS). См. `secure` в [command-line-options](#command-line-options). ### Кодирование URI {#connection_string_uri_percent_encoding} @@ -206,7 +206,7 @@ clickhouse-client clickhouse://john:secret@127.0.0.1:9000 clickhouse-client clickhouse://[::1]:9000 ``` -ПодключитьÑÑ Ðº localhost через порт 9000 многоÑтрочном режиме. +ПодключитьÑÑ Ðº localhost через порт 9000 в многоÑтрочном режиме. ``` bash clickhouse-client clickhouse://localhost:9000 '-m' diff --git a/docs/ru/interfaces/third-party/gui.md b/docs/ru/interfaces/third-party/gui.md index 34d2f0e371a..6bed32052ad 100644 --- a/docs/ru/interfaces/third-party/gui.md +++ b/docs/ru/interfaces/third-party/gui.md @@ -260,3 +260,19 @@ SeekTable [беÑплатен](https://www.seektable.com/help/cloud-pricing) д ПротеÑтировать TABLUM.IO без Ñ€Ð°Ð·Ð²Ð¾Ñ€Ð°Ñ‡Ð¸Ð²Ð°Ð½Ð¸Ñ Ð½Ð° ÑобÑтвенном Ñервере можно [здеÑÑŒ](https://tablum.io/try). Подробно о продукте Ñмотрите на [TABLUM.IO](https://tablum.io/) + + +### CKMAN {#ckman} + +[CKMAN] (https://www.github.com/housepower/ckman) — инÑтрумент ÑƒÐ¿Ñ€Ð°Ð²Ð»ÐµÐ½Ð¸Ñ Ð¸ мониторинга клаÑтеров ClickHouse! + +ОÑновные возможноÑти: + +- БыÑтрое и проÑтое развертывание клаÑтеров через веб-Ð¸Ð½Ñ‚ÐµÑ€Ñ„ÐµÐ¹Ñ +- КлаÑтеры можно маÑштабировать или маÑштабировать +- БаланÑировка нагрузки данных клаÑтера +- Обновление клаÑтера в режиме онлайн +- Измените конфигурацию клаÑтера на Ñтранице +- ОбеÑпечивает мониторинг узлов клаÑтера и zookeeper +- Мониторинг ÑоÑтоÑÐ½Ð¸Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ† и Ñекций, а также медленные SQL-операторы +- ПредоÑтавлÑет проÑтую в иÑпользовании Ñтраницу Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ SQL diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 9ff13bbc8a6..44877ff8071 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -24,10 +24,6 @@ sidebar_label: "Резервное копирование данных" Ðекоторые локальные файловые ÑиÑтемы позволÑÑŽÑ‚ делать Ñнимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором Ð´Ð»Ñ Ð¾Ð±ÑÐ»ÑƒÐ¶Ð¸Ð²Ð°Ð½Ð¸Ñ Ð¶Ð¸Ð²Ñ‹Ñ… запроÑов. 
Возможным решением ÑвлÑетÑÑ Ñоздание дополнительных реплик Ñ Ñ‚Ð°ÐºÐ¾Ð¹ файловой ÑиÑтемой и иÑключение их из [Distributed](../engines/table-engines/special/distributed.md) таблиц, иÑпользуемых Ð´Ð»Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñов `SELECT`. Снимки на таких репликах будут недоÑтупны Ð´Ð»Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñов, изменÑющих данные. Ð’ качеÑтве бонуÑа, Ñти реплики могут иметь оÑобые конфигурации Ð¾Ð±Ð¾Ñ€ÑƒÐ´Ð¾Ð²Ð°Ð½Ð¸Ñ Ñ Ð±Ð¾Ð»ÑŒÑˆÐ¸Ð¼ количеÑтвом диÑков, подключенных к Ñерверу, что будет ÑкономичеÑки Ñффективным. -## clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](utilities/clickhouse-copier.md) — Ñто универÑальный инÑтрумент, который изначально был Ñоздан Ð´Ð»Ñ Ð¿ÐµÑ€ÐµÑˆÐ°Ñ€Ð´Ð¸Ñ€Ð¾Ð²Ð°Ð½Ð¸Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ† Ñ Ð¿ÐµÑ‚Ð°Ð±Ð°Ð¹Ñ‚Ð°Ð¼Ð¸ данных. Его также можно иÑпользовать Ð´Ð»Ñ Ñ€ÐµÐ·ÐµÑ€Ð²Ð½Ð¾Ð³Ð¾ ÐºÐ¾Ð¿Ð¸Ñ€Ð¾Ð²Ð°Ð½Ð¸Ñ Ð¸ воÑÑтановлениÑ, поÑкольку он надёжно копирует данные между таблицами и клаÑтерами ClickHouse. - Ð”Ð»Ñ Ð½ÐµÐ±Ð¾Ð»ÑŒÑˆÐ¸Ñ… объёмов данных можно применÑÑ‚ÑŒ `INSERT INTO ... SELECT ...` в удалённые таблицы. ## МанипулÑции Ñ Ð¿Ð°Ñ€Ñ‚Ð¸Ñ†Ð¸Ñми {#manipuliatsii-s-partitsiiami} diff --git a/docs/ru/operations/clickhouse-keeper.md b/docs/ru/operations/clickhouse-keeper.md index 3a931529b32..e1d21dd537c 100644 --- a/docs/ru/operations/clickhouse-keeper.md +++ b/docs/ru/operations/clickhouse-keeper.md @@ -38,6 +38,7 @@ ClickHouse Keeper может иÑпользоватьÑÑ ÐºÐ°Ðº равноце - `dead_session_check_period_ms` — чаÑтота, Ñ ÐºÐ¾Ñ‚Ð¾Ñ€Ð¾Ð¹ ClickHouse Keeper проверÑет мертвые ÑеÑÑии и удалÑет их, в миллиÑекундах (по умолчанию: 500). - `election_timeout_lower_bound_ms` — времÑ, поÑле которого поÑледователь может инициировать перевыбор лидера, еÑли не получил от него контрольный Ñигнал (по умолчанию: 1000). - `election_timeout_upper_bound_ms` — времÑ, поÑле которого поÑледователь должен инициировать перевыбор лидера, еÑли не получил от него контрольный Ñигнал (по умолчанию: 2000). +- `leadership_expiry_ms` — ЕÑли лидер не получает ответа от доÑтаточного количеÑтва поÑледователей в течение Ñтого промежутка времени, он добровольно отказываетÑÑ Ð¾Ñ‚ Ñвоего руководÑтва. При наÑтройке 0 автоматичеÑки уÑтанавливаетÑÑ 20 - кратное значение `heart_beat_interval_ms`, а при наÑтройке меньше 0 лидер не отказываетÑÑ Ð¾Ñ‚ лидерÑтва (по умолчанию 0). - `force_sync` — вызывать `fsync` при каждой запиÑи в журнал координации (по умолчанию: true). - `four_letter_word_white_list` — ÑпиÑок разрешенных 4-Ñ… буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro"). - `fresh_log_gap` — минимальное отÑтавание от лидера в количеÑтве запиÑей журнала поÑле которого поÑледователь Ñчитает ÑÐµÐ±Ñ Ð°ÐºÑ‚ÑƒÐ°Ð»ÑŒÐ½Ñ‹Ð¼ (по умолчанию: 200). @@ -69,7 +70,7 @@ ClickHouse Keeper может иÑпользоватьÑÑ ÐºÐ°Ðº равноце :::note -Ð’ Ñлучае Ð¸Ð·Ð¼ÐµÐ½ÐµÐ½Ð¸Ñ Ñ‚Ð¾Ð¿Ð¾Ð»Ð¾Ð³Ð¸Ð¸ клаÑтера ClickHouse Keeper(например, замены Ñервера), удоÑтоверьтеÑÑŒ, что вы ÑохранÑеете отношение `server_id` - `hostname`, не переиÑпользуете ÑущеÑтвующие `server_id` Ð´Ð»Ñ Ð´Ð»Ñ Ð½Ð¾Ð²Ñ‹Ñ… Ñерверов и не перемешиваете идентификаторы. Подобные ошибки могут ÑлучатьÑÑ, еÑли вы иÑпользуете автоматизацию при разворачивании клаÑтера без логики ÑÐ¾Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð¸Ð´ÐµÐ½Ñ‚Ð¸Ñ„Ð¸ÐºÐ°Ñ‚Ð¾Ñ€Ð¾Ð². +Ð’ Ñлучае Ð¸Ð·Ð¼ÐµÐ½ÐµÐ½Ð¸Ñ Ñ‚Ð¾Ð¿Ð¾Ð»Ð¾Ð³Ð¸Ð¸ клаÑтера ClickHouse Keeper(например, замены Ñервера), удоÑтоверьтеÑÑŒ, что вы ÑохранÑеете отношение `server_id` - `hostname`, не переиÑпользуете ÑущеÑтвующие `server_id` Ð´Ð»Ñ Ð½Ð¾Ð²Ñ‹Ñ… Ñерверов и не перемешиваете идентификаторы. 
Подобные ошибки могут ÑлучатьÑÑ, еÑли вы иÑпользуете автоматизацию при разворачивании клаÑтера без логики ÑÐ¾Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð¸Ð´ÐµÐ½Ñ‚Ð¸Ñ„Ð¸ÐºÐ°Ñ‚Ð¾Ñ€Ð¾Ð². ::: Примеры конфигурации кворума Ñ Ñ‚Ñ€ÐµÐ¼Ñ ÑƒÐ·Ð»Ð°Ð¼Ð¸ можно найти в [интеграционных теÑтах](https://github.com/ClickHouse/ClickHouse/tree/master/tests/integration) Ñ Ð¿Ñ€ÐµÑ„Ð¸ÐºÑом `test_keeper_`. Пример конфигурации Ð´Ð»Ñ Ñервера â„–1: @@ -209,6 +210,7 @@ dead_session_check_period_ms=500 heart_beat_interval_ms=500 election_timeout_lower_bound_ms=1000 election_timeout_upper_bound_ms=2000 +leadership_expiry_ms=0 reserved_log_items=1000000000000000 snapshot_distance=10000 auto_forwarding=true @@ -337,7 +339,7 @@ clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 -- ПоÑле того, как выполнили дейÑÑ‚Ð²Ð¸Ñ Ð²Ñ‹ÑˆÐµ выполните Ñледующие шаги. 1. Выберете одну ноду Keeper, ÐºÐ¾Ñ‚Ð¾Ñ€Ð°Ñ Ñтанет новым лидером. Учтите, что данные Ñ Ñтой ноды будут иÑпользованы вÑем клаÑтером, поÑтому рекомендуетÑÑ Ð²Ñ‹Ð±Ñ€Ð°Ñ‚ÑŒ ноду Ñ Ð½Ð°Ð¸Ð±Ð¾Ð»ÐµÐµ актуальным ÑоÑтоÑнием. -2. Перед дальнейшими дейÑтвиÑм Ñделайте резервную копию данных из директорий `log_storage_path` и `snapshot_storage_path`. +2. Перед дальнейшими дейÑтвиÑми Ñделайте резервную копию данных из директорий `log_storage_path` и `snapshot_storage_path`. 3. Измените наÑтройки на вÑех нодах клаÑтера, которые вы ÑобираетеÑÑŒ иÑпользовать. 4. Отправьте команду `rcvr` на ноду, которую вы выбрали, или оÑтановите ее и запуÑтите заново Ñ Ð°Ñ€Ð³ÑƒÐ¼ÐµÐ½Ñ‚Ð¾Ð¼ `--force-recovery`. Это переведет ноду в режим воÑÑтановлениÑ. 5. ЗапуÑкайте оÑтальные ноды клаÑтера по одной и проверÑйте, что команда `mntr` возвращает `follower` в выводе ÑоÑтоÑÐ½Ð¸Ñ `zk_server_state` перед тем, как запуÑтить Ñледующую ноду. diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 3b037521692..74f7d217fb7 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -89,7 +89,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml Ð’Ñ‹ можете иÑпользовать Ñимметричное шифрование Ð´Ð»Ñ Ð·Ð°ÑˆÐ¸Ñ„Ñ€Ð¾Ð²ÐºÐ¸ Ñлемента конфигурации, например, Ð¿Ð¾Ð»Ñ password. Чтобы Ñто Ñделать, Ñначала наÑтройте [кодек шифрованиÑ](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте аттибут`encrypted_by` Ñ Ð¸Ð¼ÐµÐ½ÐµÐ¼ кодека ÑˆÐ¸Ñ„Ñ€Ð¾Ð²Ð°Ð½Ð¸Ñ ÐºÐ°Ðº значение к Ñлементу, который надо зашифровать. -Ð’ отличии от аттрибутов `from_zk`, `from_env` и `incl` (или Ñлемента `include`), подÑтановка, Ñ‚.е. раÑшифровка зашифрованного значениÑ, не выподнÑетÑÑ Ð² файле предобработки. РаÑшифровка проиÑходит только во Ð²Ñ€ÐµÐ¼Ñ Ð¸ÑÐ¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð² Ñерверном процеÑÑе. +Ð’ отличие от аттрибутов `from_zk`, `from_env` и `incl` (или Ñлемента `include`), подÑтановка, Ñ‚.е. раÑшифровка зашифрованного значениÑ, не выподнÑетÑÑ Ð² файле предобработки. РаÑшифровка проиÑходит только во Ð²Ñ€ÐµÐ¼Ñ Ð¸ÑÐ¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð² Ñерверном процеÑÑе. Пример: @@ -110,7 +110,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ``` -Чтобы получить зашифрованное значение может быть иÑпользовано приложение-пример `encrypt_decrypt` . +Чтобы получить зашифрованное значение, может быть иÑпользовано приложение-пример `encrypt_decrypt` . 
Пример: diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index cd949e9e6b1..a56afda641b 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -3258,7 +3258,7 @@ SELECT * FROM test2; ## allow_experimental_live_view {#allow-experimental-live-view} -Включает ÑкÑпериментальную возможноÑÑ‚ÑŒ иÑÐ¿Ð¾Ð»ÑŒÐ·Ð¾Ð²Ð°Ð½Ð¸Ñ [LIVE-предÑтавлений](../../sql-reference/statements/create/view.md#live-view). +Включает уÑтаревшую возможноÑÑ‚ÑŒ иÑÐ¿Ð¾Ð»ÑŒÐ·Ð¾Ð²Ð°Ð½Ð¸Ñ [LIVE-предÑтавлений](../../sql-reference/statements/create/view.md#live-view). Возможные значениÑ: - 0 — живые предÑÑ‚Ð°Ð²Ð»ÐµÐ½Ð¸Ñ Ð½Ðµ поддерживаютÑÑ. @@ -3268,21 +3268,15 @@ SELECT * FROM test2; ## live_view_heartbeat_interval {#live-view-heartbeat-interval} -Задает интервал в Ñекундах Ð´Ð»Ñ Ð¿ÐµÑ€Ð¸Ð¾Ð´Ð¸Ñ‡ÐµÑкой проверки ÑущеÑÑ‚Ð²Ð¾Ð²Ð°Ð½Ð¸Ñ [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view). - -Значение по умолчанию: `15`. +УÑтарело. ## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh} -Задает наибольшее чиÑло вÑтавок, поÑле которых Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð½Ð° формирование [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) иÑполнÑетÑÑ Ñнова. - -Значение по умолчанию: `64`. +УÑтарело. ## periodic_live_view_refresh {#periodic-live-view-refresh} -Задает Ð²Ñ€ÐµÐ¼Ñ Ð² Ñекундах, по иÑтечении которого [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) Ñ ÑƒÑтановленным автообновлением обновлÑетÑÑ. - -Значение по умолчанию: `60`. +УÑтарело. ## check_query_single_value_result {#check_query_single_value_result} diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index 73de78d1c15..eb342bea9a7 100644 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -50,7 +50,7 @@ clickhouse-benchmark [keys] < queries_file; - `-r`, `--randomize` — иÑпользовать Ñлучайный порÑдок Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñов при наличии более одного входного запроÑа. - `-s`, `--secure` — иÑпользуетÑÑ `TLS` Ñоединение. - `-t N`, `--timelimit=N` — лимит по времени в Ñекундах. `clickhouse-benchmark` переÑтает отправлÑÑ‚ÑŒ запроÑÑ‹ при доÑтижении лимита по времени. Значение по умолчанию: 0 (лимит отключен). -- `--confidence=N` — уровень Ð´Ð¾Ð²ÐµÑ€Ð¸Ñ Ð´Ð»Ñ T-критериÑ. Возможные значениÑ: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: 5. Ð’ [режиме ÑравнениÑ](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверÑет [двухвыборочный t-критерий Стьюдента Ð´Ð»Ñ Ð½ÐµÐ·Ð°Ð²Ð¸Ñимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) чтобы определить, различны ли две выборки при выбранном уровне довериÑ. +- `--confidence=N` — уровень Ð´Ð¾Ð²ÐµÑ€Ð¸Ñ Ð´Ð»Ñ T-критериÑ. Возможные значениÑ: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Значение по умолчанию: 5. Ð’ [режиме ÑравнениÑ](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` проверÑет [двухвыборочный t-критерий Стьюдента Ð´Ð»Ñ Ð½ÐµÐ·Ð°Ð²Ð¸Ñимых выборок](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test), чтобы определить, различны ли две выборки при выбранном уровне довериÑ. - `--cumulative` — выводить ÑтатиÑтику за вÑе Ð²Ñ€ÐµÐ¼Ñ Ñ€Ð°Ð±Ð¾Ñ‚Ñ‹, а не за поÑледний временной интервал. - `--database=DATABASE_NAME` — Ð¸Ð¼Ñ Ð±Ð°Ð·Ñ‹ данных ClickHouse. Значение по умолчанию: `default`. 
- `--json=FILEPATH` — дополнительный вывод в формате `JSON`. Когда Ñтот ключ указан, `clickhouse-benchmark` выводит отчет в указанный JSON-файл. diff --git a/docs/ru/operations/utilities/clickhouse-copier.md b/docs/ru/operations/utilities/clickhouse-copier.md deleted file mode 100644 index da86ef2d35d..00000000000 --- a/docs/ru/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,183 +0,0 @@ ---- -slug: /ru/operations/utilities/clickhouse-copier -sidebar_position: 59 -sidebar_label: clickhouse-copier ---- - -# clickhouse-copier {#clickhouse-copier} - -Копирует данные из таблиц одного клаÑтера в таблицы другого (или Ñтого же) клаÑтера. - -Можно запуÑтить неÑколько `clickhouse-copier` Ð´Ð»Ñ Ñ€Ð°Ð·Ð½Ñ‹Ñ… Ñерверах Ð´Ð»Ñ Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð¾Ð´Ð½Ð¾Ð³Ð¾ и того же заданиÑ. Ð”Ð»Ñ Ñинхронизации между процеÑÑами иÑпользуетÑÑ ZooKeeper. - -ПоÑле запуÑка, `clickhouse-copier`: - -- СоединÑетÑÑ Ñ ZooKeeper и получает: - - - Ð—Ð°Ð´Ð°Ð½Ð¸Ñ Ð½Ð° копирование. - - СоÑтоÑние заданий на копирование. - -- ВыполнÑет заданиÑ. - - Каждый запущенный процеÑÑ Ð²Ñ‹Ð±Ð¸Ñ€Ð°ÐµÑ‚ "ближайший" шард иÑходного клаÑтера и копирует данные в клаÑтер назначениÑ, при необходимоÑти Ð¿ÐµÑ€ÐµÑˆÐ°Ñ€Ð´Ð¸Ñ€ÑƒÑ Ð¸Ñ…. - -`clickhouse-copier` отÑлеживает Ð¸Ð·Ð¼ÐµÐ½ÐµÐ½Ð¸Ñ Ð² ZooKeeper и применÑет их «на лету». - -Ð”Ð»Ñ ÑÐ½Ð¸Ð¶ÐµÐ½Ð¸Ñ Ñетевого трафика рекомендуем запуÑкать `clickhouse-copier` на том же Ñервере, где находÑÑ‚ÑÑ Ð¸Ñходные данные. - -## ЗапуÑк Clickhouse-copier {#zapusk-clickhouse-copier} - -Утилиту Ñледует запуÑкать вручную Ñледующим образом: - -``` bash -$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -Параметры запуÑка: - -- `daemon` - запуÑкает `clickhouse-copier` в режиме демона. -- `config` - путь к файлу `zookeeper.xml` Ñ Ð¿Ð°Ñ€Ð°Ð¼ÐµÑ‚Ñ€Ð°Ð¼Ð¸ ÑÐ¾ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ñ ZooKeeper. -- `task-path` - путь к ноде ZooKeeper. Ðода иÑпользуетÑÑ Ð´Ð»Ñ Ñинхронизации между процеÑÑами `clickhouse-copier` и Ð´Ð»Ñ Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð·Ð°Ð´Ð°Ð½Ð¸Ð¹. Ð—Ð°Ð´Ð°Ð½Ð¸Ñ Ñ…Ñ€Ð°Ð½ÑÑ‚ÑÑ Ð² `$task-path/description`. -- `task-file` - необÑзательный путь к файлу Ñ Ð¾Ð¿Ð¸Ñанием ÐºÐ¾Ð½Ñ„Ð¸Ð³ÑƒÑ€Ð°Ñ†Ð¸Ñ Ð·Ð°Ð´Ð°Ð½Ð¸Ð¹ Ð´Ð»Ñ Ð·Ð°Ð³Ñ€ÑƒÐ·ÐºÐ¸ в ZooKeeper. -- `task-upload-force` - Загрузить `task-file` в ZooKeeper даже еÑли уже было загружено. -- `base-dir` - путь к логам и вÑпомогательным файлам. При запуÑке `clickhouse-copier` Ñоздает в `$base-dir` подкаталоги `clickhouse-copier_YYYYMMHHSS_`. ЕÑли параметр не указан, то каталоги будут ÑоздаватьÑÑ Ð² каталоге, где `clickhouse-copier` был запущен. - -## Формат Zookeeper.xml {#format-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## ÐšÐ¾Ð½Ñ„Ð¸Ð³ÑƒÑ€Ð°Ñ†Ð¸Ñ Ð·Ð°Ð´Ð°Ð½Ð¸Ð¹ на копирование {#konfiguratsiia-zadanii-na-kopirovanie} - -``` xml - - - - - - - false - - 127.0.0.1 - 9000 - - - - ... - - - - ... - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... - - -``` - -`clickhouse-copier` отÑлеживает Ð¸Ð·Ð¼ÐµÐ½ÐµÐ½Ð¸Ñ `/task/path/description` и применÑет их «на лету». ЕÑли вы поменÑете, например, значение `max_workers`, то количеÑтво процеÑÑов, выполнÑющих заданиÑ, также изменитÑÑ. 
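With the Russian clickhouse-copier page deleted above, the remaining guidance in these docs for moving modest amounts of data is a plain `INSERT ... SELECT` into a remote table. A minimal sketch of that alternative follows; the host `target-host:9000`, database `db`, table `hits` and the default credentials are hypothetical placeholders, not values taken from this diff:

```sql
-- Copy rows to another server without clickhouse-copier.
-- Host, database, table and credentials are placeholders for illustration only.
INSERT INTO FUNCTION remote('target-host:9000', 'db', 'hits', 'default', '')
SELECT *
FROM db.hits;
```

For large tables, the partition-manipulation approach kept in the backup documentation is the more practical route than a single `INSERT SELECT`.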
diff --git a/docs/ru/operations/utilities/index.md b/docs/ru/operations/utilities/index.md index 9eb90a3037c..e4b01a0276d 100644 --- a/docs/ru/operations/utilities/index.md +++ b/docs/ru/operations/utilities/index.md @@ -7,7 +7,6 @@ sidebar_position: 56 # Утилиты ClickHouse {#utility-clickhouse} - [clickhouse-local](clickhouse-local.md) - позволÑет выполнÑÑ‚ÑŒ SQL-запроÑÑ‹ над данными без оÑтановки Ñервера ClickHouse, подобно утилите `awk`. -- [clickhouse-copier](clickhouse-copier.md) - копирует (и перешардирует) данные Ñ Ð¾Ð´Ð½Ð¾Ð³Ð¾ клаÑтера на другой. - [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — уÑтанавливает Ñоединение Ñ Ñервером ClickHouse и запуÑкает цикличеÑкое выполнение указанных запроÑов. - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — позволÑет форматировать входÑщие запроÑÑ‹. - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — обфуÑцирует данные. diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 80d844a1713..57f24786bb7 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -33,7 +33,7 @@ ClickHouse отображает Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ Ð² завиÑимоÑти от ## Примеры {#primery} -**1.** Создание таблицы Ñ Ñтолбцом типа `DateTime` и вÑтавка данных в неё: +**1.** Создание таблицы Ñо Ñтолбцом типа `DateTime` и вÑтавка данных в неё: ``` sql CREATE TABLE dt diff --git a/docs/ru/sql-reference/functions/arithmetic-functions.md b/docs/ru/sql-reference/functions/arithmetic-functions.md index 73bac0595e1..ca7a4566c6c 100644 --- a/docs/ru/sql-reference/functions/arithmetic-functions.md +++ b/docs/ru/sql-reference/functions/arithmetic-functions.md @@ -172,7 +172,7 @@ multiplyDecimal(a, b[, result_scale]) ``` :::note -Эта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ñ€Ð°Ð±Ð¾Ñ‚Ð°ÑŽÑ‚ гораздо медленнее обычной `multiply`. +Эта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ñ€Ð°Ð±Ð¾Ñ‚Ð°ÐµÑ‚ гораздо медленнее обычной `multiply`. Ð’ Ñлучае, еÑли нет необходимоÑти иметь фикÑированную точноÑÑ‚ÑŒ и/или нужны быÑтрые вычиÑлениÑ, Ñледует иÑпользовать [multiply](#multiply). ::: diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 659e2d3f75e..1f06bdf264a 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -488,7 +488,7 @@ arrayPushBack(array, single_value) **Ðргументы** - `array` – маÑÑив. -- `single_value` – значение добавлÑемого Ñлемента. Ð’ маÑÑив Ñ Ñ‡Ð¸Ñлам можно добавить только чиÑла, в маÑÑив Ñо Ñтроками только Ñтроки. При добавлении чиÑел ClickHouse автоматичеÑки приводит тип `single_value` к типу данных маÑÑива. Подробнее о типах данных в ClickHouse читайте в разделе «[Типы данных](../../sql-reference/functions/array-functions.md#data_types)». Может быть равно `NULL`, в Ñтом Ñлучае Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð´Ð¾Ð±Ð°Ð²Ð¸Ñ‚ Ñлемент `NULL` в маÑÑив, а тип Ñлементов маÑÑива преобразует в `Nullable`. +- `single_value` – значение добавлÑемого Ñлемента. Ð’ маÑÑив Ñ Ñ‡Ð¸Ñлами можно добавить только чиÑла, в маÑÑив Ñо Ñтроками только Ñтроки. При добавлении чиÑел ClickHouse автоматичеÑки приводит тип `single_value` к типу данных маÑÑива. Подробнее о типах данных в ClickHouse читайте в разделе «[Типы данных](../../sql-reference/functions/array-functions.md#data_types)». Может быть равно `NULL`, в Ñтом Ñлучае Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð´Ð¾Ð±Ð°Ð²Ð¸Ñ‚ Ñлемент `NULL` в маÑÑив, а тип Ñлементов маÑÑива преобразует в `Nullable`. 
**Пример** @@ -513,7 +513,7 @@ arrayPushFront(array, single_value) **Ðргументы** - `array` – маÑÑив. -- `single_value` – значение добавлÑемого Ñлемента. Ð’ маÑÑив Ñ Ñ‡Ð¸Ñлам можно добавить только чиÑла, в маÑÑив Ñо Ñтроками только Ñтроки. При добавлении чиÑел ClickHouse автоматичеÑки приводит тип `single_value` к типу данных маÑÑива. Подробнее о типах данных в ClickHouse читайте в разделе «[Типы данных](../../sql-reference/functions/array-functions.md#data_types)». Может быть равно `NULL`, в Ñтом Ñлучае Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð´Ð¾Ð±Ð°Ð²Ð¸Ñ‚ Ñлемент `NULL` в маÑÑив, а тип Ñлементов маÑÑива преобразует в `Nullable`. +- `single_value` – значение добавлÑемого Ñлемента. Ð’ маÑÑив Ñ Ñ‡Ð¸Ñлами можно добавить только чиÑла, в маÑÑив Ñо Ñтроками только Ñтроки. При добавлении чиÑел ClickHouse автоматичеÑки приводит тип `single_value` к типу данных маÑÑива. Подробнее о типах данных в ClickHouse читайте в разделе «[Типы данных](../../sql-reference/functions/array-functions.md#data_types)». Может быть равно `NULL`, в Ñтом Ñлучае Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð´Ð¾Ð±Ð°Ð²Ð¸Ñ‚ Ñлемент `NULL` в маÑÑив, а тип Ñлементов маÑÑива преобразует в `Nullable`. **Пример** diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 385a9835eca..2ea045f4ae3 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -94,7 +94,7 @@ RENAME COLUMN [IF EXISTS] name to new_name Переименовывает Ñтолбец `name` в `new_name`. ЕÑли указано выражение `IF EXISTS`, то Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð½Ðµ будет возвращать ошибку при уÑловии, что Ñтолбец `name` не ÑущеÑтвует. ПоÑкольку переименование не затрагивает физичеÑкие данные колонки, Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÑетÑÑ Ð¿Ñ€Ð°ÐºÑ‚Ð¸Ñ‡ÐµÑки мгновенно. -**ЗÐМЕЧЕÐИЕ**: Столбцы, ÑвлÑющиеÑÑ Ñ‡Ð°Ñтью оÑновного ключа или ключа Ñортировки (заданные Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать Ñти Ñлобцы приведет к `SQL Error [524]`. +**ЗÐМЕЧЕÐИЕ**: Столбцы, ÑвлÑющиеÑÑ Ñ‡Ð°Ñтью оÑновного ключа или ключа Ñортировки (заданные Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать Ñти Ñлобцы приведет к `SQL Error [524]`. Пример: @@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp; ОтÑутÑтвует возможноÑÑ‚ÑŒ удалÑÑ‚ÑŒ Ñтолбцы, входÑщие в первичный ключ или ключ Ð´Ð»Ñ ÑÑÐ¼Ð¿Ð»Ð¸Ñ€Ð¾Ð²Ð°Ð½Ð¸Ñ (в общем, входÑщие в выражение `ENGINE`). Изменение типа у Ñтолбцов, входÑщих в первичный ключ возможно только в том Ñлучае, еÑли Ñто изменение не приводит к изменению данных (например, разрешено добавление Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ Ð² Enum или изменение типа Ñ `DateTime` на `UInt32`). -ЕÑли возможноÑтей запроÑа `ALTER` не хватает Ð´Ð»Ñ Ð½ÑƒÐ¶Ð½Ð¾Ð³Ð¾ Ð¸Ð·Ð¼ÐµÐ½ÐµÐ½Ð¸Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ†Ñ‹, вы можете Ñоздать новую таблицу, Ñкопировать туда данные Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ запроÑа [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменÑÑ‚ÑŒ таблицы меÑтами Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ запроÑа [RENAME](../rename.md#rename-table), и удалить Ñтарую таблицу. Ð’ качеÑтве альтернативы Ð´Ð»Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñа `INSERT SELECT`, можно иÑпользовать инÑтрумент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). 
+ЕÑли возможноÑтей запроÑа `ALTER` не хватает Ð´Ð»Ñ Ð½ÑƒÐ¶Ð½Ð¾Ð³Ð¾ Ð¸Ð·Ð¼ÐµÐ½ÐµÐ½Ð¸Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ†Ñ‹, вы можете Ñоздать новую таблицу, Ñкопировать туда данные Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ запроÑа [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменÑÑ‚ÑŒ таблицы меÑтами Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ запроÑа [RENAME](../rename.md#rename-table), и удалить Ñтарую таблицу. Ð—Ð°Ð¿Ñ€Ð¾Ñ `ALTER` блокирует вÑе Ñ‡Ñ‚ÐµÐ½Ð¸Ñ Ð¸ запиÑи Ð´Ð»Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ†Ñ‹. То еÑÑ‚ÑŒ еÑли на момент запроÑа `ALTER` выполнÑлÑÑ Ð´Ð¾Ð»Ð³Ð¸Ð¹ `SELECT`, то Ð·Ð°Ð¿Ñ€Ð¾Ñ `ALTER` Ñначала дождётÑÑ ÐµÐ³Ð¾ выполнениÑ. И в Ñто Ð²Ñ€ÐµÐ¼Ñ Ð²Ñе новые запроÑÑ‹ к той же таблице будут ждать, пока завершитÑÑ Ñтот `ALTER`. diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 543a4b21ad1..032bdc6e6d4 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -11,7 +11,9 @@ sidebar_label: "ПредÑтавление" ## Обычные предÑÑ‚Ð°Ð²Ð»ÐµÐ½Ð¸Ñ {#normal} ``` sql -CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ... +CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] +[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] +AS SELECT ... ``` Обычные предÑÑ‚Ð°Ð²Ð»ÐµÐ½Ð¸Ñ Ð½Ðµ хранÑÑ‚ никаких данных, они выполнÑÑŽÑ‚ чтение данных из другой таблицы при каждом доÑтупе. Другими Ñловами, обычное предÑтавление — Ñто не что иное, как Ñохраненный запроÑ. При чтении данных из предÑÑ‚Ð°Ð²Ð»ÐµÐ½Ð¸Ñ Ñтот Ñохраненный Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð¸ÑпользуетÑÑ ÐºÐ°Ðº Ð¿Ð¾Ð´Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð² Ñекции [FROM](../../../sql-reference/statements/select/from.md). @@ -37,7 +39,9 @@ SELECT a, b, c FROM (SELECT ...) ## Материализованные предÑÑ‚Ð°Ð²Ð»ÐµÐ½Ð¸Ñ {#materialized} ``` sql -CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ... +CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] +[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] +AS SELECT ... ``` Материализованные (MATERIALIZED) предÑÑ‚Ð°Ð²Ð»ÐµÐ½Ð¸Ñ Ñ…Ñ€Ð°Ð½ÑÑ‚ данные, преобразованные ÑоответÑтвующим запроÑом [SELECT](../../../sql-reference/statements/select/index.md). @@ -66,6 +70,52 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Чтобы удалить предÑтавление, Ñледует иÑпользовать [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Впрочем, `DROP TABLE` тоже работает Ð´Ð»Ñ Ð¿Ñ€ÐµÐ´Ñтавлений. +## SQL безопаÑноÑÑ‚ÑŒ {#sql_security} + +Параметры `DEFINER` и `SQL SECURITY` позволÑÑŽÑ‚ задать правило от имени какого Ð¿Ð¾Ð»ÑŒÐ·Ð¾Ð²Ð°Ñ‚ÐµÐ»Ñ Ð±ÑƒÐ´ÑƒÑ‚ выполнÑÑ‚ÑŒÑÑ Ð·Ð°Ð¿Ñ€Ð¾ÑÑ‹ к таблицам, на которые ÑÑылаетÑÑ Ð¿Ñ€ÐµÐ´Ñтавление. +Ð”Ð»Ñ `SQL SECURITY` допуÑтимо три значениÑ: `DEFINER`, `INVOKER`, или `NONE`. +Ð”Ð»Ñ `DEFINER` можно указать Ð¸Ð¼Ñ Ð»ÑŽÐ±Ð¾Ð³Ð¾ ÑущеÑтвующего Ð¿Ð¾Ð»ÑŒÐ·Ð¾Ð²Ð°Ñ‚ÐµÐ»Ñ Ð¸Ð»Ð¸ же `CURRENT_USER`. + +Далее приведена таблица, объÑÑнÑÑŽÑ‰Ð°Ñ ÐºÐ°ÐºÐ¸Ðµ права необходимы каким пользователÑм при заданных параметрах SQL безопаÑноÑти. +Обратите внимание, что, в незавиÑимоÑти от заданных параметров SQL безопаÑноÑти, +у Ð¿Ð¾Ð»ÑŒÐ·Ð¾Ð²Ð°Ñ‚ÐµÐ»Ñ Ð´Ð¾Ð»Ð¶Ð½Ð¾ быть право `GRANT SELECT ON ` Ð´Ð»Ñ Ñ‡Ñ‚ÐµÐ½Ð¸Ñ Ð¸Ð· предÑтавлениÑ. 
+ +| SQL security option | View | Materialized View | +|---------------------|----------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------| +| `DEFINER alice` | У `alice` должно быть право `SELECT` на таблицу-иÑточник. | У `alice` должны быть права `SELECT` на таблицу-иÑточник и `INSERT` на таблицу-назначение. | +| `INVOKER` | У Ð¿Ð¾Ð»ÑŒÐ·Ð¾Ð²Ð°Ñ‚ÐµÐ»Ñ Ð²Ñ‹Ð¿Ð¾Ð»Ð½Ñющего Ð·Ð°Ð¿Ñ€Ð¾Ñ Ðº предÑтавлению должно быть право `SELECT` на таблицу-иÑточник. | Тип `SQL SECURITY INVOKER` не может быть указан Ð´Ð»Ñ Ð¼Ð°Ñ‚ÐµÑ€Ð¸Ð°Ð»Ð¸Ð·Ð¾Ð²Ð°Ð½Ð½Ñ‹Ñ… предÑтавлений. | +| `NONE` | - | - | + +:::note +Тип `SQL SECURITY NONE` не безопаÑен Ð´Ð»Ñ Ð¸ÑпользованиÑ. Любой пользователь Ñ Ð¿Ñ€Ð°Ð²Ð¾Ð¼ Ñоздавать предÑÑ‚Ð°Ð²Ð»ÐµÐ½Ð¸Ñ Ñ `SQL SECURITY NONE` Ñможет иÑполнÑÑ‚ÑŒ любые запроÑÑ‹ без проверки прав. +По умолчанию, у пользователей нет прав указывать `SQL SECURITY NONE`, однако, при необходимоÑти, Ñто право можно выдать Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ `GRANT ALLOW SQL SECURITY NONE TO `. +::: + +ЕÑли `DEFINER`/`SQL SECURITY` не указан, будут иÑпользованы Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ Ð¿Ð¾ умолчанию: +- `SQL SECURITY`: `INVOKER` Ð´Ð»Ñ Ð¾Ð±Ñ‹Ñ‡Ð½Ñ‹Ñ… предÑтавлений и `DEFINER` Ð´Ð»Ñ Ð¼Ð°Ñ‚ÐµÑ€Ð¸Ð°Ð»Ð¸Ð·Ð¾Ð²Ð°Ð½Ð½Ñ‹Ñ… ([изменÑетÑÑ Ð² наÑтройках](../../../operations/settings/settings.md#default_normal_view_sql_security)) +- `DEFINER`: `CURRENT_USER` ([изменÑетÑÑ Ð² наÑтройках](../../../operations/settings/settings.md#default_view_definer)) + +ЕÑли предÑтавление подключаетÑÑ Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ ключевого Ñлова `ATTACH` и наÑтройки SQL безопаÑноÑти не были заданы, +то по умолчанию будет иÑпользоватьÑÑ `SQL SECURITY NONE` Ð´Ð»Ñ Ð¼Ð°Ñ‚ÐµÑ€Ð¸Ð°Ð»Ð¸Ð·Ð¾Ð²Ð°Ð½Ð½Ñ‹Ñ… предÑтавлений и `SQL SECURITY INVOKER` Ð´Ð»Ñ Ð¾Ð±Ñ‹Ñ‡Ð½Ñ‹Ñ…. + +Изменить параметры SQL безопаÑноÑти возможно Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ Ñледующего запроÑа: +```sql +ALTER TABLE MODIFY SQL SECURITY { DEFINER | INVOKER | NONE } [DEFINER = { user | CURRENT_USER }] +``` + +### Примеры предÑтавлений Ñ SQL безопаÑноÑтью +```sql +CREATE test_view +DEFINER = alice SQL SECURITY DEFINER +AS SELECT ... +``` + +```sql +CREATE test_view +SQL SECURITY INVOKER +AS SELECT ... +``` + ## LIVE-предÑÑ‚Ð°Ð²Ð»ÐµÐ½Ð¸Ñ [ÑкÑпериментальный функционал] {#live-view} :::note Важно diff --git a/docs/ru/sql-reference/statements/select/distinct.md b/docs/ru/sql-reference/statements/select/distinct.md index 58fe16b16d9..ad310434598 100644 --- a/docs/ru/sql-reference/statements/select/distinct.md +++ b/docs/ru/sql-reference/statements/select/distinct.md @@ -92,7 +92,7 @@ ClickHouse поддерживает иÑпользование Ñекций `DIS ## Обработка NULL {#null-processing} -`DISTINCT` работает Ñ [NULL](../../syntax.md#null-literal) как-будто `NULL` — обычное значение и `NULL==NULL`. Другими Ñловами, в результате `DISTINCT`, различные комбинации Ñ `NULL` вÑтретÑÑ‚ÑÑ Ñ‚Ð¾Ð»ÑŒÐºÐ¾ один раз. Это отличаетÑÑ Ð¾Ñ‚ обработки `NULL` в большинÑтве других контекÑтов. +`DISTINCT` работает Ñ [NULL](../../syntax.md#null-literal) как будто `NULL` — обычное значение и `NULL==NULL`. Другими Ñловами, в результате `DISTINCT`, различные комбинации Ñ `NULL` вÑтретÑÑ‚ÑÑ Ñ‚Ð¾Ð»ÑŒÐºÐ¾ один раз. Это отличаетÑÑ Ð¾Ñ‚ обработки `NULL` в большинÑтве других контекÑтов. 
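A short illustration of that `DISTINCT` behaviour with `NULL`, using an ad-hoc `values()` table; the sample data is invented for this sketch and does not come from the document:

```sql
-- DISTINCT treats NULL as an ordinary value, so NULL appears exactly once in the result.
SELECT DISTINCT x
FROM values('x Nullable(Int32)', 1, NULL, 1, NULL, 2);

-- Expected result: three rows in some order: 1, 2 and NULL.
```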
## Ðльтернативы {#alternatives} diff --git a/docs/ru/sql-reference/table-functions/cluster.md b/docs/ru/sql-reference/table-functions/cluster.md index f148a21294a..bb22b38f8f9 100644 --- a/docs/ru/sql-reference/table-functions/cluster.md +++ b/docs/ru/sql-reference/table-functions/cluster.md @@ -33,7 +33,7 @@ clusterAllReplicas('cluster_name', db, table[, sharding_key]) **ИÑпользование макроÑов** -`cluster_name` может Ñодержать Ð¼Ð°ÐºÑ€Ð¾Ñ â€” подÑтановку в фигурных Ñкобках. Эта подÑтановка заменÑетÑÑ Ð½Ð° ÑоответÑтвующее значение из Ñекции [macros](../../operations/server-configuration-parameters/settings.md#macros) конфигурационного файла . +`cluster_name` может Ñодержать Ð¼Ð°ÐºÑ€Ð¾Ñ â€” подÑтановку в фигурных Ñкобках. Эта подÑтановка заменÑетÑÑ Ð½Ð° ÑоответÑтвующее значение из Ñекции [macros](../../operations/server-configuration-parameters/settings.md#macros) конфигурационного файла. Пример: diff --git a/docs/zh/getting-started/example-datasets/opensky.mdx b/docs/zh/getting-started/example-datasets/opensky.mdx index 92cd104e06e..b79c02ab780 100644 --- a/docs/zh/getting-started/example-datasets/opensky.mdx +++ b/docs/zh/getting-started/example-datasets/opensky.mdx @@ -1,4 +1,4 @@ ---- +--- slug: /zh/getting-started/example-datasets/opensky sidebar_label: ç©ºä¸­äº¤é€šæ•°æ® description: 该数æ®é›†ä¸­çš„æ•°æ®æ˜¯ä»Žå®Œæ•´çš„ OpenSky æ•°æ®é›†ä¸­è¡ç”Ÿè€Œæ¥çš„,对其中的数æ®è¿›è¡Œäº†å¿…è¦çš„清ç†ï¼Œç”¨ä»¥å±•ç¤ºåœ¨ COVID-19 期间空中交通的å‘展。 @@ -53,12 +53,12 @@ CREATE TABLE opensky ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"' ``` -- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处ç†ã€‚ `xargs -P100` 指定最多使用 100 个并行工作程åºï¼Œä½†ç”±äºŽæˆ‘们åªæœ‰ 30 个文件,工作程åºçš„æ•°é‡å°†åªæœ‰ 30 个。 -- 对于æ¯ä¸ªæ–‡ä»¶ï¼Œ`xargs` 将通过 `bash -c` 为æ¯ä¸ªæ–‡ä»¶è¿è¡Œä¸€ä¸ªè„šæœ¬æ–‡ä»¶ã€‚该脚本通过使用 `{}` 表示文件åå ä½ç¬¦ï¼Œç„¶åŽ `xargs` 由命令进行填充(使用 `-I{}`)。 -- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` å‚数),并将输出é‡å®šå‘到 `clickhouse-client`。 -- 我们还è¦æ±‚使用扩展解æžå™¨è§£æž [DateTime](../../sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](../../operations/settings/ settings.md#settings-date_time_input_format)) 以识别具有时区å移的 ISO-8601 æ ¼å¼ã€‚ +- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处ç†ã€‚ `xargs -P100` 指定最多使用 100 个并行工作程åºï¼Œä½†ç”±äºŽæˆ‘们åªæœ‰ 30 个文件,工作程åºçš„æ•°é‡å°†åªæœ‰ 30 个。 +- 对于æ¯ä¸ªæ–‡ä»¶ï¼Œ`xargs` 将通过 `bash -c` 为æ¯ä¸ªæ–‡ä»¶è¿è¡Œä¸€ä¸ªè„šæœ¬æ–‡ä»¶ã€‚该脚本通过使用 `{}` 表示文件åå ä½ç¬¦ï¼Œç„¶åŽ `xargs` 由命令进行填充(使用 `-I{}`)。 +- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` å‚数),并将输出é‡å®šå‘到 `clickhouse-client`。 +- 我们还è¦æ±‚使用扩展解æžå™¨è§£æž [DateTime](/docs/zh/sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](/docs/zh/operations/settings/settings.md#settings-date_time_input_format)) 以识别具有时区å移的 ISO-8601 æ ¼å¼ã€‚ -最åŽï¼Œ`clickhouse-client` 会以 [CSVWithNames](../../interfaces/formats.md#csvwithnames) æ ¼å¼è¯»å–输入数æ®ç„¶åŽæ‰§è¡Œæ’入。 +最åŽï¼Œ`clickhouse-client` 会以 [CSVWithNames](/docs/zh/interfaces/formats.md#csvwithnames) æ ¼å¼è¯»å–输入数æ®ç„¶åŽæ‰§è¡Œæ’入。 å¹¶è¡Œå¯¼å…¥éœ€è¦ 24 秒。 diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md index 989cf5f57d8..d0c9bda83ef 100644 --- a/docs/zh/getting-started/tutorial.md +++ b/docs/zh/getting-started/tutorial.md @@ -582,8 +582,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; ``` -!!! 
warning "注æ„:" - è¿™ç§æ–¹æ³•ä¸é€‚åˆå¤§åž‹è¡¨çš„分片。 有一个å•ç‹¬çš„工具 [clickhouse-copier](../operations/utilities/clickhouse-copier.md) è¿™å¯ä»¥é‡æ–°åˆ†ç‰‡ä»»æ„大表。 正如您所期望的那样,如果计算é‡å¤§çš„查询使用3å°æœåŠ¡å™¨è€Œä¸æ˜¯ä¸€ä¸ªï¼Œåˆ™è¿è¡Œé€Ÿåº¦å¿«Nå€ã€‚ diff --git a/docs/zh/interfaces/third-party/gui.md b/docs/zh/interfaces/third-party/gui.md index 9dd32efc970..6cf1b99b640 100644 --- a/docs/zh/interfaces/third-party/gui.md +++ b/docs/zh/interfaces/third-party/gui.md @@ -129,3 +129,18 @@ ClickHouse Web ç•Œé¢ [Tabix](https://github.com/tabixio/tabix). - æ•°æ®ç¼–辑器。 - é‡æž„。 - æœç´¢å’Œå¯¼èˆªã€‚ + +### CKMAN {#ckman} + +[CKMAN](https://www.github.com/housepower/ckman) 是一个用于管ç†å’Œç›‘控ClickHouse集群的å¯è§†åŒ–å·¥å…·ï¼ + +特å¾ï¼š + +- éžå¸¸å¿«é€Ÿä¾¿æ·çš„通过æµè§ˆå™¨ç•Œé¢è‡ªåŠ¨åŒ–部署集群 +- 支æŒå¯¹é›†ç¾¤è¿›è¡Œæ‰©ç¼©å®¹æ“作 +- 对集群的数æ®è¿›è¡Œè´Ÿè½½å‡è¡¡ +- 对集群进行在线å‡çº§ +- 通过界é¢ä¿®æ”¹é›†ç¾¤é…ç½® +- æ供集群节点监控,zookeeper监控 +- 监控表ã€åˆ†åŒºçŠ¶æ€ï¼Œæ…¢SQL监控 +- æ供简å•æ˜“æ“作的SQLæ‰§è¡Œé¡µé¢ diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md index 6d491f9c2f7..48e852b4228 100644 --- a/docs/zh/operations/backup.md +++ b/docs/zh/operations/backup.md @@ -24,12 +24,6 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD" æŸäº›æœ¬åœ°æ–‡ä»¶ç³»ç»Ÿæ供快照功能(例如, [ZFS](https://en.wikipedia.org/wiki/ZFS)),但它们å¯èƒ½ä¸æ˜¯æ供实时查询的最佳选择。 一个å¯èƒ½çš„解决方案是使用这ç§æ–‡ä»¶ç³»ç»Ÿåˆ›å»ºé¢å¤–的副本,并将它们与用于`SELECT` 查询的 [分布å¼](../engines/table-engines/special/distributed.md) 表分离。 任何修改数æ®çš„查询都无法访问此类副本上的快照。 作为回报,这些副本å¯èƒ½å…·æœ‰ç‰¹æ®Šçš„硬件é…置,æ¯ä¸ªæœåŠ¡å™¨é™„加更多的ç£ç›˜ï¼Œè¿™å°†æ˜¯ç»æµŽé«˜æ•ˆçš„。 -## clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](utilities/clickhouse-copier.md) 是一个多功能工具,最åˆåˆ›å»ºå®ƒæ˜¯ä¸ºäº†ç”¨äºŽé‡æ–°åˆ‡åˆ†pb大å°çš„表。 因为它能够在ClickHouse表和集群之间å¯é åœ°å¤åˆ¶æ•°æ®ï¼Œæ‰€ä»¥å®ƒä¹Ÿå¯ç”¨äºŽå¤‡ä»½å’Œè¿˜åŽŸæ•°æ®ã€‚ - -对于较å°çš„æ•°æ®é‡ï¼Œä¸€ä¸ªç®€å•çš„ `INSERT INTO ... SELECT ...` 到远程表也å¯ä»¥å·¥ä½œã€‚ - ## partæ“作 {#manipulations-with-parts} ClickHouseå…许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是利用硬链接(hardlink)到 `/var/lib/clickhouse/shadow/` 文件夹中实现的,所以它通常ä¸ä¼šå› ä¸ºæ—§æ•°æ®è€Œå ç”¨é¢å¤–çš„ç£ç›˜ç©ºé—´ã€‚ 创建的文件副本ä¸ç”±ClickHouseæœåŠ¡å™¨å¤„ç†ï¼Œæ‰€ä»¥ä½ å¯ä»¥æŠŠå®ƒä»¬ç•™åœ¨é‚£é‡Œï¼šä½ å°†æœ‰ä¸€ä¸ªç®€å•çš„备份,ä¸éœ€è¦ä»»ä½•é¢å¤–的外部系统,但它ä»ç„¶å®¹æ˜“出现硬件问题。 出于这个原因,最好将它们远程å¤åˆ¶åˆ°å¦ä¸€ä¸ªä½ç½®ï¼Œç„¶åŽåˆ é™¤æœ¬åœ°å‰¯æœ¬ã€‚ 分布å¼æ–‡ä»¶ç³»ç»Ÿå’Œå¯¹è±¡å­˜å‚¨ä»ç„¶æ˜¯ä¸€ä¸ªä¸é”™çš„选择,但是具有足够大容é‡çš„正常附加文件æœåŠ¡å™¨ä¹Ÿå¯ä»¥å·¥ä½œï¼ˆåœ¨è¿™ç§æƒ…况下,传输将通过网络文件系统或者也许是 [rsync](https://en.wikipedia.org/wiki/Rsync) æ¥è¿›è¡Œ). diff --git a/docs/zh/operations/clickhouse-keeper.md b/docs/zh/operations/clickhouse-keeper.md index 6d8a570aa12..e4412be2e30 100644 --- a/docs/zh/operations/clickhouse-keeper.md +++ b/docs/zh/operations/clickhouse-keeper.md @@ -45,6 +45,7 @@ ClickHouse Keeper 完全å¯ä»¥ä½œä¸ºZooKeeper的独立替代å“或者作为Click - `heart_beat_interval_ms` — ClickHouse Keeperçš„leaderå‘é€å¿ƒè·³é¢‘率(毫秒)(默认为500)。 - `election_timeout_lower_bound_ms` — 如果follower在此间隔内没有收到leader的心跳,那么它å¯ä»¥å¯åŠ¨leader选举(默认为1000). 
- `election_timeout_upper_bound_ms` — 如果follower在此间隔内没有收到leader的心跳,那么它必须å¯åŠ¨leader选举(默认为2000)。 +- `leadership_expiry_ms` — 如果leader在此间隔内没有收到足够的follower回å¤ï¼Œé‚£ä¹ˆä»–会主动放弃领导æƒã€‚当被设置为0时会自动设置为`heart_beat_interval_ms`çš„20å€ï¼Œå½“被设置å°äºŽ0æ—¶leaderä¸ä¼šä¸»åŠ¨æ”¾å¼ƒé¢†å¯¼æƒï¼ˆé»˜è®¤ä¸º0)。 - `rotate_log_storage_interval` — å•ä¸ªæ–‡ä»¶ä¸­å­˜å‚¨çš„日志记录数é‡(默认100000æ¡)。 - `reserved_log_items` — 在压缩之å‰éœ€è¦å­˜å‚¨å¤šå°‘å调日志记录(默认100000)。 - `snapshot_distance` — ClickHouse Keeper创建新快照的频率(以日志记录的数é‡ä¸ºå•ä½)(默认100000)。 @@ -214,6 +215,7 @@ dead_session_check_period_ms=500 heart_beat_interval_ms=500 election_timeout_lower_bound_ms=1000 election_timeout_upper_bound_ms=2000 +leadership_expiry_ms=0 reserved_log_items=1000000000000000 snapshot_distance=10000 auto_forwarding=true diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 1874970ac95..c3b4194ed44 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -649,11 +649,22 @@ log_query_threads=1 ## max_query_size {#settings-max_query_size} -查询的最大部分,å¯ä»¥è¢«å¸¦åˆ°RAM用于使用SQL解æžå™¨è¿›è¡Œè§£æžã€‚ -æ’入查询还包å«ç”±å•ç‹¬çš„æµè§£æžå™¨ï¼ˆæ¶ˆè€—O(1)RAM)处ç†çš„æ’入数æ®ï¼Œè¿™äº›æ•°æ®ä¸åŒ…å«åœ¨æ­¤é™åˆ¶ä¸­ã€‚ +SQL 解æžå™¨è§£æžçš„查询字符串的最大字节数。 INSERT 查询的 VALUES å­å¥ä¸­çš„æ•°æ®ç”±å•ç‹¬çš„æµè§£æžå™¨ï¼ˆæ¶ˆè€— O(1) RAM)处ç†ï¼Œå¹¶ä¸”ä¸å—æ­¤é™åˆ¶çš„å½±å“。 默认值:256KiB。 + +## max_parser_depth {#max_parser_depth} + +é™åˆ¶é€’归下é™è§£æžå™¨ä¸­çš„最大递归深度。å…许控制堆栈大å°ã€‚ + +å¯èƒ½çš„值: + +- 正整数。 +- 0 — 递归深度ä¸å—é™åˆ¶ã€‚ + +默认值:1000。 + ## interactive_delay {#interactive-delay} 以微秒为å•ä½çš„间隔,用于检查请求执行是å¦å·²è¢«å–消并å‘é€è¿›åº¦ã€‚ @@ -1064,6 +1075,28 @@ ClickHouse生æˆå¼‚常 默认值:0。 +## optimize_functions_to_subcolumns {#optimize_functions_to_subcolumns} + +å¯ç”¨æˆ–ç¦ç”¨é€šè¿‡å°†æŸäº›å‡½æ•°è½¬æ¢ä¸ºè¯»å–å­åˆ—的优化。这å‡å°‘了è¦è¯»å–çš„æ•°æ®é‡ã€‚ + +这些函数å¯ä»¥è½¬åŒ–为: + +- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 +- [empty](../../sql-reference/functions/array-functions.md/#empty函数) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 +- [notEmpty](../../sql-reference/functions/array-functions.md/#notempty函数) è¯»å– [size0](../../sql-reference/data-types/array.md/#array-size)å­åˆ—。 +- [isNull](../../sql-reference/operators/index.md#operator-is-null) è¯»å– [null](../../sql-reference/data-types/nullable. md/#finding-null) å­åˆ—。 +- [isNotNull](../../sql-reference/operators/index.md#is-not-null) è¯»å– [null](../../sql-reference/data-types/nullable. 
md/#finding-null) å­åˆ—。 +- [count](../../sql-reference/aggregate-functions/reference/count.md) è¯»å– [null](../../sql-reference/data-types/nullable.md/#finding-null) å­åˆ—。 +- [mapKeys](../../sql-reference/functions/tuple-map-functions.mdx/#mapkeys) è¯»å– [keys](../../sql-reference/data-types/map.md/#map-subcolumns) å­åˆ—。 +- [mapValues](../../sql-reference/functions/tuple-map-functions.mdx/#mapvalues) è¯»å– [values](../../sql-reference/data-types/map.md/#map-subcolumns) å­åˆ—。 + +å¯èƒ½çš„值: + +- 0 — ç¦ç”¨ä¼˜åŒ–。 +- 1 — 优化已å¯ç”¨ã€‚ + +默认值:`0`。 + ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} - 类型:秒 diff --git a/docs/zh/operations/system-tables/dictionaries.md b/docs/zh/operations/system-tables/dictionaries.md index 105a591cf69..0cf91e45e86 100644 --- a/docs/zh/operations/system-tables/dictionaries.md +++ b/docs/zh/operations/system-tables/dictionaries.md @@ -20,7 +20,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) 查询,超时,字典é…置已更改)。 - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. - `origin` ([字符串](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. -- `type` ([字符串](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [在内存中存储字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). +- `type` ([字符串](../../sql-reference/data-types/string.md)) — Type of dictionary allocation. [在内存中存储字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). - `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):æ•°å­—é”® ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)â€. 
- `attribute.names` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Array of [属性å称](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 由字典æ供。 - `attribute.types` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Corresponding array of [属性类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 这是由字典æ供。 diff --git a/docs/zh/operations/utilities/clickhouse-copier.md b/docs/zh/operations/utilities/clickhouse-copier.md deleted file mode 100644 index b01edd9257c..00000000000 --- a/docs/zh/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,172 +0,0 @@ ---- -slug: /zh/operations/utilities/clickhouse-copier ---- -# clickhouse-copier {#clickhouse-copier} - -将数æ®ä»Žä¸€ä¸ªç¾¤é›†ä¸­çš„表å¤åˆ¶åˆ°å¦ä¸€ä¸ªï¼ˆæˆ–相åŒï¼‰ç¾¤é›†ä¸­çš„表。 - -您å¯ä»¥è¿è¡Œå¤šä¸ª `clickhouse-copier` ä¸åŒæœåŠ¡å™¨ä¸Šçš„实例执行相åŒçš„作业。 ZooKeeper用于åŒæ­¥è¿›ç¨‹ã€‚ - -开始åŽ, `clickhouse-copier`: - -- 连接到ZooKeeper并且接收: - - - å¤åˆ¶ä½œä¸šã€‚ - - å¤åˆ¶ä½œä¸šçš„状æ€ã€‚ - -- 它执行的工作。 - - æ¯ä¸ªæ­£åœ¨è¿è¡Œçš„进程都会选择æºé›†ç¾¤çš„“最接近â€åˆ†ç‰‡ï¼Œç„¶åŽå°†æ•°æ®å¤åˆ¶åˆ°ç›®æ ‡é›†ç¾¤ï¼Œå¹¶åœ¨å¿…è¦æ—¶é‡æ–°åˆ†ç‰‡æ•°æ®ã€‚ - -`clickhouse-copier` 跟踪ZooKeeper中的更改,并实时应用它们。 - -为了å‡å°‘网络æµé‡ï¼Œæˆ‘们建议è¿è¡Œ `clickhouse-copier` 在æºæ•°æ®æ‰€åœ¨çš„åŒä¸€æœåŠ¡å™¨ä¸Šã€‚ - -## è¿è¡ŒClickhouse-copier {#running-clickhouse-copier} - -该实用程åºåº”手动è¿è¡Œ: - -``` bash -clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -å‚æ•°: - -- `daemon` — 在守护进程模å¼ä¸‹å¯åŠ¨`clickhouse-copier`。 -- `config` — `zookeeper.xml`文件的路径,其中包å«ç”¨äºŽè¿žæŽ¥ZooKeeperçš„å‚数。 -- `task-path` — ZooKeeper节点的路径。 该节点用于åŒæ­¥`clickhouse-copier`进程和存储任务。 任务存储在`$task-path/description`中。 -- `task-file` — å¯é€‰çš„éžå¿…é¡»å‚æ•°, 指定一个包å«ä»»åŠ¡é…置的å‚数文件, 用于åˆå§‹ä¸Šä¼ åˆ°ZooKeeper。 -- `task-upload-force` — å³ä½¿èŠ‚点已ç»å­˜åœ¨ï¼Œä¹Ÿå¼ºåˆ¶ä¸Šè½½`task-file`。 -- `base-dir` — 日志和辅助文件的路径。 å¯åŠ¨æ—¶ï¼Œ`clickhouse-copier`在`$base-dir`中创建`clickhouse-copier_YYYYMMHHSS_`å­ç›®å½•ã€‚ 如果çœç•¥æ­¤å‚数,则会在å¯åŠ¨`clickhouse-copier`的目录中创建目录。 - - - -## Zookeeper.xmlæ ¼å¼ {#format-of-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## å¤åˆ¶ä»»åŠ¡çš„é…ç½® {#configuration-of-copying-tasks} - -``` xml - - - - - - false - - 127.0.0.1 - 9000 - - - ... - - - - ... - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... 
- - -``` - -`clickhouse-copier` 跟踪更改 `/task/path/description` 并在飞行中应用它们。 例如,如果你改å˜çš„值 `max_workers`,è¿è¡Œä»»åŠ¡çš„进程数也会å‘生å˜åŒ–。 diff --git a/docs/zh/operations/utilities/index.md b/docs/zh/operations/utilities/index.md index af158baf275..cebe312450c 100644 --- a/docs/zh/operations/utilities/index.md +++ b/docs/zh/operations/utilities/index.md @@ -4,5 +4,4 @@ slug: /zh/operations/utilities/ # 实用工具 {#clickhouse-utility} - [本地查询](clickhouse-local.md) — 在ä¸åœæ­¢ClickHouseæœåŠ¡çš„情况下,对数æ®æ‰§è¡ŒæŸ¥è¯¢æ“作(类似于 `awk` 命令)。 -- [跨集群å¤åˆ¶](clickhouse-copier.md) — 在ä¸åŒé›†ç¾¤é—´å¤åˆ¶æ•°æ®ã€‚ - [性能测试](clickhouse-benchmark.md) — 连接到ClickhouseæœåŠ¡å™¨ï¼Œæ‰§è¡Œæ€§èƒ½æµ‹è¯•ã€‚ diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index 46c40b889ad..da4cea65101 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/array --- -# 阵列(T) {#data-type-array} +# 数组(T) {#data-type-array} ç”± `T` 类型元素组æˆçš„数组。 @@ -66,3 +66,27 @@ SELECT array(1, 'a') Received exception from server (version 1.1.54388): Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. ``` + +## æ•°ç»„å¤§å° {#array-size} + +å¯ä»¥ä½¿ç”¨ `size0` å­åˆ—找到数组的大å°ï¼Œè€Œæ— éœ€è¯»å–整个列。对于多维数组,您å¯ä»¥ä½¿ç”¨ `sizeN-1`,其中 `N` 是所需的维度。 + +**例å­** + +SQL查询: + +```sql +CREATE TABLE t_arr (`arr` Array(Array(Array(UInt32)))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_arr VALUES ([[[12, 13, 0, 1],[12]]]); + +SELECT arr.size0, arr.size1, arr.size2 FROM t_arr; +``` + +结果: + +``` text +┌─arr.size0─┬─arr.size1─┬─arr.size2─┠+│ 1 │ [2] │ [[4,1]] │ +└───────────┴───────────┴───────────┘ +``` diff --git a/docs/zh/sql-reference/data-types/nullable.md b/docs/zh/sql-reference/data-types/nullable.md index 94311f8298a..b1cc9dd7bae 100644 --- a/docs/zh/sql-reference/data-types/nullable.md +++ b/docs/zh/sql-reference/data-types/nullable.md @@ -20,6 +20,33 @@ slug: /zh/sql-reference/data-types/nullable 掩ç æ–‡ä»¶ä¸­çš„æ¡ç›®å…许ClickHouse区分æ¯ä¸ªè¡¨è¡Œçš„对应数æ®ç±»åž‹çš„«NULL»和默认值由于有é¢å¤–的文件,«Nullable»列比普通列消耗更多的存储空间 +## nullå­åˆ— {#finding-null} + +通过使用 `null` å­åˆ—å¯ä»¥åœ¨åˆ—中查找 `NULL` 值,而无需读å–整个列。如果对应的值为 `NULL`,则返回 `1`,å¦åˆ™è¿”回 `0`。 + +**示例** + +SQL查询: + +``` sql +CREATE TABLE nullable (`n` Nullable(UInt32)) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO nullable VALUES (1) (NULL) (2) (NULL); + +SELECT n.null FROM nullable; +``` + +结果: + +``` text +┌─n.null─┠+│ 0 │ +│ 1 │ +│ 0 │ +│ 1 │ +└────────┘ +``` + ## 用法示例 {#yong-fa-shi-li} ``` sql diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 002d5102fa3..48665ae04ab 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) ä¸æ”¯æŒå¯¹primary key或者sampling key中的列(在 `ENGINE` 表达å¼ä¸­ç”¨åˆ°çš„列)进行删除æ“作。改å˜åŒ…å«åœ¨primary key中的列的类型时,如果æ“作ä¸ä¼šå¯¼è‡´æ•°æ®çš„å˜åŒ–(例如,往Enum中添加一个值,或者将`DateTime` ç±»åž‹æ”¹æˆ `UInt32`),那么这ç§æ“作是å¯è¡Œçš„。 -如果 `ALTER` æ“作ä¸è¶³ä»¥å®Œæˆä½ æƒ³è¦çš„表å˜åŠ¨æ“作,你å¯ä»¥åˆ›å»ºä¸€å¼ æ–°çš„表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数æ®æ‹·è´è¿›åŽ»ï¼Œç„¶åŽé€šè¿‡ [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改æˆå’ŒåŽŸæœ‰è¡¨ä¸€æ ·çš„å称,并删除原有的表。你å¯ä»¥ä½¿ç”¨ 
[clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 +如果 `ALTER` æ“作ä¸è¶³ä»¥å®Œæˆä½ æƒ³è¦çš„表å˜åŠ¨æ“作,你å¯ä»¥åˆ›å»ºä¸€å¼ æ–°çš„表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数æ®æ‹·è´è¿›åŽ»ï¼Œç„¶åŽé€šè¿‡ [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改æˆå’ŒåŽŸæœ‰è¡¨ä¸€æ ·çš„å称,并删除原有的表。 `ALTER` æ“作会阻塞对表的所有读写æ“作。æ¢å¥è¯è¯´ï¼Œå½“一个大的 `SELECT` 语å¥å’Œ `ALTER`åŒæ—¶æ‰§è¡Œæ—¶ï¼Œ`ALTER`会等待,直到 `SELECT` 执行结æŸã€‚与此åŒæ—¶ï¼Œå½“ `ALTER` è¿è¡Œæ—¶ï¼Œæ–°çš„ sql 语å¥å°†ä¼šç­‰å¾…。 diff --git a/docs/zh/sql-reference/statements/grant.md b/docs/zh/sql-reference/statements/grant.md index 7e7cdbff350..fea51d590d5 100644 --- a/docs/zh/sql-reference/statements/grant.md +++ b/docs/zh/sql-reference/statements/grant.md @@ -280,9 +280,6 @@ GRANT INSERT(x,y) ON db.table TO john - `ALTER MOVE PARTITION`. 级别: `TABLE`. 别å: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` - `ALTER FETCH PARTITION`. 级别: `TABLE`. 别å: `FETCH PARTITION` - `ALTER FREEZE PARTITION`. 级别: `TABLE`. 别å: `FREEZE PARTITION` - - `ALTER VIEW` 级别: `GROUP` - - `ALTER VIEW REFRESH`. 级别: `VIEW`. 别å: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` - - `ALTER VIEW MODIFY QUERY`. 级别: `VIEW`. 别å: `ALTER TABLE MODIFY QUERY` 如何对待该层级的示例: - `ALTER` æƒé™åŒ…å«æ‰€æœ‰å…¶å®ƒ `ALTER *` çš„æƒé™ diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 7894129b8e3..dc183ead102 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -50,8 +50,6 @@ contents: dst: /etc/init.d/clickhouse-server - src: clickhouse-server.service dst: /lib/systemd/system/clickhouse-server.service -- src: root/usr/bin/clickhouse-copier - dst: /usr/bin/clickhouse-copier - src: root/usr/bin/clickhouse-server dst: /usr/bin/clickhouse-server # clickhouse-keeper part diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 62bcf068879..d945fdf4a6f 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -122,7 +122,6 @@ add_subdirectory (local) add_subdirectory (benchmark) add_subdirectory (extract-from-config) add_subdirectory (compressor) -add_subdirectory (copier) add_subdirectory (format) add_subdirectory (obfuscator) add_subdirectory (install) @@ -200,7 +199,6 @@ clickhouse_program_install(clickhouse-server server) clickhouse_program_install(clickhouse-client client chc) clickhouse_program_install(clickhouse-local local chl ch) clickhouse_program_install(clickhouse-benchmark benchmark) -clickhouse_program_install(clickhouse-copier copier) clickhouse_program_install(clickhouse-extract-from-config extract-from-config) clickhouse_program_install(clickhouse-compressor compressor) clickhouse_program_install(clickhouse-format format) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 961c678b936..45dadfef774 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -568,10 +567,6 @@ public: } -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - int mainEntryClickHouseBenchmark(int argc, char ** argv) { using namespace DB; diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index fdd262f185d..fac34003553 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -50,10 +50,7 @@ #include #include #include - -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif +#include namespace fs = 
std::filesystem; using namespace std::literals; @@ -330,6 +327,7 @@ try processConfig(); adjustSettings(); initTTYBuffer(toProgressOption(config().getString("progress", "default"))); + ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); { // All that just to set DB::CurrentThread::get().getGlobalContext() @@ -845,83 +843,7 @@ bool Client::processWithFuzzing(const String & full_query) have_error = true; } - // Check that after the query is formatted, we can parse it back, - // format again and get the same result. Unfortunately, we can't - // compare the ASTs, which would be more sensitive to errors. This - // double formatting check doesn't catch all errors, e.g. we can - // format query incorrectly, but to a valid SQL that we can then - // parse and format into the same SQL. - // There are some complicated cases where we can generate the SQL - // which we can't parse: - // * first argument of lambda() replaced by fuzzer with - // something else, leading to constructs such as - // arrayMap((min(x) + 3) -> x + 1, ....) - // * internals of Enum replaced, leading to: - // Enum(equals(someFunction(y), 3)). - // And there are even the cases when we can parse the query, but - // it's logically incorrect and its formatting is a mess, such as - // when `lambda()` function gets substituted into a wrong place. - // To avoid dealing with these cases, run the check only for the - // queries we were able to successfully execute. - // Another caveat is that sometimes WITH queries are not executed, - // if they are not referenced by the main SELECT, so they can still - // have the aforementioned problems. Disable this check for such - // queries, for lack of a better solution. - // There is also a problem that fuzzer substitutes positive Int64 - // literals or Decimal literals, which are then parsed back as - // UInt64, and suddenly duplicate alias substitution starts or stops - // working (ASTWithAlias::formatImpl) or something like that. - // So we compare not even the first and second formatting of the - // query, but second and third. - // If you have to add any more workarounds to this check, just remove - // it altogether, it's not so useful. - if (ast_to_process && !have_error && !queryHasWithClause(*ast_to_process)) - { - ASTPtr ast_2; - try - { - const auto * tmp_pos = query_to_execute.c_str(); - ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_execute.size(), false /* allow_multi_statements */); - } - catch (Exception & e) - { - if (e.code() != ErrorCodes::SYNTAX_ERROR && - e.code() != ErrorCodes::TOO_DEEP_RECURSION) - throw; - } - - if (ast_2) - { - const auto text_2 = ast_2->formatForErrorMessage(); - const auto * tmp_pos = text_2.c_str(); - const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), - false /* allow_multi_statements */); - const auto text_3 = ast_3 ? 
ast_3->formatForErrorMessage() : ""; - - if (text_3 != text_2) - { - fmt::print(stderr, "Found error: The query formatting is broken.\n"); - - printChangedSettings(); - - fmt::print(stderr, - "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", - text_3, text_2); - fmt::print(stderr, "In more detail:\n"); - fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", ast_to_process->dumpTree()); - fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); - fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree()); - fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2); - fmt::print(stderr, "AST-3 (Text-2 parsed):\n'{}'\n", ast_3 ? ast_3->dumpTree() : ""); - fmt::print(stderr, "Text-3 (AST-3 formatted):\n'{}'\n", text_3); - fmt::print(stderr, "Text-3 must be equal to Text-2, but it is not.\n"); - - _exit(1); - } - } - } - - // The server is still alive so we're going to continue fuzzing. + // The server is still alive, so we're going to continue fuzzing. // Determine what we're going to use as the starting AST. if (have_error) { @@ -1216,6 +1138,13 @@ void Client::processOptions(const OptionsDescription & options_description, } +static bool checkIfStdoutIsRegularFile() +{ + struct stat file_stat; + return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); +} + + void Client::processConfig() { if (!queries.empty() && config().has("queries-file")) @@ -1252,7 +1181,14 @@ void Client::processConfig() pager = config().getString("pager", ""); is_default_format = !config().has("vertical") && !config().has("format"); - if (config().has("vertical")) + if (is_default_format && checkIfStdoutIsRegularFile()) + { + is_default_format = false; + std::optional format_from_file_name; + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO); + format = format_from_file_name ? *format_from_file_name : "TabSeparated"; + } + else if (config().has("vertical")) format = config().getString("format", "Vertical"); else format = config().getString("format", is_interactive ? 
"PrettyCompact" : "TabSeparated"); diff --git a/programs/copier/Aliases.h b/programs/copier/Aliases.h deleted file mode 100644 index 02be3441acd..00000000000 --- a/programs/copier/Aliases.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -#include - -#include - -namespace DB -{ - using ConfigurationPtr = Poco::AutoPtr; - - using DatabaseAndTableName = std::pair; - using ListOfDatabasesAndTableNames = std::vector; -} diff --git a/programs/copier/CMakeLists.txt b/programs/copier/CMakeLists.txt deleted file mode 100644 index 2c17e70bc5e..00000000000 --- a/programs/copier/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -set(CLICKHOUSE_COPIER_SOURCES - "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp") - -set (CLICKHOUSE_COPIER_LINK - PRIVATE - clickhouse_common_zookeeper - clickhouse_common_config - clickhouse_parsers - clickhouse_functions - clickhouse_table_functions - clickhouse_aggregate_functions - string_utils - - PUBLIC - daemon -) - -set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - -clickhouse_program_add(copier) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp deleted file mode 100644 index 59505d08f5c..00000000000 --- a/programs/copier/ClusterCopier.cpp +++ /dev/null @@ -1,2076 +0,0 @@ -#include "ClusterCopier.h" - -#include "Internals.h" -#include "StatusAccumulator.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric LocalThread; - extern const Metric LocalThreadActive; - extern const Metric LocalThreadScheduled; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; - extern const int UNFINISHED; - extern const int BAD_ARGUMENTS; -} - - -void ClusterCopier::init() -{ - auto zookeeper = getContext()->getZooKeeper(); - - task_description_watch_callback = [this] (const Coordination::WatchResponse & response) - { - if (response.error != Coordination::Error::ZOK) - return; - UInt64 version = ++task_description_version; - LOG_INFO(log, "Task description should be updated, local version {}", version); - }; - - task_description_path = task_zookeeper_path + "/description"; - task_cluster = std::make_unique(task_zookeeper_path, working_database_name); - - reloadTaskDescription(); - - task_cluster->loadTasks(*task_cluster_current_config); - getContext()->setClustersConfig(task_cluster_current_config, false, task_cluster->clusters_prefix); - - /// Set up shards and their priority - task_cluster->random_engine.seed(randomSeed()); - for (auto & task_table : task_cluster->table_tasks) - { - task_table.cluster_pull = getContext()->getCluster(task_table.cluster_pull_name); - task_table.cluster_push = getContext()->getCluster(task_table.cluster_push_name); - task_table.initShards(task_cluster->random_engine); - } - - LOG_INFO(log, "Will process {} table tasks", task_cluster->table_tasks.size()); - - /// Do not initialize tables, will make deferred initialization in process() - - 
zookeeper->createAncestors(getWorkersPathVersion() + "/"); - zookeeper->createAncestors(getWorkersPath() + "/"); - /// Init status node - zookeeper->createIfNotExists(task_zookeeper_path + "/status", "{}"); -} - -template -decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries) -{ - std::exception_ptr exception; - - if (max_tries == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform zero retries"); - - for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) - { - try - { - return func(); - } - catch (...) - { - exception = std::current_exception(); - if (try_number < max_tries) - { - tryLogCurrentException(log, "Will retry"); - std::this_thread::sleep_for(retry_delay_ms); - } - } - } - - std::rethrow_exception(exception); -} - - -void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard) -{ - TaskTable & task_table = task_shard->task_table; - - LOG_INFO(log, "Discover partitions of shard {}", task_shard->getDescription()); - - auto get_partitions = [&] () { return getShardPartitions(timeouts, *task_shard); }; - auto existing_partitions_names = retry(get_partitions, 60); - Strings filtered_partitions_names; - Strings missing_partitions; - - /// Check that user specified correct partition names - auto check_partition_format = [] (const DataTypePtr & type, const String & partition_text_quoted) - { - MutableColumnPtr column_dummy = type->createColumn(); - ReadBufferFromString rb(partition_text_quoted); - - try - { - type->getDefaultSerialization()->deserializeTextQuoted(*column_dummy, rb, FormatSettings()); - } - catch (Exception & e) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Partition {} has incorrect format. {}", partition_text_quoted, e.displayText()); - } - }; - - if (task_table.has_enabled_partitions) - { - /// Process partition in order specified by - for (const String & partition_name : task_table.enabled_partitions) - { - /// Check that user specified correct partition names - check_partition_format(task_shard->partition_key_column.type, partition_name); - - auto it = existing_partitions_names.find(partition_name); - - /// Do not process partition if it is not in enabled_partitions list - if (it == existing_partitions_names.end()) - { - missing_partitions.emplace_back(partition_name); - continue; - } - - filtered_partitions_names.emplace_back(*it); - } - - for (const String & partition_name : existing_partitions_names) - { - if (!task_table.enabled_partitions_set.contains(partition_name)) - { - LOG_INFO(log, "Partition {} will not be processed, since it is not in enabled_partitions of {}", partition_name, task_table.table_id); - } - } - } - else - { - for (const String & partition_name : existing_partitions_names) - filtered_partitions_names.emplace_back(partition_name); - } - - for (const String & partition_name : filtered_partitions_names) - { - const size_t number_of_splits = task_table.number_of_splits; - task_shard->partition_tasks.emplace(partition_name, ShardPartition(*task_shard, partition_name, number_of_splits)); - task_shard->checked_partitions.emplace(partition_name, true); - - auto shard_partition_it = task_shard->partition_tasks.find(partition_name); - PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces; - - for (size_t piece_number = 0; piece_number < number_of_splits; ++piece_number) - { - bool res = checkPresentPartitionPiecesOnCurrentShard(timeouts, *task_shard, partition_name, piece_number); - 
shard_partition_pieces.emplace_back(shard_partition_it->second, piece_number, res); - } - } - - if (!missing_partitions.empty()) - { - WriteBufferFromOwnString ss; - for (const String & missing_partition : missing_partitions) - ss << " " << missing_partition; - - LOG_WARNING(log, "There are no {} partitions from enabled_partitions in shard {} :{}", missing_partitions.size(), task_shard->getDescription(), ss.str()); - } - - LOG_INFO(log, "Will copy {} partitions from shard {}", task_shard->partition_tasks.size(), task_shard->getDescription()); -} - -void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads) -{ - /// Fetch partitions list from a shard - { - ThreadPool thread_pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads ? num_threads : 2 * getNumberOfPhysicalCPUCores()); - - for (const TaskShardPtr & task_shard : task_table.all_shards) - thread_pool.scheduleOrThrowOnError([this, timeouts, task_shard]() - { - setThreadName("DiscoverPartns"); - discoverShardPartitions(timeouts, task_shard); - }); - - LOG_INFO(log, "Waiting for {} setup jobs", thread_pool.active()); - thread_pool.wait(); - } -} - -void ClusterCopier::uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force) -{ - auto local_task_description_path = task_path + "/description"; - - String task_config_str; - { - ReadBufferFromFile in(task_file); - readStringUntilEOF(task_config_str, in); - } - if (task_config_str.empty()) - return; - - auto zookeeper = getContext()->getZooKeeper(); - - zookeeper->createAncestors(local_task_description_path); - auto code = zookeeper->tryCreate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); - if (code != Coordination::Error::ZOK && force) - zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); - - LOG_INFO(log, "Task description {} uploaded to {} with result {} ({})", - ((code != Coordination::Error::ZOK && !force) ? "not " : ""), local_task_description_path, code, Coordination::errorMessage(code)); -} - -void ClusterCopier::reloadTaskDescription() -{ - auto zookeeper = getContext()->getZooKeeper(); - task_description_watch_zookeeper = zookeeper; - - Coordination::Stat stat{}; - - /// It will throw exception if such a node doesn't exist. 
- auto task_config_str = zookeeper->get(task_description_path, &stat); - - LOG_INFO(log, "Loading task description"); - task_cluster_current_config = getConfigurationFromXMLString(task_config_str); - - /// Setup settings - task_cluster->reloadSettings(*task_cluster_current_config); - getContext()->setSettings(task_cluster->settings_common); -} - -void ClusterCopier::updateConfigIfNeeded() -{ - UInt64 version_to_update = task_description_version; - bool is_outdated_version = task_description_current_version != version_to_update; - bool is_expired_session = !task_description_watch_zookeeper || task_description_watch_zookeeper->expired(); - - if (!is_outdated_version && !is_expired_session) - return; - - LOG_INFO(log, "Updating task description"); - reloadTaskDescription(); - - task_description_current_version = version_to_update; -} - -void ClusterCopier::process(const ConnectionTimeouts & timeouts) -{ - for (TaskTable & task_table : task_cluster->table_tasks) - { - LOG_INFO(log, "Process table task {} with {} shards, {} of them are local ones", task_table.table_id, task_table.all_shards.size(), task_table.local_shards.size()); - - if (task_table.all_shards.empty()) - continue; - - /// Discover partitions of each shard and total set of partitions - if (!task_table.has_enabled_partitions) - { - /// If there are no specified enabled_partitions, we must discover them manually - discoverTablePartitions(timeouts, task_table); - - /// After partitions of each shard are initialized, initialize cluster partitions - for (const TaskShardPtr & task_shard : task_table.all_shards) - { - for (const auto & partition_elem : task_shard->partition_tasks) - { - const String & partition_name = partition_elem.first; - task_table.cluster_partitions.emplace(partition_name, ClusterPartition{}); - } - } - - for (auto & partition_elem : task_table.cluster_partitions) - { - const String & partition_name = partition_elem.first; - - for (const TaskShardPtr & task_shard : task_table.all_shards) - task_shard->checked_partitions.emplace(partition_name); - - task_table.ordered_partition_names.emplace_back(partition_name); - } - } - else - { - /// If enabled_partitions are specified, assume that each shard has all partitions - /// We will refine partition set of each shard in future - - for (const String & partition_name : task_table.enabled_partitions) - { - task_table.cluster_partitions.emplace(partition_name, ClusterPartition{}); - task_table.ordered_partition_names.emplace_back(partition_name); - } - } - - task_table.watch.restart(); - - /// Retry table processing - bool table_is_done = false; - for (UInt64 num_table_tries = 1; num_table_tries <= max_table_tries; ++num_table_tries) - { - if (tryProcessTable(timeouts, task_table)) - { - table_is_done = true; - break; - } - } - - if (!table_is_done) - { - throw Exception(ErrorCodes::UNFINISHED, "Too many tries to process table {}. Abort remaining execution", - task_table.table_id); - } - } -} - -/// Protected section - - -/* - * Creates task worker node and checks maximum number of workers not to exceed the limit. - * To achieve this we have to check version of workers_version_path node and create current_worker_path - * node atomically. 
- * */ - -zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNeed( - const zkutil::ZooKeeperPtr & zookeeper, - const String & description, - bool unprioritized) -{ - std::chrono::milliseconds current_sleep_time = retry_delay_ms; - static constexpr std::chrono::milliseconds max_sleep_time(30000); // 30 sec - - if (unprioritized) - std::this_thread::sleep_for(current_sleep_time); - - String workers_version_path = getWorkersPathVersion(); - String workers_path = getWorkersPath(); - String current_worker_path = getCurrentWorkerNodePath(); - - UInt64 num_bad_version_errors = 0; - - while (true) - { - updateConfigIfNeeded(); - - Coordination::Stat stat; - zookeeper->get(workers_version_path, &stat); - auto version = stat.version; - zookeeper->get(workers_path, &stat); - - if (static_cast(stat.numChildren) >= task_cluster->max_workers) - { - LOG_INFO(log, "Too many workers ({}, maximum {}). Postpone processing {}", stat.numChildren, task_cluster->max_workers, description); - - if (unprioritized) - current_sleep_time = std::min(max_sleep_time, current_sleep_time + retry_delay_ms); - - std::this_thread::sleep_for(current_sleep_time); - num_bad_version_errors = 0; - } - else - { - Coordination::Requests ops; - ops.emplace_back(zkutil::makeSetRequest(workers_version_path, description, version)); - ops.emplace_back(zkutil::makeCreateRequest(current_worker_path, description, zkutil::CreateMode::Ephemeral)); - Coordination::Responses responses; - auto code = zookeeper->tryMulti(ops, responses); - - if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS) - return zkutil::EphemeralNodeHolder::existing(current_worker_path, *zookeeper); - - if (code == Coordination::Error::ZBADVERSION) - { - ++num_bad_version_errors; - - /// Try to make fast retries - if (num_bad_version_errors > 3) - { - LOG_INFO(log, "A concurrent worker has just been added, will check free worker slots again"); - std::chrono::milliseconds random_sleep_time(std::uniform_int_distribution(1, 1000)(task_cluster->random_engine)); - std::this_thread::sleep_for(random_sleep_time); - num_bad_version_errors = 0; - } - } - else - throw Coordination::Exception(code); - } - } -} - - -bool ClusterCopier::checkPartitionPieceIsClean( - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock, - const String & task_status_path) -{ - LogicalClock task_start_clock; - - Coordination::Stat stat{}; - if (zookeeper->exists(task_status_path, &stat)) - task_start_clock = LogicalClock(stat.mzxid); - - return clean_state_clock.is_clean() && (!task_start_clock.hasHappened() || clean_state_clock.discovery_zxid <= task_start_clock); -} - - -bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition) -{ - bool answer = true; - for (size_t piece_number = 0; piece_number < task_table.number_of_splits; ++piece_number) - { - bool piece_is_done = checkPartitionPieceIsDone(task_table, partition_name, piece_number, shards_with_partition); - if (!piece_is_done) - LOG_INFO(log, "Partition {} piece {} is not already done.", partition_name, piece_number); - answer &= piece_is_done; - } - - return answer; -} - - -/* The same as function above - * Assume that we don't know on which shards do we have partition certain piece. - * We'll check them all (I mean shards that contain the whole partition) - * And shards that don't have certain piece MUST mark that piece is_done true. 
- * */ -bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name, - size_t piece_number, const TasksShard & shards_with_partition) -{ - LOG_INFO(log, "Check that all shards processed partition {} piece {} successfully", partition_name, piece_number); - - auto zookeeper = getContext()->getZooKeeper(); - - /// Collect all shards that contain partition piece number piece_number. - Strings piece_status_paths; - for (const auto & shard : shards_with_partition) - { - ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second; - ShardPartitionPiece & shard_partition_piece = task_shard_partition.pieces[piece_number]; - piece_status_paths.emplace_back(shard_partition_piece.getShardStatusPath()); - } - - std::vector zxid1, zxid2; - - try - { - std::vector get_futures; - for (const String & path : piece_status_paths) - get_futures.emplace_back(zookeeper->asyncGet(path)); - - // Check that state is Finished and remember zxid - for (auto & future : get_futures) - { - auto res = future.get(); - - TaskStateWithOwner status = TaskStateWithOwner::fromString(res.data); - if (status.state != TaskState::Finished) - { - LOG_INFO(log, "The task {} is being rewritten by {}. Partition piece will be rechecked", res.data, status.owner); - return false; - } - - zxid1.push_back(res.stat.pzxid); - } - - const String piece_is_dirty_flag_path = task_table.getCertainPartitionPieceIsDirtyPath(partition_name, piece_number); - const String piece_is_dirty_cleaned_path = task_table.getCertainPartitionPieceIsCleanedPath(partition_name, piece_number); - const String piece_task_status_path = task_table.getCertainPartitionPieceTaskStatusPath(partition_name, piece_number); - - CleanStateClock clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - - const bool is_clean = checkPartitionPieceIsClean(zookeeper, clean_state_clock, piece_task_status_path); - - - if (!is_clean) - { - LOG_INFO(log, "Partition {} become dirty", partition_name); - return false; - } - - get_futures.clear(); - for (const String & path : piece_status_paths) - get_futures.emplace_back(zookeeper->asyncGet(path)); - - // Remember zxid of states again - for (auto & future : get_futures) - { - auto res = future.get(); - zxid2.push_back(res.stat.pzxid); - } - } - catch (const Coordination::Exception & e) - { - LOG_INFO(log, "A ZooKeeper error occurred while checking partition {} piece number {}. Will recheck the partition. Error: {}", partition_name, toString(piece_number), e.displayText()); - return false; - } - - // If all task is finished and zxid is not changed then partition could not become dirty again - for (UInt64 shard_num = 0; shard_num < piece_status_paths.size(); ++shard_num) - { - if (zxid1[shard_num] != zxid2[shard_num]) - { - LOG_INFO(log, "The task {} is being modified now. 
Partition piece will be rechecked", piece_status_paths[shard_num]); - return false; - } - } - - LOG_INFO(log, "Partition {} piece number {} is copied successfully", partition_name, toString(piece_number)); - return true; -} - - -TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name) -{ - bool inject_fault = false; - if (move_fault_probability > 0) - { - double value = std::uniform_real_distribution<>(0, 1)(task_table.task_cluster.random_engine); - inject_fault = value < move_fault_probability; - } - - LOG_INFO(log, "Try to move {} to destination table", partition_name); - - auto zookeeper = getContext()->getZooKeeper(); - - const auto current_partition_attach_is_active = task_table.getPartitionAttachIsActivePath(partition_name); - const auto current_partition_attach_is_done = task_table.getPartitionAttachIsDonePath(partition_name); - - /// Create ephemeral node to mark that we are active and process the partition - zookeeper->createAncestors(current_partition_attach_is_active); - zkutil::EphemeralNodeHolderPtr partition_attach_node_holder; - try - { - partition_attach_node_holder = zkutil::EphemeralNodeHolder::create(current_partition_attach_is_active, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Someone is already moving pieces {}", current_partition_attach_is_active); - return TaskStatus::Active; - } - - throw; - } - - - /// Exit if task has been already processed; - /// create blocking node to signal cleaning up if it is abandoned - { - String status_data; - if (zookeeper->tryGet(current_partition_attach_is_done, status_data)) - { - TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data); - if (status.state == TaskState::Finished) - { - LOG_INFO(log, "All pieces for partition from this task {} has been successfully moved to destination table by {}", current_partition_attach_is_active, status.owner); - return TaskStatus::Finished; - } - - /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process. - /// Initialize DROP PARTITION - LOG_INFO(log, "Moving piece for partition {} has not been successfully finished by {}. Will try to move by myself.", current_partition_attach_is_active, status.owner); - - /// Remove is_done marker. - zookeeper->remove(current_partition_attach_is_done); - } - } - - - /// Try start processing, create node about it - { - String start_state = TaskStateWithOwner::getData(TaskState::Started, host_id); - zookeeper->create(current_partition_attach_is_done, start_state, zkutil::CreateMode::Persistent); - } - - - /// Try to drop destination partition in original table - if (task_table.allow_to_drop_target_partitions) - { - DatabaseAndTableName original_table = task_table.table_push; - - WriteBufferFromOwnString ss; - ss << "ALTER TABLE " << getQuotedTable(original_table) << ((partition_name == "'all'") ? 
" DROP PARTITION ID " : " DROP PARTITION ") << partition_name; - - UInt64 num_shards_drop_partition = executeQueryOnCluster(task_table.cluster_push, ss.str(), task_cluster->settings_push, ClusterExecutionMode::ON_EACH_SHARD); - if (num_shards_drop_partition != task_table.cluster_push->getShardCount()) - return TaskStatus::Error; - - LOG_INFO(log, "Drop partition {} in original table {} have been executed successfully on {} shards of {}", - partition_name, getQuotedTable(original_table), num_shards_drop_partition, task_table.cluster_push->getShardCount()); - } - - /// Move partition to original destination table. - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - LOG_INFO(log, "Trying to move partition {} piece {} to original table", partition_name, toString(current_piece_number)); - - ASTPtr query_alter_ast; - String query_alter_ast_string; - - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, - original_table.second + "_piece_" + - toString(current_piece_number)); - - Settings settings_push = task_cluster->settings_push; - ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_NODE; - - if (settings_push.alter_sync == 1) - execution_mode = ClusterExecutionMode::ON_EACH_SHARD; - - query_alter_ast_string += " ALTER TABLE " + getQuotedTable(original_table) + - ((partition_name == "'all'") ? " ATTACH PARTITION ID " : " ATTACH PARTITION ") + partition_name + - " FROM " + getQuotedTable(helping_table); - - LOG_INFO(log, "Executing ALTER query: {}", query_alter_ast_string); - - try - { - /// Try attach partition on each shard - UInt64 num_nodes = executeQueryOnCluster( - task_table.cluster_push, - query_alter_ast_string, - task_cluster->settings_push, - execution_mode); - - if (settings_push.alter_sync == 1) - { - LOG_INFO( - log, - "Destination tables {} have been executed alter query successfully on {} shards of {}", - getQuotedTable(task_table.table_push), - num_nodes, - task_table.cluster_push->getShardCount()); - - if (num_nodes != task_table.cluster_push->getShardCount()) - return TaskStatus::Error; - } - else - { - LOG_INFO(log, "Number of nodes that executed ALTER query successfully : {}", toString(num_nodes)); - } - } - catch (...) - { - LOG_INFO(log, "Error while moving partition {} piece {} to original table", partition_name, toString(current_piece_number)); - LOG_WARNING(log, "In case of non-replicated tables it can cause duplicates."); - throw; - } - - if (inject_fault) - throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); - } - - /// Create node to signal that we finished moving - /// Also increment a counter of processed partitions - { - const auto state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - const auto task_status = task_zookeeper_path + "/status"; - - /// Try until success - while (true) - { - Coordination::Stat stat; - auto status_json = zookeeper->get(task_status, &stat); - auto statuses = StatusAccumulator::fromJSON(status_json); - - /// Increment status for table. 
- (*statuses)[task_table.name_in_config].processed_partitions_count += 1; - auto statuses_to_commit = StatusAccumulator::serializeToJSON(statuses); - - Coordination::Requests ops; - ops.emplace_back(zkutil::makeSetRequest(current_partition_attach_is_done, state_finished, 0)); - ops.emplace_back(zkutil::makeSetRequest(task_status, statuses_to_commit, stat.version)); - - Coordination::Responses responses; - Coordination::Error code = zookeeper->tryMulti(ops, responses); - - if (code == Coordination::Error::ZOK) - break; - } - } - - return TaskStatus::Finished; -} - -/// This is needed to create internal Distributed table -/// Removes column's TTL expression from `CREATE` query -/// Removes MATEREALIZED or ALIAS columns not to copy additional and useless data over the network. -/// Removes data skipping indices. -ASTPtr ClusterCopier::removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns) -{ - const ASTs & column_asts = query_ast->as().columns_list->columns->children; - auto new_columns = std::make_shared(); - - for (const ASTPtr & column_ast : column_asts) - { - const auto & column = column_ast->as(); - - /// Skip this columns - if (!column.default_specifier.empty() && !allow_to_copy_alias_and_materialized_columns) - { - ColumnDefaultKind kind = columnDefaultKindFromString(column.default_specifier); - if (kind == ColumnDefaultKind::Materialized || kind == ColumnDefaultKind::Alias) - continue; - } - - /// Remove TTL on columns definition. - auto new_column_ast = column_ast->clone(); - auto & new_column = new_column_ast->as(); - if (new_column.ttl) - new_column.ttl.reset(); - - new_columns->children.emplace_back(new_column_ast); - } - - ASTPtr new_query_ast = query_ast->clone(); - auto & new_query = new_query_ast->as(); - - auto new_columns_list = std::make_shared(); - new_columns_list->set(new_columns_list->columns, new_columns); - - /// Skip indices and projections are not needed, because distributed table doesn't support it. 
- - new_query.replace(new_query.columns_list, new_columns_list); - - return new_query_ast; -} - -/// Replaces ENGINE and table name in a create query -std::shared_ptr rewriteCreateQueryStorage(const ASTPtr & create_query_ast, - const DatabaseAndTableName & new_table, - const ASTPtr & new_storage_ast) -{ - const auto & create = create_query_ast->as(); - auto res = std::make_shared(create); - - if (create.storage == nullptr || new_storage_ast == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage is not specified"); - - res->setDatabase(new_table.first); - res->setTable(new_table.second); - - res->children.clear(); - res->set(res->columns_list, create.columns_list->clone()); - res->set(res->storage, new_storage_ast->clone()); - /// Just to make it better and don't store additional flag like `is_table_created` somewhere else - res->if_not_exists = true; - - return res; -} - - -bool ClusterCopier::tryDropPartitionPiece( - ShardPartition & task_partition, - const size_t current_piece_number, - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock) -{ - if (is_safe_mode) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP PARTITION is prohibited in safe mode"); - - TaskTable & task_table = task_partition.task_shard.task_table; - ShardPartitionPiece & partition_piece = task_partition.pieces[current_piece_number]; - - const String current_shards_path = partition_piece.getPartitionPieceShardsPath(); - const String current_partition_active_workers_dir = partition_piece.getPartitionPieceActiveWorkersPath(); - const String is_dirty_flag_path = partition_piece.getPartitionPieceIsDirtyPath(); - const String dirty_cleaner_path = partition_piece.getPartitionPieceCleanerPath(); - const String is_dirty_cleaned_path = partition_piece.getPartitionPieceIsCleanedPath(); - - zkutil::EphemeralNodeHolder::Ptr cleaner_holder; - try - { - cleaner_holder = zkutil::EphemeralNodeHolder::create(dirty_cleaner_path, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number)); - std::this_thread::sleep_for(retry_delay_ms); - return false; - } - - throw; - } - - Coordination::Stat stat{}; - if (zookeeper->exists(current_partition_active_workers_dir, &stat)) - { - if (stat.numChildren != 0) - { - LOG_INFO(log, "Partition {} contains {} active workers while trying to drop it. 
Going to sleep.", task_partition.name, stat.numChildren); - std::this_thread::sleep_for(retry_delay_ms); - return false; - } - else - { - zookeeper->remove(current_partition_active_workers_dir); - } - } - - { - zkutil::EphemeralNodeHolder::Ptr active_workers_lock; - try - { - active_workers_lock = zkutil::EphemeralNodeHolder::create(current_partition_active_workers_dir, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Partition {} is being filled now by somebody, sleep", task_partition.name); - return false; - } - - throw; - } - - // Lock the dirty flag - zookeeper->set(is_dirty_flag_path, host_id, clean_state_clock.discovery_version.value()); - zookeeper->tryRemove(partition_piece.getPartitionPieceCleanStartPath()); - CleanStateClock my_clock(zookeeper, is_dirty_flag_path, is_dirty_cleaned_path); - - /// Remove all status nodes - { - Strings children; - if (zookeeper->tryGetChildren(current_shards_path, children) == Coordination::Error::ZOK) - for (const auto & child : children) - { - zookeeper->removeRecursive(current_shards_path + "/" + child); - } - } - - - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String query = "ALTER TABLE " + getQuotedTable(helping_table); - query += ((task_partition.name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + task_partition.name + ""; - - /// TODO: use this statement after servers will be updated up to 1.1.54310 - // query += " DROP PARTITION ID '" + task_partition.name + "'"; - - ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - /// It is important, DROP PARTITION must be done synchronously - settings_push.alter_sync = 2; - - LOG_INFO(log, "Execute distributed DROP PARTITION: {}", query); - /// We have to drop partition_piece on each replica - size_t num_shards = executeQueryOnCluster( - cluster_push, query, - settings_push, - ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP PARTITION was successfully executed on {} nodes of a cluster.", num_shards); - - /// Update the locking node - if (!my_clock.is_stale()) - { - zookeeper->set(is_dirty_flag_path, host_id, my_clock.discovery_version.value()); - if (my_clock.clean_state_version) - zookeeper->set(is_dirty_cleaned_path, host_id, my_clock.clean_state_version.value()); - else - zookeeper->create(is_dirty_cleaned_path, host_id, zkutil::CreateMode::Persistent); - } - else - { - LOG_INFO(log, "Clean state is altered when dropping the partition, cowardly bailing"); - /// clean state is stale - return false; - } - - LOG_INFO(log, "Partition {} piece {} was dropped on cluster {}", task_partition.name, toString(current_piece_number), task_table.cluster_push_name); - if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::Error::ZNODEEXISTS) - zookeeper->set(current_shards_path, host_id); - } - - LOG_INFO(log, "Partition {} piece {} is safe for work now.", task_partition.name, toString(current_piece_number)); - return true; -} - -bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table) -{ - /// Create destination table - TaskStatus task_status = TaskStatus::Error; - - task_status = tryCreateDestinationTable(timeouts, task_table); - /// Exit if success - if (task_status != 
TaskStatus::Finished) - { - LOG_WARNING(log, "Create destination table failed "); - return false; - } - - /// Set all_partitions_count for table in Zookeeper - auto zookeeper = getContext()->getZooKeeper(); - while (true) - { - Coordination::Stat stat; - auto status_json = zookeeper->get(task_zookeeper_path + "/status", &stat); - auto statuses = StatusAccumulator::fromJSON(status_json); - - /// Exit if someone already set the initial value for this table. - if (statuses->find(task_table.name_in_config) != statuses->end()) - break; - (*statuses)[task_table.name_in_config] = StatusAccumulator::TableStatus - { - /*all_partitions_count=*/task_table.ordered_partition_names.size(), - /*processed_partition_count=*/0 - }; - - auto statuses_to_commit = StatusAccumulator::serializeToJSON(statuses); - auto error = zookeeper->trySet(task_zookeeper_path + "/status", statuses_to_commit, stat.version); - if (error == Coordination::Error::ZOK) - break; - } - - - /// An heuristic: if previous shard is already done, then check next one without sleeps due to max_workers constraint - bool previous_shard_is_instantly_finished = false; - - /// Process each partition that is present in cluster - for (const String & partition_name : task_table.ordered_partition_names) - { - if (!task_table.cluster_partitions.contains(partition_name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no expected partition {}. It is a bug", partition_name); - - ClusterPartition & cluster_partition = task_table.cluster_partitions[partition_name]; - - Stopwatch watch; - /// We will check all the shards of the table and check if they contain current partition. - TasksShard expected_shards; - UInt64 num_failed_shards = 0; - - ++cluster_partition.total_tries; - - LOG_INFO(log, "Processing partition {} for the whole cluster", partition_name); - - /// Process each source shard having current partition and copy current partition - /// NOTE: shards are sorted by "distance" to current host - bool has_shard_to_process = false; - for (const TaskShardPtr & shard : task_table.all_shards) - { - /// Does shard have a node with current partition? - if (!shard->partition_tasks.contains(partition_name)) - { - /// If not, did we check existence of that partition previously? - if (!shard->checked_partitions.contains(partition_name)) - { - auto check_shard_has_partition = [&] () { return checkShardHasPartition(timeouts, *shard, partition_name); }; - bool has_partition = retry(check_shard_has_partition); - - shard->checked_partitions.emplace(partition_name); - - if (has_partition) - { - const size_t number_of_splits = task_table.number_of_splits; - shard->partition_tasks.emplace(partition_name, ShardPartition(*shard, partition_name, number_of_splits)); - LOG_INFO(log, "Discovered partition {} in shard {}", partition_name, shard->getDescription()); - /// To save references in the future. 
- auto shard_partition_it = shard->partition_tasks.find(partition_name); - PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces; - - for (size_t piece_number = 0; piece_number < number_of_splits; ++piece_number) - { - auto res = checkPresentPartitionPiecesOnCurrentShard(timeouts, *shard, partition_name, piece_number); - shard_partition_pieces.emplace_back(shard_partition_it->second, piece_number, res); - } - } - else - { - LOG_INFO(log, "Found that shard {} does not contain current partition {}", shard->getDescription(), partition_name); - continue; - } - } - else - { - /// We have already checked that partition, but did not discover it - previous_shard_is_instantly_finished = true; - continue; - } - } - - auto it_shard_partition = shard->partition_tasks.find(partition_name); - /// Previously when we discovered that shard does not contain current partition, we skipped it. - /// At this moment partition have to be present. - if (it_shard_partition == shard->partition_tasks.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no such partition in a shard. This is a bug."); - auto & partition = it_shard_partition->second; - - expected_shards.emplace_back(shard); - - /// Do not sleep if there is a sequence of already processed shards to increase startup - bool is_unprioritized_task = !previous_shard_is_instantly_finished && shard->priority.is_remote; - task_status = TaskStatus::Error; - bool was_error = false; - has_shard_to_process = true; - for (UInt64 try_num = 1; try_num <= max_shard_partition_tries; ++try_num) - { - task_status = tryProcessPartitionTask(timeouts, partition, is_unprioritized_task); - - /// Exit if success - if (task_status == TaskStatus::Finished) - break; - - was_error = true; - - /// Skip if the task is being processed by someone - if (task_status == TaskStatus::Active) - break; - - /// Repeat on errors - std::this_thread::sleep_for(retry_delay_ms); - } - - if (task_status == TaskStatus::Error) - ++num_failed_shards; - - previous_shard_is_instantly_finished = !was_error; - } - - cluster_partition.elapsed_time_seconds += watch.elapsedSeconds(); - - /// Check that whole cluster partition is done - /// Firstly check the number of failed partition tasks, then look into ZooKeeper and ensure that each partition is done - bool partition_copying_is_done = num_failed_shards == 0; - try - { - partition_copying_is_done = - !has_shard_to_process - || (partition_copying_is_done && checkAllPiecesInPartitionAreDone(task_table, partition_name, expected_shards)); - } - catch (...) - { - tryLogCurrentException(log); - partition_copying_is_done = false; - } - - - bool partition_moving_is_done = false; - /// Try to move only if all pieces were copied. - if (partition_copying_is_done) - { - for (UInt64 try_num = 0; try_num < max_shard_partition_piece_tries_for_alter; ++try_num) - { - try - { - auto res = tryMoveAllPiecesToDestinationTable(task_table, partition_name); - /// Exit and mark current task is done. - if (res == TaskStatus::Finished) - { - partition_moving_is_done = true; - break; - } - - /// Exit if this task is active. - if (res == TaskStatus::Active) - break; - - /// Repeat on errors. - std::this_thread::sleep_for(retry_delay_ms); - } - catch (...) 
- { - tryLogCurrentException(log, "Some error occurred while moving pieces to destination table for partition " + partition_name); - } - } - } - - if (partition_copying_is_done && partition_moving_is_done) - { - task_table.finished_cluster_partitions.emplace(partition_name); - - task_table.bytes_copied += cluster_partition.bytes_copied; - task_table.rows_copied += cluster_partition.rows_copied; - double elapsed = cluster_partition.elapsed_time_seconds; - - LOG_INFO(log, "It took {} seconds to copy partition {}: {} uncompressed bytes, {} rows and {} source blocks are copied", - elapsed, partition_name, - formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied), - formatReadableQuantity(cluster_partition.rows_copied), - cluster_partition.blocks_copied); - - if (cluster_partition.rows_copied) - { - LOG_INFO(log, "Average partition speed: {} per second.", formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed)); - } - - if (task_table.rows_copied) - { - LOG_INFO(log, "Average table {} speed: {} per second.", task_table.table_id, formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed)); - } - } - } - - UInt64 required_partitions = task_table.cluster_partitions.size(); - UInt64 finished_partitions = task_table.finished_cluster_partitions.size(); - bool table_is_done = finished_partitions >= required_partitions; - - if (!table_is_done) - { - LOG_INFO(log, "Table {} is not processed yet. Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions); - } - else - { - /// Delete helping tables in case that whole table is done - dropHelpingTables(task_table); - } - - return table_is_done; -} - -TaskStatus ClusterCopier::tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table) -{ - /// Try create original table (if not exists) on each shard - - //TaskTable & task_table = task_shard.task_table; - const TaskShardPtr task_shard = task_table.all_shards.at(0); - /// We need to update table definitions for each part, it could be changed after ALTER - task_shard->current_pull_table_create_query = getCreateTableForPullShard(timeouts, *task_shard); - try - { - auto create_query_push_ast - = rewriteCreateQueryStorage(task_shard->current_pull_table_create_query, task_table.table_push, task_table.engine_push_ast); - auto & create = create_query_push_ast->as(); - create.if_not_exists = true; - InterpreterCreateQuery::prepareOnClusterQuery(create, getContext(), task_table.cluster_push_name); - String query = queryToString(create_query_push_ast); - - LOG_INFO(log, "Create destination tables. Query: {}", query); - UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, ClusterExecutionMode::ON_EACH_NODE); - LOG_INFO( - log, - "Destination tables {} have been created on {} shards of {}", - getQuotedTable(task_table.table_push), - shards, - task_table.cluster_push->getShardCount()); - } - catch (...) - { - tryLogCurrentException(log, "Error while creating original table. Maybe we are not first."); - } - - return TaskStatus::Finished; -} - -/// Job for copying partition from particular shard. -TaskStatus ClusterCopier::tryProcessPartitionTask(const ConnectionTimeouts & timeouts, ShardPartition & task_partition, bool is_unprioritized_task) -{ - TaskStatus res; - - try - { - res = iterateThroughAllPiecesInPartition(timeouts, task_partition, is_unprioritized_task); - } - catch (...) 
- { - tryLogCurrentException(log, "An error occurred while processing partition " + task_partition.name); - res = TaskStatus::Error; - } - - /// At the end of each task check if the config is updated - try - { - updateConfigIfNeeded(); - } - catch (...) - { - tryLogCurrentException(log, "An error occurred while updating the config"); - } - - return res; -} - -TaskStatus ClusterCopier::iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts, ShardPartition & task_partition, - bool is_unprioritized_task) -{ - const size_t total_number_of_pieces = task_partition.task_shard.task_table.number_of_splits; - - TaskStatus res{TaskStatus::Finished}; - - bool was_failed_pieces = false; - bool was_active_pieces = false; - - for (size_t piece_number = 0; piece_number < total_number_of_pieces; piece_number++) - { - for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num) - { - LOG_INFO(log, "Attempt number {} to process partition {} piece number {} on shard number {} with index {}.", - try_num, task_partition.name, piece_number, - task_partition.task_shard.numberInCluster(), - task_partition.task_shard.indexInCluster()); - - res = processPartitionPieceTaskImpl(timeouts, task_partition, piece_number, is_unprioritized_task); - - /// Exit if success - if (res == TaskStatus::Finished) - break; - - /// Skip if the task is being processed by someone - if (res == TaskStatus::Active) - break; - - /// Repeat on errors - std::this_thread::sleep_for(retry_delay_ms); - } - - was_active_pieces |= (res == TaskStatus::Active); - was_failed_pieces |= (res == TaskStatus::Error); - } - - if (was_failed_pieces) - return TaskStatus::Error; - - if (was_active_pieces) - return TaskStatus::Active; - - return TaskStatus::Finished; -} - - -TaskStatus ClusterCopier::processPartitionPieceTaskImpl( - const ConnectionTimeouts & timeouts, ShardPartition & task_partition, - const size_t current_piece_number, bool is_unprioritized_task) -{ - TaskShard & task_shard = task_partition.task_shard; - TaskTable & task_table = task_shard.task_table; - ClusterPartition & cluster_partition = task_table.getClusterPartition(task_partition.name); - ShardPartitionPiece & partition_piece = task_partition.pieces[current_piece_number]; - - const size_t number_of_splits = task_table.number_of_splits; - const String primary_key_comma_separated = task_table.primary_key_comma_separated; - - /// We need to update table definitions for each partition, it could be changed after ALTER - createShardInternalTables(timeouts, task_shard, true); - - auto split_table_for_current_piece = task_shard.list_of_split_tables_on_shard[current_piece_number]; - - auto zookeeper = getContext()->getZooKeeper(); - - const String piece_is_dirty_flag_path = partition_piece.getPartitionPieceIsDirtyPath(); - const String piece_is_dirty_cleaned_path = partition_piece.getPartitionPieceIsCleanedPath(); - const String current_task_piece_is_active_path = partition_piece.getActiveWorkerPath(); - const String current_task_piece_status_path = partition_piece.getShardStatusPath(); - - /// Auxiliary functions: - - /// Creates is_dirty node to initialize DROP PARTITION - auto create_is_dirty_node = [&] (const CleanStateClock & clock) - { - if (clock.is_stale()) - LOG_INFO(log, "Clean state clock is stale while setting dirty flag, cowardly bailing"); - else if (!clock.is_clean()) - LOG_INFO(log, "Thank you, Captain Obvious"); - else if (clock.discovery_version) - { - LOG_INFO(log, "Updating clean state clock"); - zookeeper->set(piece_is_dirty_flag_path, host_id, 
clock.discovery_version.value()); - } - else - { - LOG_INFO(log, "Creating clean state clock"); - zookeeper->create(piece_is_dirty_flag_path, host_id, zkutil::CreateMode::Persistent); - } - }; - - /// Returns SELECT query filtering current partition and applying user filter - auto get_select_query = [&] (const DatabaseAndTableName & from_table, const String & fields, bool enable_splitting, String limit = "") - { - String query; - query += "WITH " + task_partition.name + " AS partition_key "; - query += "SELECT " + fields + " FROM " + getQuotedTable(from_table); - - if (enable_splitting && experimental_use_sample_offset) - query += " SAMPLE 1/" + toString(number_of_splits) + " OFFSET " + toString(current_piece_number) + "/" + toString(number_of_splits); - - /// TODO: Bad, it is better to rewrite with ASTLiteral(partition_key_field) - query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = partition_key)"; - - if (enable_splitting && !experimental_use_sample_offset) - query += " AND ( cityHash64(" + primary_key_comma_separated + ") %" + toString(number_of_splits) + " = " + toString(current_piece_number) + " )"; - - if (!task_table.where_condition_str.empty()) - query += " AND (" + task_table.where_condition_str + ")"; - - if (!limit.empty()) - query += " LIMIT " + limit; - - query += " FORMAT Native"; - - ParserQuery p_query(query.data() + query.size()); - - const auto & settings = getContext()->getSettingsRef(); - return parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth); - }; - - /// Load balancing - auto worker_node_holder = createTaskWorkerNodeAndWaitIfNeed(zookeeper, current_task_piece_status_path, is_unprioritized_task); - - LOG_INFO(log, "Processing {}", current_task_piece_status_path); - - const String piece_status_path = partition_piece.getPartitionPieceShardsPath(); - - CleanStateClock clean_state_clock(zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - - const bool is_clean = checkPartitionPieceIsClean(zookeeper, clean_state_clock, piece_status_path); - - /// Do not start if partition piece is dirty, try to clean it - if (is_clean) - { - LOG_INFO(log, "Partition {} piece {} appears to be clean", task_partition.name, current_piece_number); - zookeeper->createAncestors(current_task_piece_status_path); - } - else - { - LOG_INFO(log, "Partition {} piece {} is dirty, try to drop it", task_partition.name, current_piece_number); - - try - { - tryDropPartitionPiece(task_partition, current_piece_number, zookeeper, clean_state_clock); - } - catch (...) 
- { - tryLogCurrentException(log, "An error occurred when clean partition"); - } - - return TaskStatus::Error; - } - - /// Create ephemeral node to mark that we are active and process the partition - zookeeper->createAncestors(current_task_piece_is_active_path); - zkutil::EphemeralNodeHolderPtr partition_task_node_holder; - try - { - partition_task_node_holder = zkutil::EphemeralNodeHolder::create(current_task_piece_is_active_path, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Someone is already processing {}", current_task_piece_is_active_path); - return TaskStatus::Active; - } - - throw; - } - - /// Exit if task has been already processed; - /// create blocking node to signal cleaning up if it is abandoned - { - String status_data; - if (zookeeper->tryGet(current_task_piece_status_path, status_data)) - { - TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data); - if (status.state == TaskState::Finished) - { - LOG_INFO(log, "Task {} has been successfully executed by {}", current_task_piece_status_path, status.owner); - return TaskStatus::Finished; - } - - /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process. - /// Initialize DROP PARTITION - LOG_INFO(log, "Task {} has not been successfully finished by {}. Partition will be dropped and refilled.", current_task_piece_status_path, status.owner); - - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - - - /// Try create table (if not exists) on each shard - /// We have to create this table even in case that partition piece is empty - /// This is significant, because we will have simpler code - { - /// 1) Get columns description from any replica of destination cluster - /// 2) Change ENGINE, database and table name - /// 3) Create helping table on the whole destination cluster - auto & settings_push = task_cluster->settings_push; - - auto connection = task_table.cluster_push->getAnyShardInfo().pool->get(timeouts, settings_push, true); - String create_query = getRemoteCreateTable(task_shard.task_table.table_push, *connection, settings_push); - - ParserCreateQuery parser_create_query; - auto create_query_ast = parseQuery(parser_create_query, create_query, settings_push.max_query_size, settings_push.max_parser_depth); - /// Define helping table database and name for current partition piece - DatabaseAndTableName database_and_table_for_current_piece - { - task_table.table_push.first, - task_table.table_push.second + "_piece_" + toString(current_piece_number) - }; - - - auto new_engine_push_ast = task_table.engine_push_ast; - if (task_table.isReplicatedTable()) - new_engine_push_ast = task_table.rewriteReplicatedCreateQueryToPlain(); - - /// Take columns definition from destination table, new database and table name, and new engine (non replicated variant of MergeTree) - auto create_query_push_ast = rewriteCreateQueryStorage(create_query_ast, database_and_table_for_current_piece, new_engine_push_ast); - String query = queryToString(create_query_push_ast); - - LOG_INFO(log, "Create destination tables. 
Query: {}", query); - UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, ClusterExecutionMode::ON_EACH_NODE); - LOG_INFO( - log, - "Destination tables {} have been created on {} shards of {}", - getQuotedTable(task_table.table_push), - shards, - task_table.cluster_push->getShardCount()); - } - - - /// Exit if current piece is absent on this shard. Also mark it as finished, because we will check - /// whether each shard have processed each partitition (and its pieces). - if (partition_piece.is_absent_piece) - { - String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - auto res = zookeeper->tryCreate(current_task_piece_status_path, state_finished, zkutil::CreateMode::Persistent); - if (res == Coordination::Error::ZNODEEXISTS) - LOG_INFO(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number); - if (res == Coordination::Error::ZOK) - LOG_INFO(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number); - return TaskStatus::Finished; - } - - /// Check that destination partition is empty if we are first worker - /// NOTE: this check is incorrect if pull and push tables have different partition key! - String clean_start_status; - if (!zookeeper->tryGet(partition_piece.getPartitionPieceCleanStartPath(), clean_start_status) || clean_start_status != "ok") - { - zookeeper->createIfNotExists(partition_piece.getPartitionPieceCleanStartPath(), ""); - auto checker = zkutil::EphemeralNodeHolder::create(partition_piece.getPartitionPieceCleanStartPath() + "/checker", - *zookeeper, host_id); - // Maybe we are the first worker - - ASTPtr query_select_ast = get_select_query(split_table_for_current_piece, "count()", /* enable_splitting= */ true); - UInt64 count; - { - auto local_context = Context::createCopy(context); - // Use pull (i.e. readonly) settings, but fetch data from destination servers - local_context->setSettings(task_cluster->settings_pull); - local_context->setSetting("skip_unavailable_shards", true); - - InterpreterSelectWithUnionQuery select(query_select_ast, local_context, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder = std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(local_context), - BuildQueryPipelineSettings::fromContext(local_context))); - - Block block = getBlockWithAllStreamData(std::move(builder)); - count = (block) ? block.safeGetByPosition(0).column->getUInt(0) : 0; - } - - if (count != 0) - { - LOG_INFO(log, "Partition {} piece {} is not empty. In contains {} rows.", task_partition.name, current_piece_number, count); - Coordination::Stat stat_shards{}; - zookeeper->get(partition_piece.getPartitionPieceShardsPath(), &stat_shards); - - /// NOTE: partition is still fresh if dirt discovery happens before cleaning - if (stat_shards.numChildren == 0) - { - LOG_WARNING(log, "There are no workers for partition {} piece {}, but destination table contains {} rows. 
Partition will be dropped and refilled.", task_partition.name, toString(current_piece_number), count); - - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - zookeeper->set(partition_piece.getPartitionPieceCleanStartPath(), "ok"); - } - /// At this point, we need to sync that the destination table is clean - /// before any actual work - - /// Try start processing, create node about it - { - String start_state = TaskStateWithOwner::getData(TaskState::Started, host_id); - CleanStateClock new_clean_state_clock(zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - if (clean_state_clock != new_clean_state_clock) - { - LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number)); - return TaskStatus::Error; - } - else if (!new_clean_state_clock.is_clean()) - { - LOG_INFO(log, "Partition {} piece {} is dirty and will be dropped and refilled", task_partition.name, toString(current_piece_number)); - create_is_dirty_node(new_clean_state_clock); - return TaskStatus::Error; - } - zookeeper->create(current_task_piece_status_path, start_state, zkutil::CreateMode::Persistent); - } - - - /// Do the copying - { - bool inject_fault = false; - if (copy_fault_probability > 0) - { - double value = std::uniform_real_distribution<>(0, 1)(task_table.task_cluster.random_engine); - inject_fault = value < copy_fault_probability; - } - - // Select all fields - ASTPtr query_select_ast = get_select_query(task_shard.table_read_shard, "*", /* enable_splitting= */ true, inject_fault ? "1" : ""); - - LOG_INFO(log, "Executing SELECT query and pull from {}: {}", task_shard.getDescription(), queryToString(query_select_ast)); - - ASTPtr query_insert_ast; - { - String query; - query += "INSERT INTO " + getQuotedTable(split_table_for_current_piece) + " FORMAT Native "; - - ParserQuery p_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - query_insert_ast = parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Executing INSERT query: {}", query); - } - - try - { - auto context_select = Context::createCopy(context); - context_select->setSettings(task_cluster->settings_pull); - - auto context_insert = Context::createCopy(context); - context_insert->setSettings(task_cluster->settings_push); - - /// Custom INSERT SELECT implementation - QueryPipeline input; - QueryPipeline output; - { - BlockIO io_insert = InterpreterFactory::instance().get(query_insert_ast, context_insert)->execute(); - - InterpreterSelectWithUnionQuery select(query_select_ast, context_select, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder = std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context_select), - BuildQueryPipelineSettings::fromContext(context_select))); - - output = std::move(io_insert.pipeline); - - /// Add converting actions to make it possible to copy blocks with slightly different schema - const auto & select_block = builder.getHeader(); - const auto & insert_block = output.getHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - select_block.getColumnsWithTypeAndName(), - insert_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext())); - - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, actions); - }); 
- input = QueryPipelineBuilder::getPipeline(std::move(builder)); - } - - /// Fail-fast optimization to abort copying when the current clean state expires - std::future future_is_dirty_checker; - - Stopwatch watch(CLOCK_MONOTONIC_COARSE); - constexpr UInt64 check_period_milliseconds = 500; - - /// Will asynchronously check that ZooKeeper connection and is_dirty flag appearing while copying data - auto cancel_check = [&] () - { - if (zookeeper->expired()) - throw Exception(ErrorCodes::UNFINISHED, "ZooKeeper session is expired, cancel INSERT SELECT"); - - if (!future_is_dirty_checker.valid()) - future_is_dirty_checker = zookeeper->asyncExists(piece_is_dirty_flag_path); - - /// check_period_milliseconds should less than average insert time of single block - /// Otherwise, the insertion will slow a little bit - if (watch.elapsedMilliseconds() >= check_period_milliseconds) - { - Coordination::ExistsResponse status = future_is_dirty_checker.get(); - - if (status.error != Coordination::Error::ZNONODE) - { - LogicalClock dirt_discovery_epoch (status.stat.mzxid); - if (dirt_discovery_epoch == clean_state_clock.discovery_zxid) - return false; - throw Exception(ErrorCodes::UNFINISHED, "Partition is dirty, cancel INSERT SELECT"); - } - } - - return false; - }; - - /// Update statistics - /// It is quite rough: bytes_copied don't take into account DROP PARTITION. - auto update_stats = [&cluster_partition] (const Block & block) - { - cluster_partition.bytes_copied += block.bytes(); - cluster_partition.rows_copied += block.rows(); - cluster_partition.blocks_copied += 1; - }; - - /// Main work is here - PullingPipelineExecutor pulling_executor(input); - PushingPipelineExecutor pushing_executor(output); - - Block data; - bool is_cancelled = false; - while (pulling_executor.pull(data)) - { - if (cancel_check()) - { - is_cancelled = true; - pushing_executor.cancel(); - pushing_executor.cancel(); - break; - } - pushing_executor.push(data); - update_stats(data); - } - - if (!is_cancelled) - pushing_executor.finish(); - - // Just in case - if (future_is_dirty_checker.valid()) - future_is_dirty_checker.get(); - - if (inject_fault) - throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); - } - catch (...) - { - tryLogCurrentException(log, "An error occurred during copying, partition will be marked as dirty"); - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - - LOG_INFO(log, "Partition {} piece {} copied. 
But not moved to original destination table.", task_partition.name, toString(current_piece_number)); - - /// Finalize the processing, change state of current partition task (and also check is_dirty flag) - { - String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - if (clean_state_clock != new_clean_state_clock) - { - LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number)); - return TaskStatus::Error; - } - else if (!new_clean_state_clock.is_clean()) - { - LOG_INFO(log, "Partition {} piece {} became dirty and will be dropped and refilled", task_partition.name, toString(current_piece_number)); - create_is_dirty_node(new_clean_state_clock); - return TaskStatus::Error; - } - zookeeper->set(current_task_piece_status_path, state_finished, 0); - } - - return TaskStatus::Finished; -} - -void ClusterCopier::dropAndCreateLocalTable(const ASTPtr & create_ast) -{ - const auto & create = create_ast->as(); - dropLocalTableIfExists({create.getDatabase(), create.getTable()}); - - auto create_context = Context::createCopy(getContext()); - - InterpreterCreateQuery interpreter(create_ast, create_context); - interpreter.execute(); -} - -void ClusterCopier::dropLocalTableIfExists(const DatabaseAndTableName & table_name) const -{ - auto drop_ast = std::make_shared(); - drop_ast->if_exists = true; - drop_ast->setDatabase(table_name.first); - drop_ast->setTable(table_name.second); - - auto drop_context = Context::createCopy(getContext()); - - InterpreterDropQuery interpreter(drop_ast, drop_context); - interpreter.execute(); -} - -void ClusterCopier::dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number) -{ - LOG_INFO(log, "Removing helping tables piece {}", current_piece_number); - - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table - = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String query = "DROP TABLE IF EXISTS " + getQuotedTable(helping_table); - - const ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - LOG_INFO(log, "Execute distributed DROP TABLE: {}", query); - - /// We have to drop partition_piece on each replica - UInt64 num_nodes = executeQueryOnCluster(cluster_push, query, settings_push, ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP TABLE query was successfully executed on {} nodes.", toString(num_nodes)); -} - -void ClusterCopier::dropHelpingTables(const TaskTable & task_table) -{ - LOG_INFO(log, "Removing helping tables"); - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - dropHelpingTablesByPieceNumber(task_table, current_piece_number); - } -} - -void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name) -{ - LOG_INFO(log, "Try drop partition partition from all helping tables."); - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String 
query = "ALTER TABLE " + getQuotedTable(helping_table) + ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + partition_name; - - const ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - LOG_INFO(log, "Execute distributed DROP PARTITION: {}", query); - /// We have to drop partition_piece on each replica - UInt64 num_nodes = executeQueryOnCluster( - cluster_push, query, - settings_push, - ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP PARTITION query was successfully executed on {} nodes.", toString(num_nodes)); - } - LOG_INFO(log, "All helping tables dropped partition {}", partition_name); -} - -String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings) -{ - auto remote_context = Context::createCopy(context); - remote_context->setSettings(settings); - - String query = "SHOW CREATE TABLE " + getQuotedTable(table); - - QueryPipelineBuilder builder; - builder.init(Pipe(std::make_shared( - std::make_shared(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false, /* async_query_sending= */ false))); - Block block = getBlockWithAllStreamData(std::move(builder)); - return typeid_cast(*block.safeGetByPosition(0).column).getDataAt(0).toString(); -} - - -ASTPtr ClusterCopier::getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard) -{ - /// Fetch and parse (possibly) new definition - auto connection_entry = task_shard.info.pool->get(timeouts, task_cluster->settings_pull, true); - String create_query_pull_str = getRemoteCreateTable( - task_shard.task_table.table_pull, - *connection_entry, - task_cluster->settings_pull); - - ParserCreateQuery parser_create_query; - const auto & settings = getContext()->getSettingsRef(); - return parseQuery(parser_create_query, create_query_pull_str, settings.max_query_size, settings.max_parser_depth); -} - - -/// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it. 
-void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, bool create_split) -{ - TaskTable & task_table = task_shard.task_table; - - /// We need to update table definitions for each part, it could be changed after ALTER - task_shard.current_pull_table_create_query = getCreateTableForPullShard(timeouts, task_shard); - - /// Create local Distributed tables: - /// a table fetching data from current shard and a table inserting data to the whole destination cluster - String read_shard_prefix = ".read_shard_" + toString(task_shard.indexInCluster()) + "."; - String split_shard_prefix = ".split."; - task_shard.table_read_shard = DatabaseAndTableName(working_database_name, read_shard_prefix + task_table.table_id); - task_shard.main_table_split_shard = DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id); - - for (const auto & piece_number : collections::range(0, task_table.number_of_splits)) - { - task_shard.list_of_split_tables_on_shard[piece_number] = - DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id + "_piece_" + toString(piece_number)); - } - - /// Create special cluster with single shard - String shard_read_cluster_name = read_shard_prefix + task_table.cluster_pull_name; - ClusterPtr cluster_pull_current_shard = task_table.cluster_pull->getClusterWithSingleShard(task_shard.indexInCluster()); - getContext()->setCluster(shard_read_cluster_name, cluster_pull_current_shard); - - auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second); - - auto create_query_ast = removeAliasMaterializedAndTTLColumnsFromCreateQuery( - task_shard.current_pull_table_create_query, - task_table.allow_to_copy_alias_and_materialized_columns); - - auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, task_shard.table_read_shard, storage_shard_ast); - dropAndCreateLocalTable(create_table_pull_ast); - - if (create_split) - { - auto create_table_split_piece_ast = rewriteCreateQueryStorage( - create_query_ast, - task_shard.main_table_split_shard, - task_table.main_engine_split_ast); - - dropAndCreateLocalTable(create_table_split_piece_ast); - - /// Create auxiliary split tables for each piece - for (const auto & piece_number : collections::range(0, task_table.number_of_splits)) - { - const auto & storage_piece_split_ast = task_table.auxiliary_engine_split_asts[piece_number]; - - create_table_split_piece_ast = rewriteCreateQueryStorage( - create_query_ast, - task_shard.list_of_split_tables_on_shard[piece_number], - storage_piece_split_ast); - - dropAndCreateLocalTable(create_table_split_piece_ast); - } - } - -} - - -std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard) -{ - std::set res; - - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - - const String & partition_name = queryToString(task_table.engine_push_partition_key_ast); - - if (partition_name == "'all'") - { - res.emplace("'all'"); - return res; - } - - String query; - { - WriteBufferFromOwnString wb; - wb << "SELECT " << partition_name << " AS partition FROM " - << getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC"; - query = wb.str(); - } - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, 
query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Computing destination partition set, executing query: {}", query); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - InterpreterSelectWithUnionQuery select(query_ast, local_context, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder = std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(local_context), - BuildQueryPipelineSettings::fromContext(local_context))); - - Block block = getBlockWithAllStreamData(std::move(builder)); - - if (block) - { - ColumnWithTypeAndName & column = block.getByPosition(0); - task_shard.partition_key_column = column; - - for (size_t i = 0; i < column.column->size(); ++i) - { - WriteBufferFromOwnString wb; - column.type->getDefaultSerialization()->serializeTextQuoted(*column.column, i, wb, FormatSettings()); - res.emplace(wb.str()); - } - } - - LOG_INFO(log, "There are {} destination partitions in shard {}", res.size(), task_shard.getDescription()); - - return res; -} - -bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name) -{ - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - - WriteBufferFromOwnString ss; - ss << "WITH " + partition_quoted_name + " AS partition_key "; - ss << "SELECT 1 FROM " << getQuotedTable(task_shard.table_read_shard); - ss << " WHERE (" << queryToString(task_table.engine_push_partition_key_ast) << " = partition_key)"; - if (!task_table.where_condition_str.empty()) - ss << " AND (" << task_table.where_condition_str << ")"; - ss << " LIMIT 1"; - auto query = ss.str(); - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Checking shard {} for partition {} existence, executing query: {}", - task_shard.getDescription(), partition_quoted_name, query_ast->formatForErrorMessage()); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - auto pipeline = InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline; - PullingPipelineExecutor executor(pipeline); - Block block; - executor.pull(block); - return block.rows() != 0; -} - -bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number) -{ - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - const size_t number_of_splits = task_table.number_of_splits; - const String & primary_key_comma_separated = task_table.primary_key_comma_separated; - - UNUSED(primary_key_comma_separated); - - std::string query; - - query += "WITH " + partition_quoted_name + " AS partition_key "; - query += "SELECT 1 FROM " + getQuotedTable(task_shard.table_read_shard); - - if (experimental_use_sample_offset) - query += " SAMPLE 1/" + toString(number_of_splits) + " OFFSET " + toString(current_piece_number) + "/" + toString(number_of_splits); - - query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = partition_key)"; - - if (!experimental_use_sample_offset) - query += " AND (cityHash64(" + 
primary_key_comma_separated + ") % " - + std::to_string(number_of_splits) + " = " + std::to_string(current_piece_number) + " )"; - - if (!task_table.where_condition_str.empty()) - query += " AND (" + task_table.where_condition_str + ")"; - - query += " LIMIT 1"; - - LOG_INFO(log, "Checking shard {} for partition {} piece {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, std::to_string(current_piece_number), query); - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - auto pipeline = InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline; - PullingPipelineExecutor executor(pipeline); - Block result; - executor.pull(result); - if (result.rows() != 0) - LOG_INFO(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription()); - else - LOG_INFO(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription()); - return result.rows() != 0; -} - - -/** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster - * Returns number of shards for which at least one replica executed query successfully - */ -UInt64 ClusterCopier::executeQueryOnCluster( - const ClusterPtr & cluster, - const String & query, - const Settings & current_settings, - ClusterExecutionMode execution_mode) const -{ - ClusterPtr cluster_for_query = cluster; - if (execution_mode == ClusterExecutionMode::ON_EACH_NODE) - cluster_for_query = cluster->getClusterWithReplicasAsShards(current_settings); - - std::vector> connections; - connections.reserve(cluster->getShardCount()); - - std::atomic successfully_executed = 0; - - for (const auto & replicas : cluster_for_query->getShardsAddresses()) - { - for (const auto & node : replicas) - { - try - { - connections.emplace_back(std::make_shared( - node.host_name, node.port, node.default_database, - node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret, - "ClusterCopier", node.compression, node.secure - )); - - /// We execute only Alter, Create and Drop queries. - const auto header = Block{}; - - /// For unknown reason global context is passed to IStorage::read() method - /// So, task_identifier is passed as constructor argument. It is more obvious. - auto remote_query_executor = std::make_shared( - *connections.back(), query, header, getContext(), - /*throttler=*/nullptr, Scalars(), Tables(), QueryProcessingStage::Complete); - - try - { - remote_query_executor->sendQuery(); - } - catch (...) - { - LOG_WARNING(log, "Node with address {} seems to be unreachable.", node.host_name); - continue; - } - - while (true) - { - auto block = remote_query_executor->readBlock(); - if (!block) - break; - } - - remote_query_executor->finish(); - ++successfully_executed; - break; - } - catch (...) 
- { - LOG_WARNING(log, "An error occurred while processing query: {}", query); - tryLogCurrentException(log); - continue; - } - } - } - - return successfully_executed.load(); -} - -} diff --git a/programs/copier/ClusterCopier.h b/programs/copier/ClusterCopier.h deleted file mode 100644 index 01f8b30f546..00000000000 --- a/programs/copier/ClusterCopier.h +++ /dev/null @@ -1,240 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "Internals.h" -#include "TaskCluster.h" -#include "TaskShard.h" -#include "TaskTable.h" -#include "ShardPartition.h" -#include "ShardPartitionPiece.h" -#include "ZooKeeperStaff.h" - - -namespace DB -{ - -class ClusterCopier : WithMutableContext -{ -public: - ClusterCopier(const String & task_path_, - const String & host_id_, - const String & proxy_database_name_, - ContextMutablePtr context_, - LoggerRawPtr log_) - : WithMutableContext(context_), - task_zookeeper_path(task_path_), - host_id(host_id_), - working_database_name(proxy_database_name_), - log(log_) {} - - void init(); - - template - decltype(auto) retry(T && func, UInt64 max_tries = 100); - - void discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard); - - /// Compute set of partitions, assume set of partitions aren't changed during the processing - void discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads = 0); - - void uploadTaskDescription(const std::string & task_path, const std::string & task_file, bool force); - - void reloadTaskDescription(); - - void updateConfigIfNeeded(); - - void process(const ConnectionTimeouts & timeouts); - - /// Disables DROP PARTITION commands that used to clear data after errors - void setSafeMode(bool is_safe_mode_ = true) - { - is_safe_mode = is_safe_mode_; - } - - void setCopyFaultProbability(double copy_fault_probability_) - { - copy_fault_probability = copy_fault_probability_; - } - - void setMoveFaultProbability(double move_fault_probability_) - { - move_fault_probability = move_fault_probability_; - } - - void setExperimentalUseSampleOffset(bool value) - { - experimental_use_sample_offset = value; - } - - void setMaxTableTries(UInt64 tries) - { - max_table_tries = tries; - } - void setMaxShardPartitionTries(UInt64 tries) - { - max_shard_partition_tries = tries; - } - void setMaxShardPartitionPieceTriesForAlter(UInt64 tries) - { - max_shard_partition_piece_tries_for_alter = tries; - } - void setRetryDelayMs(std::chrono::milliseconds ms) - { - retry_delay_ms = ms; - } - -protected: - - String getWorkersPath() const - { - return task_cluster->task_zookeeper_path + "/task_active_workers"; - } - - String getWorkersPathVersion() const - { - return getWorkersPath() + "_version"; - } - - String getCurrentWorkerNodePath() const - { - return getWorkersPath() + "/" + host_id; - } - - zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed( - const zkutil::ZooKeeperPtr & zookeeper, - const String & description, - bool unprioritized); - - /* - * Checks that partition piece or some other entity is clean. - * The only requirement is that you have to pass is_dirty_flag_path and is_dirty_cleaned_path to the function. - * And is_dirty_flag_path is a parent of is_dirty_cleaned_path. 
- * */ - static bool checkPartitionPieceIsClean( - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock, - const String & task_status_path); - - bool checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition); - - /** Checks that the whole partition of a table was copied. We should do it carefully due to dirty lock. - * State of some task could change during the processing. - * We have to ensure that all shards have the finished state and there is no dirty flag. - * Moreover, we have to check status twice and check zxid, because state can change during the checking. - */ - - /* The same as function above - * Assume that we don't know on which shards do we have partition certain piece. - * We'll check them all (I mean shards that contain the whole partition) - * And shards that don't have certain piece MUST mark that piece is_done true. - * */ - bool checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name, - size_t piece_number, const TasksShard & shards_with_partition); - - - /*Alter successful insertion to helping tables it will move all pieces to destination table*/ - TaskStatus tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name); - - /// Removes MATERIALIZED and ALIAS columns from create table query - static ASTPtr removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns); - - bool tryDropPartitionPiece(ShardPartition & task_partition, size_t current_piece_number, - const zkutil::ZooKeeperPtr & zookeeper, const CleanStateClock & clean_state_clock); - - bool tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table); - - TaskStatus tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table); - /// Job for copying partition from particular shard. - TaskStatus tryProcessPartitionTask(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - bool is_unprioritized_task); - - TaskStatus iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - bool is_unprioritized_task); - - TaskStatus processPartitionPieceTaskImpl(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - size_t current_piece_number, - bool is_unprioritized_task); - - void dropAndCreateLocalTable(const ASTPtr & create_ast); - - void dropLocalTableIfExists(const DatabaseAndTableName & table_name) const; - - void dropHelpingTables(const TaskTable & task_table); - - void dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number); - - /// Is used for usage less disk space. - /// After all pieces were successfully moved to original destination - /// table we can get rid of partition pieces (partitions in helping tables). - void dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name); - - String getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings); - - ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard); - - /// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it. 
- void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, bool create_split = true); - - std::set getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard); - - bool checkShardHasPartition(const ConnectionTimeouts & timeouts, TaskShard & task_shard, const String & partition_quoted_name); - - bool checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number); - - /* - * This class is used in executeQueryOnCluster function - * You can execute query on each shard (no sense it is executed on each replica of a shard or not) - * or you can execute query on each replica on each shard. - * First mode is useful for INSERTS queries. - * */ - enum ClusterExecutionMode - { - ON_EACH_SHARD, - ON_EACH_NODE - }; - - /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster - * Returns number of shards for which at least one replica executed query successfully - */ - UInt64 executeQueryOnCluster( - const ClusterPtr & cluster, - const String & query, - const Settings & current_settings, - ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_SHARD) const; - -private: - String task_zookeeper_path; - String task_description_path; - String host_id; - String working_database_name; - - /// Auto update config stuff - UInt64 task_description_current_version = 1; - std::atomic task_description_version{1}; - Coordination::WatchCallback task_description_watch_callback; - /// ZooKeeper session used to set the callback - zkutil::ZooKeeperPtr task_description_watch_zookeeper; - - ConfigurationPtr task_cluster_initial_config; - ConfigurationPtr task_cluster_current_config; - - std::unique_ptr task_cluster; - - bool is_safe_mode = false; - double copy_fault_probability = 0.0; - double move_fault_probability = 0.0; - - bool experimental_use_sample_offset{false}; - - LoggerRawPtr log; - - UInt64 max_table_tries = 3; - UInt64 max_shard_partition_tries = 3; - UInt64 max_shard_partition_piece_tries_for_alter = 10; - std::chrono::milliseconds retry_delay_ms{1000}; -}; -} diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp deleted file mode 100644 index fdf07dec61a..00000000000 --- a/programs/copier/ClusterCopierApp.cpp +++ /dev/null @@ -1,252 +0,0 @@ -#include "ClusterCopierApp.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; - -namespace DB -{ - -/// ClusterCopierApp - -void ClusterCopierApp::initialize(Poco::Util::Application & self) -{ - is_help = config().has("help"); - if (is_help) - return; - - config_xml_path = config().getString("config-file"); - task_path = config().getString("task-path"); - log_level = config().getString("log-level", "info"); - is_safe_mode = config().has("safe-mode"); - is_status_mode = config().has("status"); - if (config().has("copy-fault-probability")) - copy_fault_probability = std::max(std::min(config().getDouble("copy-fault-probability"), 1.0), 0.0); - if (config().has("move-fault-probability")) - move_fault_probability = std::max(std::min(config().getDouble("move-fault-probability"), 1.0), 0.0); - base_dir = (config().has("base-dir")) ? 
config().getString("base-dir") : fs::current_path().string(); - - max_table_tries = std::max(config().getUInt("max-table-tries", 3), 1); - max_shard_partition_tries = std::max(config().getUInt("max-shard-partition-tries", 3), 1); - max_shard_partition_piece_tries_for_alter = std::max(config().getUInt("max-shard-partition-piece-tries-for-alter", 10), 1); - retry_delay_ms = std::chrono::milliseconds(std::max(config().getUInt("retry-delay-ms", 1000), 100)); - - if (config().has("experimental-use-sample-offset")) - experimental_use_sample_offset = config().getBool("experimental-use-sample-offset"); - - // process_id is '#_' - time_t timestamp = Poco::Timestamp().epochTime(); - auto curr_pid = Poco::Process::id(); - - process_id = std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid); - host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id; - process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id)); - fs::create_directories(process_path); - - /// Override variables for BaseDaemon - if (config().has("log-level")) - config().setString("logger.level", config().getString("log-level")); - - if (config().has("base-dir") || !config().has("logger.log")) - config().setString("logger.log", fs::path(process_path) / "log.log"); - - if (config().has("base-dir") || !config().has("logger.errorlog")) - config().setString("logger.errorlog", fs::path(process_path) / "log.err.log"); - - Base::initialize(self); -} - - -void ClusterCopierApp::handleHelp(const std::string &, const std::string &) -{ - uint16_t terminal_width = 0; - if (isatty(STDIN_FILENO)) - terminal_width = getTerminalWidth(); - - Poco::Util::HelpFormatter help_formatter(options()); - if (terminal_width) - help_formatter.setWidth(terminal_width); - help_formatter.setCommand(commandName()); - help_formatter.setHeader("Copies tables from one cluster to another"); - help_formatter.setUsage("--config-file --task-path "); - help_formatter.format(std::cerr); - help_formatter.setFooter("See also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/"); - - stopOptionsProcessing(); -} - - -void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options) -{ - Base::defineOptions(options); - - options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper") - .argument("task-path").binding("task-path")); - options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path") - .argument("task-file").binding("task-file")); - options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists. 
Default is false.") - .argument("task-upload-force").binding("task-upload-force")); - options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors") - .binding("safe-mode")); - options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)") - .argument("copy-fault-probability").binding("copy-fault-probability")); - options.addOption(Poco::Util::Option("move-fault-probability", "", "the moving fails with specified probability (used to test partition state recovering)") - .argument("move-fault-probability").binding("move-fault-probability")); - options.addOption(Poco::Util::Option("log-level", "", "sets log level") - .argument("log-level").binding("log-level")); - options.addOption(Poco::Util::Option("base-dir", "", "base directory for copiers, consecutive copier launches will populate /base-dir/launch_id/* directories") - .argument("base-dir").binding("base-dir")); - options.addOption(Poco::Util::Option("experimental-use-sample-offset", "", "Use SAMPLE OFFSET query instead of cityHash64(PRIMARY KEY) % n == k") - .argument("experimental-use-sample-offset").binding("experimental-use-sample-offset")); - options.addOption(Poco::Util::Option("status", "", "Get for status for current execution").binding("status")); - - options.addOption(Poco::Util::Option("max-table-tries", "", "Number of tries for the copy table task") - .argument("max-table-tries").binding("max-table-tries")); - options.addOption(Poco::Util::Option("max-shard-partition-tries", "", "Number of tries for the copy one partition task") - .argument("max-shard-partition-tries").binding("max-shard-partition-tries")); - options.addOption(Poco::Util::Option("max-shard-partition-piece-tries-for-alter", "", "Number of tries for final ALTER ATTACH to destination table") - .argument("max-shard-partition-piece-tries-for-alter").binding("max-shard-partition-piece-tries-for-alter")); - options.addOption(Poco::Util::Option("retry-delay-ms", "", "Delay between task retries") - .argument("retry-delay-ms").binding("retry-delay-ms")); - - using Me = std::decay_t; - options.addOption(Poco::Util::Option("help", "", "produce this help message").binding("help") - .callback(Poco::Util::OptionCallback(this, &Me::handleHelp))); -} - - -void ClusterCopierApp::mainImpl() -{ - /// Status command - { - if (is_status_mode) - { - SharedContextHolder shared_context = Context::createShared(); - auto context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - SCOPE_EXIT_SAFE(context->shutdown()); - - auto zookeeper = context->getZooKeeper(); - auto status_json = zookeeper->get(task_path + "/status"); - - LOG_INFO(&logger(), "{}", status_json); - std::cout << status_json << std::endl; - - context->resetZooKeeper(); - return; - } - } - StatusFile status_file(process_path + "/status", StatusFile::write_full_info); - ThreadStatus thread_status; - - auto * log = &logger(); - LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::getVersionRevision()); - - SharedContextHolder shared_context = Context::createShared(); - auto context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - SCOPE_EXIT_SAFE(context->shutdown()); - - context->setConfig(loaded_config.configuration); - context->setApplicationType(Context::ApplicationType::LOCAL); - context->setPath(process_path + "/"); - - 
registerInterpreters(); - registerFunctions(); - registerAggregateFunctions(); - registerTableFunctions(); - registerDatabases(); - registerStorages(); - registerDictionaries(); - registerDisks(/* global_skip_access_check= */ true); - registerFormats(); - - static const std::string default_database = "_local"; - DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, context)); - context->setCurrentDatabase(default_database); - - /// Disable queries logging, since: - /// - There are bits that is not allowed for global context, like adding factories info (for the query_log) - /// - And anyway it is useless for copier. - context->setSetting("log_queries", false); - - auto local_context = Context::createCopy(context); - - /// Initialize query scope just in case. - CurrentThread::QueryScope query_scope(local_context); - - auto copier = std::make_unique( - task_path, host_id, default_database, local_context, log); - copier->setSafeMode(is_safe_mode); - copier->setCopyFaultProbability(copy_fault_probability); - copier->setMoveFaultProbability(move_fault_probability); - copier->setMaxTableTries(max_table_tries); - copier->setMaxShardPartitionTries(max_shard_partition_tries); - copier->setMaxShardPartitionPieceTriesForAlter(max_shard_partition_piece_tries_for_alter); - copier->setRetryDelayMs(retry_delay_ms); - copier->setExperimentalUseSampleOffset(experimental_use_sample_offset); - - auto task_file = config().getString("task-file", ""); - if (!task_file.empty()) - copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false)); - - zkutil::validateZooKeeperConfig(config()); - - copier->init(); - copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef())); - - /// Reset ZooKeeper before removing ClusterCopier. - /// Otherwise zookeeper watch can call callback which use already removed ClusterCopier object. - context->resetZooKeeper(); -} - - -int ClusterCopierApp::main(const std::vector &) -{ - if (is_help) - return 0; - - try - { - mainImpl(); - } - catch (...) - { - tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__); - auto code = getCurrentExceptionCode(); - - return (code) ? code : -1; - } - - return 0; -} - - -} - -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" - -int mainEntryClickHouseClusterCopier(int argc, char ** argv) -{ - try - { - DB::ClusterCopierApp app; - return app.run(argc, argv); - } - catch (...) - { - std::cerr << DB::getCurrentExceptionMessage(true) << "\n"; - auto code = DB::getCurrentExceptionCode(); - - return (code) ? code : -1; - } -} diff --git a/programs/copier/ClusterCopierApp.h b/programs/copier/ClusterCopierApp.h deleted file mode 100644 index 0ddc232381e..00000000000 --- a/programs/copier/ClusterCopierApp.h +++ /dev/null @@ -1,99 +0,0 @@ -#pragma once - -#include -#include - -#include "ClusterCopier.h" - -/* clickhouse cluster copier util - * Copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed fault-tolerant manner. - * - * See overview in the docs: docs/en/utils/clickhouse-copier.md - * - * Implementation details: - * - * cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through - * Distributed table (to perform data resharding). So, worker job is a partition of a source shard. - * A job has three states: Active, Finished and Abandoned. 
Abandoned means that worker died and did not finish the job. - * - * If an error occurred during the copying (a worker failed or a worker did not finish the INSERT), then the whole partition (on - * all destination servers) should be dropped and refilled. So, copying entity is a partition of all destination shards. - * If a failure is detected a special /is_dirty node is created in ZooKeeper signalling that other workers copying the same partition - * should stop, after a refilling procedure should start. - * - * ZooKeeper task node has the following structure: - * /task/path_root - path passed in --task-path parameter - * /description - contains user-defined XML config of the task - * /task_active_workers - contains ephemeral nodes of all currently active workers, used to implement max_workers limitation - * /server_fqdn#PID_timestamp - cluster-copier worker ID - * ... - * /tables - directory with table tasks - * /cluster.db.table1 - directory of table_hits task - * /partition1 - directory for partition1 - * /shards - directory for source cluster shards - * /1 - worker job for the first shard of partition1 of table test.hits - * Contains info about current status (Active or Finished) and worker ID. - * /2 - * ... - * /partition_active_workers - * /1 - for each job in /shards a corresponding ephemeral node created in /partition_active_workers - * It is used to detect Abandoned jobs (if there is Active node in /shards and there is no node in - * /partition_active_workers). - * Also, it is used to track active workers in the partition (when we need to refill the partition we do - * not DROP PARTITION while there are active workers) - * /2 - * ... - * /is_dirty - the node is set if some worker detected that an error occurred (the INSERT is failed or an Abandoned node is - * detected). If the node appeared workers in this partition should stop and start cleaning and refilling - * partition procedure. - * During this procedure a single 'cleaner' worker is selected. The worker waits for stopping all partition - * workers, removes /shards node, executes DROP PARTITION on each destination node and removes /is_dirty node. - * /cleaner- An ephemeral node used to select 'cleaner' worker. Contains ID of the worker. - * /cluster.db.table2 - * ... 
- */ - -namespace DB -{ - -class ClusterCopierApp : public BaseDaemon -{ -public: - - void initialize(Poco::Util::Application & self) override; - - void handleHelp(const std::string &, const std::string &); - - void defineOptions(Poco::Util::OptionSet & options) override; - - int main(const std::vector &) override; - -private: - - using Base = BaseDaemon; - - void mainImpl(); - - std::string config_xml_path; - std::string task_path; - std::string log_level = "info"; - bool is_safe_mode = false; - bool is_status_mode = false; - double copy_fault_probability = 0.0; - double move_fault_probability = 0.0; - bool is_help = false; - - UInt64 max_table_tries = 3; - UInt64 max_shard_partition_tries = 3; - UInt64 max_shard_partition_piece_tries_for_alter = 10; - std::chrono::milliseconds retry_delay_ms{1000}; - - bool experimental_use_sample_offset{false}; - - std::string base_dir; - std::string process_path; - std::string process_id; - std::string host_id; -}; - -} diff --git a/programs/copier/ClusterPartition.h b/programs/copier/ClusterPartition.h deleted file mode 100644 index 22063989e22..00000000000 --- a/programs/copier/ClusterPartition.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -/// Contains info about all shards that contain a partition -struct ClusterPartition -{ - double elapsed_time_seconds = 0; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - UInt64 blocks_copied = 0; - - UInt64 total_tries = 0; -}; - -using ClusterPartitions = std::map>; - -} diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp deleted file mode 100644 index 0cfff7e3f6c..00000000000 --- a/programs/copier/Internals.cpp +++ /dev/null @@ -1,280 +0,0 @@ -#include "Internals.h" -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -using ConfigurationPtr = Poco::AutoPtr; - -ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data) -{ - std::stringstream ss(xml_data); // STYLE_CHECK_ALLOW_STD_STRING_STREAM - Poco::XML::InputSource input_source{ss}; - return {new Poco::Util::XMLConfiguration{&input_source}}; -} - -String getQuotedTable(const String & database, const String & table) -{ - if (database.empty()) - return backQuoteIfNeed(table); - - return backQuoteIfNeed(database) + "." 
+ backQuoteIfNeed(table); -} - -String getQuotedTable(const DatabaseAndTableName & db_and_table) -{ - return getQuotedTable(db_and_table.first, db_and_table.second); -} - - -// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key]) -std::shared_ptr createASTStorageDistributed( - const String & cluster_name, const String & database, const String & table, - const ASTPtr & sharding_key_ast) -{ - auto args = std::make_shared(); - args->children.emplace_back(std::make_shared(cluster_name)); - args->children.emplace_back(std::make_shared(database)); - args->children.emplace_back(std::make_shared(table)); - if (sharding_key_ast) - args->children.emplace_back(sharding_key_ast); - - auto engine = std::make_shared(); - engine->name = "Distributed"; - engine->arguments = args; - - auto storage = std::make_shared(); - storage->set(storage->engine, engine); - - return storage; -} - - -Block getBlockWithAllStreamData(QueryPipelineBuilder builder) -{ - builder.addTransform(std::make_shared( - builder.getHeader(), - std::numeric_limits::max(), - std::numeric_limits::max())); - - auto cur_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); - Block block; - PullingPipelineExecutor executor(cur_pipeline); - executor.pull(block); - - return block; -} - -bool isExtendedDefinitionStorage(const ASTPtr & storage_ast) -{ - const auto & storage = storage_ast->as(); - return storage.partition_by || storage.order_by || storage.sample_by; -} - -ASTPtr extractPartitionKey(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (isExtendedDefinitionStorage(storage_ast)) - { - if (storage.partition_by) - return storage.partition_by->clone(); - - static const char * all = "all"; - return std::make_shared(Field(all, strlen(all))); - } - else - { - bool is_replicated = startsWith(engine.name, "Replicated"); - size_t min_args = is_replicated ? 3 : 1; - - if (!engine.arguments) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected arguments in {}", storage_str); - - ASTPtr arguments_ast = engine.arguments->clone(); - ASTs & arguments = arguments_ast->children; - - if (arguments.size() < min_args) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected at least {} arguments in {}", min_args, storage_str); - - ASTPtr & month_arg = is_replicated ? 
arguments[2] : arguments[1]; - return makeASTFunction("toYYYYMM", month_arg->clone()); - } -} - -ASTPtr extractPrimaryKey(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (!isExtendedDefinitionStorage(storage_ast)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); - } - - if (storage.primary_key) - return storage.primary_key->clone(); - - return nullptr; -} - - -ASTPtr extractOrderBy(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (!isExtendedDefinitionStorage(storage_ast)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); - } - - if (storage.order_by) - return storage.order_by->clone(); - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); -} - -/// Wraps only identifiers with backticks. -std::string wrapIdentifiersWithBackticks(const ASTPtr & root) -{ - if (auto identifier = std::dynamic_pointer_cast(root)) - return backQuote(identifier->name()); - - if (auto function = std::dynamic_pointer_cast(root)) - return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')'; - - if (auto expression_list = std::dynamic_pointer_cast(root)) - { - Names function_arguments(expression_list->children.size()); - for (size_t i = 0; i < expression_list->children.size(); ++i) - function_arguments[i] = wrapIdentifiersWithBackticks(expression_list->children[0]); - return boost::algorithm::join(function_arguments, ", "); - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key could be represented only as columns or functions from columns."); -} - - -Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) -{ - const auto sorting_key_ast = extractOrderBy(storage_ast); - const auto primary_key_ast = extractPrimaryKey(storage_ast); - - const auto sorting_key_expr_list = extractKeyExpressionList(sorting_key_ast); - const auto primary_key_expr_list = primary_key_ast - ? extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone(); - - /// Maybe we have to handle VersionedCollapsing engine separately. But in our case in looks pointless. - - size_t primary_key_size = primary_key_expr_list->children.size(); - size_t sorting_key_size = sorting_key_expr_list->children.size(); - - if (primary_key_size > sorting_key_size) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: " - "{} is greater than the sorting key length: {}", - primary_key_size, sorting_key_size); - - Names primary_key_columns; - NameSet primary_key_columns_set; - - for (size_t i = 0; i < sorting_key_size; ++i) - { - /// Column name could be represented as a f_1(f_2(...f_n(column_name))). - /// Each f_i could take one or more parameters. - /// We will wrap identifiers with backticks to allow non-standard identifier names. 
- String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName(); - - if (i < primary_key_size) - { - String pk_column = primary_key_expr_list->children[i]->getColumnName(); - if (pk_column != sorting_key_column) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Primary key must be a prefix of the sorting key, " - "but the column in the position {} is {}, not {}", i, sorting_key_column, pk_column); - - if (!primary_key_columns_set.emplace(pk_column).second) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); - - primary_key_columns.push_back(wrapIdentifiersWithBackticks(primary_key_expr_list->children[i])); - } - } - - return primary_key_columns; -} - -bool isReplicatedTableEngine(const ASTPtr & storage_ast) -{ - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - String storage_str = queryToString(storage_ast); - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - return startsWith(engine.name, "Replicated"); -} - -ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random) -{ - ShardPriority res; - - if (replicas.empty()) - return res; - - res.is_remote = 1; - for (const auto & replica : replicas) - { - if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name))) - { - res.is_remote = 0; - break; - } - } - - res.hostname_difference = std::numeric_limits::max(); - for (const auto & replica : replicas) - { - size_t difference = getHostNamePrefixDistance(local_hostname, replica.host_name); - res.hostname_difference = std::min(difference, res.hostname_difference); - } - - res.random = random; - return res; -} - -} diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h deleted file mode 100644 index 48f4b0fab09..00000000000 --- a/programs/copier/Internals.h +++ /dev/null @@ -1,198 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Aliases.h" - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - -ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data); - -String getQuotedTable(const String & database, const String & table); - -String getQuotedTable(const DatabaseAndTableName & db_and_table); - - -enum class TaskState -{ - Started = 0, - Finished, - Unknown -}; - -/// Used to mark status of shard partition tasks -struct TaskStateWithOwner -{ - TaskStateWithOwner() = default; - - TaskStateWithOwner(TaskState state_, const String & owner_) : state(state_), owner(owner_) {} - - TaskState state{TaskState::Unknown}; - String owner; - - static String getData(TaskState state, const String &owner) - { - return TaskStateWithOwner(state, owner).toString(); - } - - String toString() - { - WriteBufferFromOwnString wb; - wb << static_cast(state) << 
"\n" << escape << owner; - return wb.str(); - } - - static TaskStateWithOwner fromString(const String & data) - { - ReadBufferFromString rb(data); - TaskStateWithOwner res; - UInt32 state; - - rb >> state >> "\n" >> escape >> res.owner; - - if (state >= static_cast(TaskState::Unknown)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state {}", data); - - res.state = static_cast(state); - return res; - } -}; - - -struct ShardPriority -{ - UInt8 is_remote = 1; - size_t hostname_difference = 0; - UInt8 random = 0; - - static bool greaterPriority(const ShardPriority & current, const ShardPriority & other) - { - return std::forward_as_tuple(current.is_remote, current.hostname_difference, current.random) - < std::forward_as_tuple(other.is_remote, other.hostname_difference, other.random); - } -}; - -/// Execution status of a task. -/// Is used for: partition copying task status, partition piece copying task status, partition moving task status. -enum class TaskStatus -{ - Active, - Finished, - Error, -}; - -struct MultiTransactionInfo -{ - int32_t code; - Coordination::Requests requests; - Coordination::Responses responses; -}; - -// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key]) -std::shared_ptr createASTStorageDistributed( - const String & cluster_name, const String & database, const String & table, - const ASTPtr & sharding_key_ast = nullptr); - -Block getBlockWithAllStreamData(QueryPipelineBuilder builder); - -bool isExtendedDefinitionStorage(const ASTPtr & storage_ast); - -ASTPtr extractPartitionKey(const ASTPtr & storage_ast); - -/* -* Choosing a Primary Key that Differs from the Sorting Key -* It is possible to specify a primary key (an expression with values that are written in the index file for each mark) -* that is different from the sorting key (an expression for sorting the rows in data parts). -* In this case the primary key expression tuple must be a prefix of the sorting key expression tuple. -* This feature is helpful when using the SummingMergeTree and AggregatingMergeTree table engines. -* In a common case when using these engines, the table has two types of columns: dimensions and measures. -* Typical queries aggregate values of measure columns with arbitrary GROUP BY and filtering by dimensions. -* Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, -* it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns -* and this list must be frequently updated with newly added dimensions. -* In this case it makes sense to leave only a few columns in the primary key that will provide efficient -* range scans and add the remaining dimension columns to the sorting key tuple. -* ALTER of the sorting key is a lightweight operation because when a new column is simultaneously added t -* o the table and to the sorting key, existing data parts don't need to be changed. -* Since the old sorting key is a prefix of the new sorting key and there is no data in the newly added column, -* the data is sorted by both the old and new sorting keys at the moment of table modification. 
-* -* */ -ASTPtr extractPrimaryKey(const ASTPtr & storage_ast); - -ASTPtr extractOrderBy(const ASTPtr & storage_ast); - -Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast); - -bool isReplicatedTableEngine(const ASTPtr & storage_ast); - -ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random); - -} diff --git a/programs/copier/ShardPartition.cpp b/programs/copier/ShardPartition.cpp deleted file mode 100644 index 4c962fc807d..00000000000 --- a/programs/copier/ShardPartition.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include "ShardPartition.h" - -#include "TaskShard.h" -#include "TaskTable.h" - -namespace DB -{ - -ShardPartition::ShardPartition(TaskShard & parent, String name_quoted_, size_t number_of_splits) - : task_shard(parent) - , name(std::move(name_quoted_)) -{ - pieces.reserve(number_of_splits); -} - -String ShardPartition::getPartitionCleanStartPath() const -{ - return getPartitionPath() + "/clean_start"; -} - -String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const -{ - assert(current_piece_number < task_shard.task_table.number_of_splits); - return getPartitionPiecePath(current_piece_number) + "/clean_start"; -} - -String ShardPartition::getPartitionPath() const -{ - return task_shard.task_table.getPartitionPath(name); -} - -String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const -{ - assert(current_piece_number < task_shard.task_table.number_of_splits); - return task_shard.task_table.getPartitionPiecePath(name, current_piece_number); -} - -String ShardPartition::getShardStatusPath() const -{ - // schema: //tables/
//shards/ - // e.g. /root/table_test.hits/201701/shards/1 - return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster()); -} - -String ShardPartition::getPartitionShardsPath() const -{ - return getPartitionPath() + "/shards"; -} - -String ShardPartition::getPartitionActiveWorkersPath() const -{ - return getPartitionPath() + "/partition_active_workers"; -} - -String ShardPartition::getActiveWorkerPath() const -{ - return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster()); -} - -String ShardPartition::getCommonPartitionIsDirtyPath() const -{ - return getPartitionPath() + "/is_dirty"; -} - -String ShardPartition::getCommonPartitionIsCleanedPath() const -{ - return getCommonPartitionIsDirtyPath() + "/cleaned"; -} - -} diff --git a/programs/copier/ShardPartition.h b/programs/copier/ShardPartition.h deleted file mode 100644 index 2457213733c..00000000000 --- a/programs/copier/ShardPartition.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "ShardPartitionPiece.h" - -#include - -#include - -namespace DB -{ - -struct TaskShard; - -/// Just destination partition of a shard -/// I don't know what this comment means. -/// In short, when we discovered what shards contain currently processing partition, -/// This class describes a partition (name) that is stored on the shard (parent). -struct ShardPartition -{ - ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10); - - String getPartitionPath() const; - - String getPartitionPiecePath(size_t current_piece_number) const; - - String getPartitionCleanStartPath() const; - - String getPartitionPieceCleanStartPath(size_t current_piece_number) const; - - String getCommonPartitionIsDirtyPath() const; - - String getCommonPartitionIsCleanedPath() const; - - String getPartitionActiveWorkersPath() const; - - String getActiveWorkerPath() const; - - String getPartitionShardsPath() const; - - String getShardStatusPath() const; - - /// What partition pieces are present in current shard. - /// FYI: Piece is a part of partition which has modulo equals to concrete constant (less than number_of_splits obliously) - /// For example SELECT ... from ... WHERE partition=current_partition AND cityHash64(*) == const; - /// Absent pieces have field is_absent_piece equals to true. 
- PartitionPieces pieces; - - TaskShard & task_shard; - String name; -}; - -using TasksPartition = std::map>; - -} diff --git a/programs/copier/ShardPartitionPiece.cpp b/programs/copier/ShardPartitionPiece.cpp deleted file mode 100644 index 36d1621e012..00000000000 --- a/programs/copier/ShardPartitionPiece.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "ShardPartitionPiece.h" - -#include "ShardPartition.h" -#include "TaskShard.h" - -#include - -namespace DB -{ - -ShardPartitionPiece::ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_) - : is_absent_piece(!is_present_piece_) - , current_piece_number(current_piece_number_) - , shard_partition(parent) -{ -} - -String ShardPartitionPiece::getPartitionPiecePath() const -{ - return shard_partition.getPartitionPath() + "/piece_" + toString(current_piece_number); -} - -String ShardPartitionPiece::getPartitionPieceCleanStartPath() const -{ - return getPartitionPiecePath() + "/clean_start"; -} - -String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const -{ - return getPartitionPiecePath() + "/is_dirty"; -} - -String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const -{ - return getPartitionPieceIsDirtyPath() + "/cleaned"; -} - -String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const -{ - return getPartitionPiecePath() + "/partition_piece_active_workers"; -} - -String ShardPartitionPiece::getActiveWorkerPath() const -{ - return getPartitionPieceActiveWorkersPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); -} - -/// On what shards do we have current partition. -String ShardPartitionPiece::getPartitionPieceShardsPath() const -{ - return getPartitionPiecePath() + "/shards"; -} - -String ShardPartitionPiece::getShardStatusPath() const -{ - return getPartitionPieceShardsPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); -} - -String ShardPartitionPiece::getPartitionPieceCleanerPath() const -{ - return getPartitionPieceIsDirtyPath() + "/cleaner"; -} - -} diff --git a/programs/copier/ShardPartitionPiece.h b/programs/copier/ShardPartitionPiece.h deleted file mode 100644 index 453364c0fc8..00000000000 --- a/programs/copier/ShardPartitionPiece.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include - -#include - -namespace DB -{ - -struct ShardPartition; - -struct ShardPartitionPiece -{ - ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_); - - String getPartitionPiecePath() const; - - String getPartitionPieceCleanStartPath() const; - - String getPartitionPieceIsDirtyPath() const; - - String getPartitionPieceIsCleanedPath() const; - - String getPartitionPieceActiveWorkersPath() const; - - String getActiveWorkerPath() const ; - - /// On what shards do we have current partition. 
- String getPartitionPieceShardsPath() const; - - String getShardStatusPath() const; - - String getPartitionPieceCleanerPath() const; - - bool is_absent_piece; - const size_t current_piece_number; - - ShardPartition & shard_partition; -}; - -using PartitionPieces = std::vector; - -} diff --git a/programs/copier/StatusAccumulator.cpp b/programs/copier/StatusAccumulator.cpp deleted file mode 100644 index 77adeac708c..00000000000 --- a/programs/copier/StatusAccumulator.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "StatusAccumulator.h" - -#include -#include -#include -#include - -#include - -namespace DB -{ - -StatusAccumulator::MapPtr StatusAccumulator::fromJSON(String state_json) -{ - Poco::JSON::Parser parser; - auto state = parser.parse(state_json).extract(); - MapPtr result_ptr = std::make_shared(); - for (const auto & table_name : state->getNames()) - { - auto table_status_json = state->getValue(table_name); - auto table_status = parser.parse(table_status_json).extract(); - /// Map entry will be created if it is absent - auto & map_table_status = (*result_ptr)[table_name]; - map_table_status.all_partitions_count += table_status->getValue("all_partitions_count"); - map_table_status.processed_partitions_count += table_status->getValue("processed_partitions_count"); - } - return result_ptr; -} - -String StatusAccumulator::serializeToJSON(MapPtr statuses) -{ - Poco::JSON::Object result_json; - for (const auto & [table_name, table_status] : *statuses) - { - Poco::JSON::Object status_json; - status_json.set("all_partitions_count", table_status.all_partitions_count); - status_json.set("processed_partitions_count", table_status.processed_partitions_count); - - result_json.set(table_name, status_json); - } - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss.exceptions(std::ios::failbit); - Poco::JSON::Stringifier::stringify(result_json, oss); - auto result = oss.str(); - return result; -} - -} diff --git a/programs/copier/StatusAccumulator.h b/programs/copier/StatusAccumulator.h deleted file mode 100644 index d420b611602..00000000000 --- a/programs/copier/StatusAccumulator.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include - -#include -#include - -namespace DB -{ - -class StatusAccumulator -{ -public: - struct TableStatus - { - size_t all_partitions_count; - size_t processed_partitions_count; - }; - - using Map = std::unordered_map; - using MapPtr = std::shared_ptr; - - static MapPtr fromJSON(String state_json); - static String serializeToJSON(MapPtr statuses); -}; - -} diff --git a/programs/copier/TaskCluster.cpp b/programs/copier/TaskCluster.cpp deleted file mode 100644 index 0fb06616e50..00000000000 --- a/programs/copier/TaskCluster.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "TaskCluster.h" - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -TaskCluster::TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_) - : task_zookeeper_path(task_zookeeper_path_) - , default_local_database(default_local_database_) -{} - -void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key) -{ - String prefix = base_key.empty() ? 
"" : base_key + "."; - - clusters_prefix = prefix + "remote_servers"; - if (!config.has(clusters_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "You should specify list of clusters in {}", clusters_prefix); - - Poco::Util::AbstractConfiguration::Keys tables_keys; - config.keys(prefix + "tables", tables_keys); - - for (const auto & table_key : tables_keys) - { - table_tasks.emplace_back(*this, config, prefix + "tables", table_key); - } -} - -void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key) -{ - String prefix = base_key.empty() ? "" : base_key + "."; - - max_workers = config.getUInt64(prefix + "max_workers"); - - settings_common = Settings(); - if (config.has(prefix + "settings")) - settings_common.loadSettingsFromConfig(prefix + "settings", config); - - settings_common.prefer_localhost_replica = false; - - settings_pull = settings_common; - if (config.has(prefix + "settings_pull")) - settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config); - - settings_push = settings_common; - if (config.has(prefix + "settings_push")) - settings_push.loadSettingsFromConfig(prefix + "settings_push", config); - - auto set_default_value = [] (auto && setting, auto && default_value) - { - setting = setting.changed ? setting.value : default_value; - }; - - /// Override important settings - settings_pull.readonly = 1; - settings_pull.prefer_localhost_replica = false; - settings_push.distributed_foreground_insert = true; - settings_push.prefer_localhost_replica = false; - - set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME); - set_default_value(settings_pull.max_threads, 1); - set_default_value(settings_pull.max_block_size, 8192UL); - set_default_value(settings_pull.preferred_block_size_bytes, 0); - - set_default_value(settings_push.distributed_background_insert_timeout, 0); - set_default_value(settings_push.alter_sync, 2); -} - -} - diff --git a/programs/copier/TaskCluster.h b/programs/copier/TaskCluster.h deleted file mode 100644 index a7f8bc3baca..00000000000 --- a/programs/copier/TaskCluster.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "TaskTable.h" - -#include -#include - -#include - -#include - -namespace DB -{ - -struct TaskCluster -{ - TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_); - - void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); - - /// Set (or update) settings and max_workers param - void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); - - /// Base node for all tasks. 
Its structure: - /// workers/ - directory with active workers (amount of them is less or equal max_workers) - /// description - node with task configuration - /// table_table1/ - directories with per-partition copying status - String task_zookeeper_path; - - /// Database used to create temporary Distributed tables - String default_local_database; - - /// Limits number of simultaneous workers - UInt64 max_workers = 0; - - /// Base settings for pull and push - Settings settings_common; - /// Settings used to fetch data - Settings settings_pull; - /// Settings used to insert data - Settings settings_push; - - String clusters_prefix; - - /// Subtasks - TasksTable table_tasks; - - pcg64 random_engine; -}; - -} diff --git a/programs/copier/TaskShard.cpp b/programs/copier/TaskShard.cpp deleted file mode 100644 index d156f451a84..00000000000 --- a/programs/copier/TaskShard.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "TaskShard.h" - -#include "TaskTable.h" - -namespace DB -{ - -TaskShard::TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_) - : task_table(parent) - , info(info_) -{ - list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName()); -} - -UInt32 TaskShard::numberInCluster() const -{ - return info.shard_num; -} - -UInt32 TaskShard::indexInCluster() const -{ - return info.shard_num - 1; -} - -String DB::TaskShard::getDescription() const -{ - return fmt::format("N{} (having a replica {}, pull table {} of cluster {}", - numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name); -} - -String DB::TaskShard::getHostNameExample() const -{ - const auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster()); - return replicas.at(0).readableString(); -} - -} diff --git a/programs/copier/TaskShard.h b/programs/copier/TaskShard.h deleted file mode 100644 index 05d652077ea..00000000000 --- a/programs/copier/TaskShard.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "Internals.h" -#include "ClusterPartition.h" -#include "ShardPartition.h" - - -namespace DB -{ - -struct TaskTable; - -struct TaskShard -{ - TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_); - - TaskTable & task_table; - - Cluster::ShardInfo info; - - UInt32 numberInCluster() const; - - UInt32 indexInCluster() const; - - String getDescription() const; - - String getHostNameExample() const; - - /// Used to sort clusters by their proximity - ShardPriority priority; - - /// Column with unique destination partitions (computed from engine_push_partition_key expr.) 
in the shard - ColumnWithTypeAndName partition_key_column; - - /// There is a task for each destination partition - TasksPartition partition_tasks; - - /// Which partitions have been checked for existence - /// If some partition from this lists is exists, it is in partition_tasks - std::set checked_partitions; - - /// Last CREATE TABLE query of the table of the shard - ASTPtr current_pull_table_create_query; - ASTPtr current_push_table_create_query; - - /// Internal distributed tables - DatabaseAndTableName table_read_shard; - DatabaseAndTableName main_table_split_shard; - ListOfDatabasesAndTableNames list_of_split_tables_on_shard; -}; - -using TaskShardPtr = std::shared_ptr; -using TasksShard = std::vector; - -} diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp deleted file mode 100644 index d055ceb4c7b..00000000000 --- a/programs/copier/TaskTable.cpp +++ /dev/null @@ -1,222 +0,0 @@ -#include "TaskTable.h" - -#include "ClusterPartition.h" -#include "TaskCluster.h" - -#include -#include - -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int LOGICAL_ERROR; -} - -TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, - const String & prefix_, const String & table_key) - : task_cluster(parent) -{ - String table_prefix = prefix_ + "." + table_key + "."; - - name_in_config = table_key; - - number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3); - - allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false); - allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false); - - cluster_pull_name = config.getString(table_prefix + "cluster_pull"); - cluster_push_name = config.getString(table_prefix + "cluster_push"); - - table_pull.first = config.getString(table_prefix + "database_pull"); - table_pull.second = config.getString(table_prefix + "table_pull"); - - table_push.first = config.getString(table_prefix + "database_push"); - table_push.second = config.getString(table_prefix + "table_push"); - - /// Used as node name in ZooKeeper - table_id = escapeForFileName(cluster_push_name) - + "." + escapeForFileName(table_push.first) - + "." 
+ escapeForFileName(table_push.second); - - engine_push_str = config.getString(table_prefix + "engine", "rand()"); - - { - ParserStorage parser_storage{ParserStorage::TABLE_ENGINE}; - engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - engine_push_partition_key_ast = extractPartitionKey(engine_push_ast); - primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", "); - is_replicated_table = isReplicatedTableEngine(engine_push_ast); - } - - sharding_key_str = config.getString(table_prefix + "sharding_key"); - - auxiliary_engine_split_asts.reserve(number_of_splits); - { - ParserExpressionWithOptionalAlias parser_expression(false); - sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second, - sharding_key_ast); - - for (const auto piece_number : collections::range(0, number_of_splits)) - { - auxiliary_engine_split_asts.emplace_back - ( - createASTStorageDistributed(cluster_push_name, table_push.first, - table_push.second + "_piece_" + toString(piece_number), sharding_key_ast) - ); - } - } - - where_condition_str = config.getString(table_prefix + "where_condition", ""); - if (!where_condition_str.empty()) - { - ParserExpressionWithOptionalAlias parser_expression(false); - where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - - // Will use canonical expression form - where_condition_str = queryToString(where_condition_ast); - } - - String enabled_partitions_prefix = table_prefix + "enabled_partitions"; - has_enabled_partitions = config.has(enabled_partitions_prefix); - - if (has_enabled_partitions) - { - Strings keys; - config.keys(enabled_partitions_prefix, keys); - - if (keys.empty()) - { - /// Parse list of partition from space-separated string - String partitions_str = config.getString(table_prefix + "enabled_partitions"); - boost::trim_if(partitions_str, isWhitespaceASCII); - boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on); - } - else - { - /// Parse sequence of ... - for (const String &key : keys) - { - if (!startsWith(key, "partition")) - throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown key {} in {}", key, enabled_partitions_prefix); - - enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." 
+ key)); - } - } - - std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin())); - } -} - - -String TaskTable::getPartitionPath(const String & partition_name) const -{ - return task_cluster.task_zookeeper_path // root - + "/tables/" + table_id // tables/dst_cluster.merge.hits - + "/" + escapeForFileName(partition_name); // 201701 -} - -String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/attach_active"; -} - -String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/attach_is_done"; -} - -String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const -{ - assert(piece_number < number_of_splits); - return getPartitionPath(partition_name) + "/piece_" + toString(piece_number); // 1...number_of_splits -} - -String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const -{ - return getPartitionPath(partition_name) + "/is_dirty"; -} - -String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const -{ - return getPartitionPiecePath(partition_name, piece_number) + "/is_dirty"; -} - -String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const -{ - return getCertainPartitionIsDirtyPath(partition_name) + "/cleaned"; -} - -String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const -{ - return getCertainPartitionPieceIsDirtyPath(partition_name, piece_number) + "/cleaned"; -} - -String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/shards"; -} - -String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const -{ - return getPartitionPiecePath(partition_name, piece_number) + "/shards"; -} - -bool TaskTable::isReplicatedTable() const -{ - return is_replicated_table; -} - -String TaskTable::getStatusAllPartitionCount() const -{ - return task_cluster.task_zookeeper_path + "/status/all_partitions_count"; -} - -String TaskTable::getStatusProcessedPartitionsCount() const -{ - return task_cluster.task_zookeeper_path + "/status/processed_partitions_count"; -} - -ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const -{ - ASTPtr prev_engine_push_ast = engine_push_ast->clone(); - - auto & new_storage_ast = prev_engine_push_ast->as(); - auto & new_engine_ast = new_storage_ast.engine->as(); - - /// Remove "Replicated" from name - new_engine_ast.name = new_engine_ast.name.substr(10); - - if (new_engine_ast.arguments) - { - auto & replicated_table_arguments = new_engine_ast.arguments->children; - - - /// In some cases of Atomic database engine usage ReplicatedMergeTree tables - /// could be created without arguments. - if (!replicated_table_arguments.empty()) - { - /// Delete first two arguments of Replicated...MergeTree() table. 
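The rewriteReplicatedCreateQueryToPlain() method being removed here strips the "Replicated" prefix from the engine name and, as the lines just below show, erases the first two engine arguments (the ZooKeeper path and replica name). A rough sketch of the same transformation on a simplified engine representation, with plain structs standing in for the AST:

#include <iostream>
#include <string>
#include <vector>

// Simplified model: "ReplicatedMergeTree('/zk/path', '{replica}', ...)" becomes "MergeTree(...)".
struct Engine
{
    std::string name;
    std::vector<std::string> arguments;
};

static Engine rewriteReplicatedToPlain(Engine engine)
{
    // Remove the "Replicated" prefix (10 characters) from the engine name.
    if (engine.name.rfind("Replicated", 0) == 0)
        engine.name = engine.name.substr(10);

    // Drop the ZooKeeper path and replica name, i.e. the first two arguments.
    // (The original code skipped this when the argument list was empty, which can
    // happen for tables created under the Atomic database engine.)
    if (engine.arguments.size() >= 2)
        engine.arguments.erase(engine.arguments.begin(), engine.arguments.begin() + 2);

    return engine;
}

int main()
{
    Engine e{"ReplicatedMergeTree", {"'/clickhouse/tables/hits'", "'{replica}'", "ORDER BY id"}};
    Engine plain = rewriteReplicatedToPlain(e);
    std::cout << plain.name << " args=" << plain.arguments.size() << '\n'; // MergeTree args=1
}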
- replicated_table_arguments.erase(replicated_table_arguments.begin()); - replicated_table_arguments.erase(replicated_table_arguments.begin()); - } - } - - return new_storage_ast.clone(); -} - -ClusterPartition & TaskTable::getClusterPartition(const String & partition_name) -{ - auto it = cluster_partitions.find(partition_name); - if (it == cluster_partitions.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no cluster partition {} in {}", partition_name, table_id); - return it->second; -} - -} diff --git a/programs/copier/TaskTable.h b/programs/copier/TaskTable.h deleted file mode 100644 index 2bb7f078bc6..00000000000 --- a/programs/copier/TaskTable.h +++ /dev/null @@ -1,173 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "TaskShard.h" - - -namespace DB -{ - -struct ClusterPartition; -struct TaskCluster; - -struct TaskTable -{ - TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, const String & table_key); - - TaskCluster & task_cluster; - - /// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone() - /// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc. - - String getPartitionPath(const String & partition_name) const; - - String getPartitionAttachIsActivePath(const String & partition_name) const; - - String getPartitionAttachIsDonePath(const String & partition_name) const; - - String getPartitionPiecePath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionIsDirtyPath(const String & partition_name) const; - - String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionIsCleanedPath(const String & partition_name) const; - - String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionTaskStatusPath(const String & partition_name) const; - - String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const; - - bool isReplicatedTable() const; - - /// These nodes are used for check-status option - String getStatusAllPartitionCount() const; - String getStatusProcessedPartitionsCount() const; - - /// Partitions will be split into number-of-splits pieces. - /// Each piece will be copied independently. 
(10 by default) - size_t number_of_splits; - - bool allow_to_copy_alias_and_materialized_columns{false}; - bool allow_to_drop_target_partitions{false}; - - String name_in_config; - - /// Used as task ID - String table_id; - - /// Column names in primary key - String primary_key_comma_separated; - - /// Source cluster and table - String cluster_pull_name; - DatabaseAndTableName table_pull; - - /// Destination cluster and table - String cluster_push_name; - DatabaseAndTableName table_push; - - /// Storage of destination table - /// (tables that are stored on each shard of target cluster) - String engine_push_str; - ASTPtr engine_push_ast; - ASTPtr engine_push_partition_key_ast; - - /// First argument of Replicated...MergeTree() - String engine_push_zk_path; - bool is_replicated_table; - - ASTPtr rewriteReplicatedCreateQueryToPlain() const; - - /* - * A Distributed table definition used to split data - * Distributed table will be created on each shard of default - * cluster to perform data copying and resharding - * */ - String sharding_key_str; - ASTPtr sharding_key_ast; - ASTPtr main_engine_split_ast; - - /* - * To copy partition piece form one cluster to another we have to use Distributed table. - * In case of usage separate table (engine_push) for each partition piece, - * we have to use many Distributed tables. - * */ - ASTs auxiliary_engine_split_asts; - - /// Additional WHERE expression to filter input data - String where_condition_str; - ASTPtr where_condition_ast; - - /// Resolved clusters - ClusterPtr cluster_pull; - ClusterPtr cluster_push; - - /// Filter partitions that should be copied - bool has_enabled_partitions = false; - Strings enabled_partitions; - NameSet enabled_partitions_set; - - /** - * Prioritized list of shards - * all_shards contains information about all shards in the table. - * So we have to check whether particular shard have current partition or not while processing. - */ - TasksShard all_shards; - TasksShard local_shards; - - /// All partitions of the current table. 
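TaskTable::initShards(), removed a little further down, ranks the pull shards by proximity to the local host and then cuts off the prefix of local shards. A rough standalone sketch of that sort-then-partition step; the Priority struct and hostnames below are illustrative stand-ins for ShardPriority and the real cluster configuration:

#include <algorithm>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

// Stand-in for ShardPriority: local (non-remote) shards rank first, then closer hostnames.
struct Priority
{
    size_t hostname_difference = 0;
    int is_remote = 1;
    int random = 0;
};

struct Shard
{
    std::string host;
    Priority priority;
};

// Mirrors ShardPriority::greaterPriority() in spirit: compare (is_remote, hostname distance, random).
static bool greaterPriority(const Priority & lhs, const Priority & rhs)
{
    return std::tie(lhs.is_remote, lhs.hostname_difference, lhs.random)
         < std::tie(rhs.is_remote, rhs.hostname_difference, rhs.random);
}

int main()
{
    std::vector<Shard> all_shards = {
        {"replica-3.example.com", {7, 1, 42}},
        {"localhost",             {0, 0, 17}},
        {"replica-1.example.com", {3, 1, 5}},
    };

    // Sort by priority, best shard first.
    std::sort(all_shards.begin(), all_shards.end(),
              [](const Shard & lhs, const Shard & rhs) { return greaterPriority(lhs.priority, rhs.priority); });

    // Cut the leading run of local (is_remote == 0) shards, like the lower_bound in initShards().
    auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
        [](const Shard & lhs, int is_remote) { return lhs.priority.is_remote < is_remote; });
    std::vector<Shard> local_shards(all_shards.begin(), it_first_remote);

    std::cout << "local shards: " << local_shards.size() << ", total: " << all_shards.size() << '\n';
}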
- ClusterPartitions cluster_partitions; - NameSet finished_cluster_partitions; - - /// Partition names to process in user-specified order - Strings ordered_partition_names; - - ClusterPartition & getClusterPartition(const String & partition_name); - - Stopwatch watch; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - - template - void initShards(RandomEngine &&random_engine); -}; - -using TasksTable = std::list; - - -template -inline void TaskTable::initShards(RandomEngine && random_engine) -{ - const String & fqdn_name = getFQDNOrHostName(); - std::uniform_int_distribution get_urand(0, std::numeric_limits::max()); - - // Compute the priority - for (const auto & shard_info : cluster_pull->getShardsInfo()) - { - TaskShardPtr task_shard = std::make_shared(*this, shard_info); - const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster()); - task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine)); - - all_shards.emplace_back(task_shard); - } - - // Sort by priority - std::sort(all_shards.begin(), all_shards.end(), - [](const TaskShardPtr & lhs, const TaskShardPtr & rhs) - { - return ShardPriority::greaterPriority(lhs->priority, rhs->priority); - }); - - // Cut local shards - auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1, - [](const TaskShardPtr & lhs, UInt8 is_remote) - { - return lhs->priority.is_remote < is_remote; - }); - - local_shards.assign(all_shards.begin(), it_first_remote); -} - -} diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h deleted file mode 100644 index bbdec230d2d..00000000000 --- a/programs/copier/ZooKeeperStaff.h +++ /dev/null @@ -1,221 +0,0 @@ -#pragma once - -/** Allows to compare two incremental counters of type UInt32 in presence of possible overflow. - * We assume that we compare values that are not too far away. - * For example, when we increment 0xFFFFFFFF, we get 0. So, 0xFFFFFFFF is less than 0. - */ -class WrappingUInt32 -{ -public: - UInt32 value; - - explicit WrappingUInt32(UInt32 _value) - : value(_value) - {} - - bool operator<(const WrappingUInt32 & other) const - { - return value != other.value && *this <= other; - } - - bool operator<=(const WrappingUInt32 & other) const - { - const UInt32 HALF = static_cast(1) << 31; - return (value <= other.value && other.value - value < HALF) - || (value > other.value && value - other.value > HALF); - } - - bool operator==(const WrappingUInt32 & other) const - { - return value == other.value; - } -}; - -/** Conforming Zxid definition. - * cf. https://github.com/apache/zookeeper/blob/631d1b284f0edb1c4f6b0fb221bf2428aec71aaa/zookeeper-docs/src/main/resources/markdown/zookeeperInternals.md#guarantees-properties-and-definitions - * - * But it is better to read this: https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html - * - * Actually here is the definition of Zxid. - * Every change to the ZooKeeper state receives a stamp in the form of a zxid (ZooKeeper Transaction Id). - * This exposes the total ordering of all changes to ZooKeeper. Each change will have a unique zxid - * and if zxid1 is smaller than zxid2 then zxid1 happened before zxid2. 
- */ -class Zxid -{ -public: - WrappingUInt32 epoch; - WrappingUInt32 counter; - explicit Zxid(UInt64 _zxid) - : epoch(static_cast(_zxid >> 32)) - , counter(static_cast(_zxid)) - {} - - bool operator<=(const Zxid & other) const - { - return (epoch < other.epoch) - || (epoch == other.epoch && counter <= other.counter); - } - - bool operator==(const Zxid & other) const - { - return epoch == other.epoch && counter == other.counter; - } -}; - -/* When multiple ClusterCopiers discover that the target partition is not empty, - * they will attempt to clean up this partition before proceeding to copying. - * - * Instead of purging is_dirty, the history of cleaning work is preserved and partition hygiene is established - * based on a happens-before relation between the events. - * This relation is encoded by LogicalClock based on the mzxid of the is_dirty ZNode and is_dirty/cleaned. - * The fact of the partition hygiene is encoded by CleanStateClock. - * - * For you to know what mzxid means: - * - * ZooKeeper Stat Structure: - * The Stat structure for each znode in ZooKeeper is made up of the following fields: - * - * -- czxid - * The zxid of the change that caused this znode to be created. - * - * -- mzxid - * The zxid of the change that last modified this znode. - * - * -- ctime - * The time in milliseconds from epoch when this znode was created. - * - * -- mtime - * The time in milliseconds from epoch when this znode was last modified. - * - * -- version - * The number of changes to the data of this znode. - * - * -- cversion - * The number of changes to the children of this znode. - * - * -- aversion - * The number of changes to the ACL of this znode. - * - * -- ephemeralOwner - * The session id of the owner of this znode if the znode is an ephemeral node. - * If it is not an ephemeral node, it will be zero. - * - * -- dataLength - * The length of the data field of this znode. - * - * -- numChildren - * The number of children of this znode. 
- * */ - -class LogicalClock -{ -public: - std::optional zxid; - - LogicalClock() = default; - - explicit LogicalClock(UInt64 _zxid) - : zxid(_zxid) - {} - - bool hasHappened() const - { - return bool(zxid); - } - - /// happens-before relation with a reasonable time bound - bool happensBefore(const LogicalClock & other) const - { - return !zxid - || (other.zxid && *zxid <= *other.zxid); - } - - bool operator<=(const LogicalClock & other) const - { - return happensBefore(other); - } - - /// strict equality check - bool operator==(const LogicalClock & other) const - { - return zxid == other.zxid; - } -}; - - -class CleanStateClock -{ -public: - LogicalClock discovery_zxid; - std::optional discovery_version; - - LogicalClock clean_state_zxid; - std::optional clean_state_version; - - std::shared_ptr stale; - - bool is_clean() const - { - return !is_stale() - && (!discovery_zxid.hasHappened() || (clean_state_zxid.hasHappened() && discovery_zxid <= clean_state_zxid)); - } - - bool is_stale() const - { - return stale->load(); - } - - CleanStateClock( - const zkutil::ZooKeeperPtr & zookeeper, - const String & discovery_path, - const String & clean_state_path) - : stale(std::make_shared(false)) - { - Coordination::Stat stat{}; - String _some_data; - auto watch_callback = - [my_stale = stale] (const Coordination::WatchResponse & rsp) - { - auto logger = getLogger("ClusterCopier"); - if (rsp.error == Coordination::Error::ZOK) - { - switch (rsp.type) - { - case Coordination::CREATED: - LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path); - my_stale->store(true); - break; - case Coordination::CHANGED: - LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path); - my_stale->store(true); - } - } - }; - if (zookeeper->tryGetWatch(discovery_path, _some_data, &stat, watch_callback)) - { - discovery_zxid = LogicalClock(stat.mzxid); - discovery_version = stat.version; - } - if (zookeeper->tryGetWatch(clean_state_path, _some_data, &stat, watch_callback)) - { - clean_state_zxid = LogicalClock(stat.mzxid); - clean_state_version = stat.version; - } - } - - bool operator==(const CleanStateClock & other) const - { - return !is_stale() - && !other.is_stale() - && discovery_zxid == other.discovery_zxid - && discovery_version == other.discovery_version - && clean_state_zxid == other.clean_state_zxid - && clean_state_version == other.clean_state_version; - } - - bool operator!=(const CleanStateClock & other) const - { - return !(*this == other); - } -}; diff --git a/programs/copier/clickhouse-copier.cpp b/programs/copier/clickhouse-copier.cpp deleted file mode 100644 index 4dabb01775b..00000000000 --- a/programs/copier/clickhouse-copier.cpp +++ /dev/null @@ -1 +0,0 @@ -int mainEntryClickHouseClusterCopier(int argc, char ** argv); diff --git a/programs/diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml index ae09d207091..eb7c70cf498 100644 --- a/programs/diagnostics/testdata/configs/xml/config.xml +++ b/programs/diagnostics/testdata/configs/xml/config.xml @@ -94,7 +94,7 @@ 8123 SQL_ @@ -1392,13 +1392,27 @@ - + + + + + + + + true Float32 and (Float64 x Float64) --> Float64 instead of 10 x + /// 10 input types x 8 output types, + /// - the most powerful SIMD instruction set (AVX-512F). 
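The ZooKeeperStaff.h removal above also takes out the overflow-aware zxid comparison the copier relied on. A compact standalone restatement of just that piece (WrappingUInt32 plus the epoch/counter split of a zxid), with everything ClickHouse-specific dropped:

#include <cstdint>
#include <iostream>

// Overflow-aware comparison of 32-bit counters that are assumed to stay "close" to each other:
// values less than half the range apart compare in wrap-around order, so 0xFFFFFFFF < 0.
struct WrappingUInt32
{
    uint32_t value;

    bool operator<=(const WrappingUInt32 & other) const
    {
        const uint32_t HALF = 1u << 31;
        return (value <= other.value && other.value - value < HALF)
            || (value > other.value && value - other.value > HALF);
    }
    bool operator==(const WrappingUInt32 & other) const { return value == other.value; }
    bool operator<(const WrappingUInt32 & other) const { return value != other.value && *this <= other; }
};

// A zxid is a 64-bit ZooKeeper transaction id: high 32 bits are the leader epoch,
// low 32 bits a per-epoch counter. Ordering is first by epoch, then by counter.
struct Zxid
{
    WrappingUInt32 epoch;
    WrappingUInt32 counter;

    explicit Zxid(uint64_t zxid)
        : epoch{static_cast<uint32_t>(zxid >> 32)}
        , counter{static_cast<uint32_t>(zxid)}
    {
    }

    bool operator<=(const Zxid & other) const
    {
        return (epoch < other.epoch) || (epoch == other.epoch && counter <= other.counter);
    }
};

int main()
{
    std::cout << (WrappingUInt32{0xFFFFFFFFu} < WrappingUInt32{0u}) << '\n'; // 1: wrapped counter still ordered
    std::cout << (Zxid((1ull << 32) | 5) <= Zxid((2ull << 32) | 1)) << '\n'; // 1: newer epoch wins
}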
+#if USE_MULTITARGET_CODE + if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v && std::is_same_v) + { + if (isArchSupported(TargetArch::AVX512F)) + Kernel::template accumulateCombine(&left[current_offset], &right[current_offset], array_size, i, state); + } +#else + /// Process chunks in vectorized manner + static constexpr size_t VEC_SIZE = 4; + typename Kernel::template State states[VEC_SIZE]; + for (; i + VEC_SIZE < array_size; i += VEC_SIZE) + { + for (size_t j = 0; j < VEC_SIZE; ++j) + Kernel::template accumulate(states[j], static_cast(left[i + j]), static_cast(right[i + j])); + } + + for (const auto & other_state : states) + Kernel::template combine(state, other_state); +#endif + + /// Process the tail + for (; i < array_size; ++i) + Kernel::template accumulate(state, static_cast(left[i]), static_cast(right[i])); + + /// ResultType res = Kernel::template finalize(state); + result[row] = Kernel::template finalize(state); + + current_offset = offsets[row]; + } } }; -using FunctionArrayDotProduct = FunctionArrayScalarProduct; +using FunctionArrayDotProduct = FunctionArrayScalarProduct; REGISTER_FUNCTION(ArrayDotProduct) { @@ -77,4 +326,5 @@ REGISTER_FUNCTION(ArrayDotProduct) // These functions are used by TupleOrArrayFunction in Function/vectorFunctions.cpp FunctionPtr createFunctionArrayDotProduct(ContextPtr context_) { return FunctionArrayDotProduct::create(context_); } + } diff --git a/src/Functions/array/arrayEnumerateRanked.cpp b/src/Functions/array/arrayEnumerateRanked.cpp index dd597d607dc..69d8954bfcf 100644 --- a/src/Functions/array/arrayEnumerateRanked.cpp +++ b/src/Functions/array/arrayEnumerateRanked.cpp @@ -1,8 +1,8 @@ -#include #include +#include #include -#include "arrayEnumerateRanked.h" +#include namespace DB { @@ -12,88 +12,105 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) +ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments, const char * function_name) { const size_t num_arguments = arguments.size(); + if (!num_arguments) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Missing arguments for function arrayEnumerateUniqRanked"); DepthType clear_depth = 1; - DepthTypes depths; + size_t i = 0; + if (const DataTypeArray * type_array = typeid_cast(arguments[0].type.get()); !type_array) + { + /// If the first argument is not an array, it must be a const positive and non zero number + const auto & depth_column = arguments[i].column; + if (!depth_column || !isColumnConst(*depth_column)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument of {} must be Const(UInt64)", function_name); + Field f = assert_cast(*depth_column).getField(); + if (f.getType() != Field::Types::UInt64 || f.safeGet() == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument of {} must be a positive integer", function_name); - /// function signature is the following: - /// f(c0, arr1, c1, arr2, c2, ...) - /// - /// c0 is something called "clear_depth" here. + clear_depth = static_cast(f.safeGet()); + i++; + } + + + /// The rest of the arguments must be in the shape: arr1, c1, arr2, c2, ... /// cN... - how deep to look into the corresponding arrN, (called "depths" here) - /// may be omitted - then it means "look at the full depth". 
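The new arrayDotProduct loop above accumulates into several independent partial states and merges them at the end, which keeps the hot loop free of a single serial dependency chain. A standalone sketch of that pattern for a plain float dot product; the ClickHouse kernels and the AVX-512 dispatch are intentionally left out:

#include <cstddef>
#include <iostream>
#include <vector>

// Chunked accumulation: keep VEC_SIZE independent partial sums so the main loop can be
// vectorized, then combine the partial sums and process the tail elements one by one.
static float dotProduct(const float * left, const float * right, size_t size)
{
    static constexpr size_t VEC_SIZE = 4;
    float states[VEC_SIZE] = {};

    size_t i = 0;
    for (; i + VEC_SIZE < size; i += VEC_SIZE)
        for (size_t j = 0; j < VEC_SIZE; ++j)
            states[j] += left[i + j] * right[i + j];

    // Combine the partial states.
    float result = 0.0f;
    for (float partial : states)
        result += partial;

    // Process the tail.
    for (; i < size; ++i)
        result += left[i] * right[i];

    return result;
}

int main()
{
    std::vector<float> a = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    std::vector<float> b = {9, 8, 7, 6, 5, 4, 3, 2, 1};
    std::cout << dotProduct(a.data(), b.data(), a.size()) << '\n'; // 165
}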
- - size_t array_num = 0; - DepthType prev_array_depth = 0; - for (size_t i = 0; i < num_arguments; ++i) + /// may be omitted - then it means "look at the full depth" + DepthTypes depths; + for (; i < num_arguments; i++) { const DataTypePtr & type = arguments[i].type; - const DataTypeArray * type_array = typeid_cast(type.get()); + const DataTypeArray * current_type_array = typeid_cast(type.get()); + if (!current_type_array) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect argument {} type of function {}. Expected an Array, got {}", + i + 1, + function_name, + type->getName()); - if (type_array) + if (i == num_arguments - 1) { - if (depths.size() < array_num && prev_array_depth) - depths.emplace_back(prev_array_depth); - - prev_array_depth = static_cast(type_array->getNumberOfDimensions()); - ++array_num; + depths.emplace_back(current_type_array->getNumberOfDimensions()); } else { - const auto & depth_column = arguments[i].column; - - if (depth_column && isColumnConst(*depth_column)) + const DataTypeArray * next_argument_array = typeid_cast(arguments[i + 1].type.get()); + if (next_argument_array) { - UInt64 value = assert_cast(*depth_column).getValue(); - if (!value) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Incorrect arguments for function arrayEnumerateUniqRanked " - "or arrayEnumerateDenseRanked: depth ({}) cannot be less or equal 0.", - std::to_string(value)); - - if (i == 0) - { - clear_depth = static_cast(value); - } - else - { - if (depths.size() >= array_num) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Incorrect arguments for function arrayEnumerateUniqRanked " - "or arrayEnumerateDenseRanked: depth ({}) for missing array.", - std::to_string(value)); - if (value > prev_array_depth) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: depth={}" - " for array with depth={}.", - std::to_string(value), std::to_string(prev_array_depth)); - - depths.emplace_back(value); - } + depths.emplace_back(current_type_array->getNumberOfDimensions()); + } + else + { + i++; + /// The following argument is not array, so it must be a const positive integer with the depth + const auto & depth_column = arguments[i].column; + if (!depth_column || !isColumnConst(*depth_column)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect argument {} type of function {}. Expected an Array or Const(UInt64), got {}", + i + 1, + function_name, + arguments[i].type->getName()); + Field f = assert_cast(*depth_column).getField(); + if (f.getType() != Field::Types::UInt64 || f.safeGet() == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect argument {} of function {}. Expected a positive integer", + i + 1, + function_name); + UInt64 value = f.safeGet(); + UInt64 prev_array_depth = current_type_array->getNumberOfDimensions(); + if (value > prev_array_depth) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect argument {} of function {}. 
Required depth '{}' is larger than the array depth ({})", + i + 1, + function_name, + value, + prev_array_depth); + depths.emplace_back(value); } } } - if (depths.size() < array_num) - depths.emplace_back(prev_array_depth); - if (depths.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: " - "at least one array should be passed."); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Incorrect arguments for function {}: At least one array should be passed", function_name); DepthType max_array_depth = 0; for (auto depth : depths) max_array_depth = std::max(depth, max_array_depth); if (clear_depth > max_array_depth) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: " - "clear_depth ({}) can't be larger than max_array_depth ({}).", - std::to_string(clear_depth), std::to_string(max_array_depth)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect arguments for function {}: clear_depth ({}) can't be larger than max_array_depth ({})", + function_name, + clear_depth, + max_array_depth); return {clear_depth, depths, max_array_depth}; } diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 1a920260906..04fa305368d 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -84,7 +84,7 @@ struct ArraysDepths }; /// Return depth info about passed arrays -ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments); +ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments, const char * function_name); template class FunctionArrayEnumerateRankedExtended : public IFunction @@ -105,7 +105,7 @@ public: "Number of arguments for function {} doesn't match: passed {}, should be at least 1.", getName(), arguments.size()); - const ArraysDepths arrays_depths = getArraysDepths(arguments); + const ArraysDepths arrays_depths = getArraysDepths(arguments, Derived::name); /// Return type is the array of the depth as the maximum effective depth of arguments, containing UInt32. @@ -154,7 +154,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( Columns array_holders; ColumnPtr offsets_column; - const ArraysDepths arrays_depths = getArraysDepths(arguments); + const ArraysDepths arrays_depths = getArraysDepths(arguments, Derived::name); /// If the column is Array - return it. If the const Array - materialize it, keep ownership and return. 
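The rewritten getArraysDepths() accepts an optional leading clear_depth constant followed by array arguments, each optionally followed by a const positive depth that must not exceed the array's dimensionality. A simplified standalone model of that argument-shape parsing (arrays are represented only by their number of dimensions, and the clear_depth-vs-max-depth check is omitted):

#include <cstddef>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <vector>

// Simplified argument model: an argument is either an array (with a known number of
// dimensions) or a constant positive integer.
struct Arg
{
    std::optional<size_t> array_dimensions; // set if the argument is an array
    std::optional<size_t> constant;         // set if the argument is a const UInt64
};

struct ArraysDepths
{
    size_t clear_depth = 1;
    std::vector<size_t> depths;
};

static ArraysDepths getArraysDepths(const std::vector<Arg> & args)
{
    if (args.empty())
        throw std::invalid_argument("At least one array should be passed");

    ArraysDepths result;
    size_t i = 0;

    // Optional leading clear_depth: a const positive integer before the first array.
    if (!args[0].array_dimensions)
    {
        if (!args[0].constant || *args[0].constant == 0)
            throw std::invalid_argument("First argument must be a positive integer or an array");
        result.clear_depth = *args[0].constant;
        ++i;
    }

    // The rest must be: arr1 [, depth1], arr2 [, depth2], ...
    for (; i < args.size(); ++i)
    {
        if (!args[i].array_dimensions)
            throw std::invalid_argument("Expected an array argument");
        size_t dimensions = *args[i].array_dimensions;

        // A following const positive integer (if any) gives the effective depth; otherwise use full depth.
        if (i + 1 < args.size() && args[i + 1].constant)
        {
            size_t depth = *args[i + 1].constant;
            if (depth == 0 || depth > dimensions)
                throw std::invalid_argument("Depth must be positive and not larger than the array depth");
            result.depths.push_back(depth);
            ++i;
        }
        else
        {
            result.depths.push_back(dimensions);
        }
    }

    if (result.depths.empty())
        throw std::invalid_argument("At least one array should be passed");
    return result;
}

int main()
{
    // Shapes like: arrayEnumerateUniqRanked(1, arr_with_2_dims, 1, arr_with_1_dim)
    auto d = getArraysDepths({{{}, 1}, {2, {}}, {{}, 1}, {1, {}}});
    std::cout << "clear_depth=" << d.clear_depth << " depths=" << d.depths.size() << '\n';
}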
auto get_array_column = [&](const auto & column) -> const DB::ColumnArray * @@ -213,17 +213,23 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( { if (*offsets_by_depth[col_depth] != array->getOffsets()) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "Lengths and effective depths of all arrays passed to {} must be equal.", getName()); + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "Lengths and effective depths of all arrays passed to {} must be equal", + getName()); } } } if (col_depth < arrays_depths.depths[array_num]) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "{}: Passed array number {} depth ({}) is more than the actual array depth ({}).", - getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth); + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "{}: Passed array number {} depth ({}) is more than the actual array depth ({})", + getName(), + array_num, + std::to_string(arrays_depths.depths[array_num]), + col_depth); } auto * array_data = &array->getData(); diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 44fe95624a6..63c14f475fc 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -32,6 +32,12 @@ public: size_t getNumberOfArguments() const override { return 0; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + /// Avoid the default adaptors since they modify the inputs and that makes knowing the lambda argument types + /// (getLambdaArgumentTypes) more complex, as it requires knowing what the adaptors will do + /// It's much simpler to avoid the adapters + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + void getLambdaArgumentTypes(DataTypes & arguments) const override { if (arguments.size() < 3) diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 3b19f0b486a..cd537763b4a 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -1007,8 +1007,13 @@ private: if (!(*null_map)[row]) continue; } - else if (!applyVisitor(FieldVisitorAccurateEquals(), arr[i], value)) - continue; + else + { + if (null_map && (*null_map)[row]) + continue; + if (!applyVisitor(FieldVisitorAccurateEquals(), arr[i], value)) + continue; + } ConcreteAction::apply(data[row], i); diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index c2a4fee4845..9cb74a7aa62 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -84,8 +84,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"array_1", &isArray, nullptr, "Array"}, - {"array_2", &isArray, nullptr, "Array"}, + {"array_1", static_cast(&isArray), nullptr, "Array"}, + {"array_2", static_cast(&isArray), nullptr, "Array"}, }; validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared>(); diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index 40344efb077..b08a73b93f3 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -36,8 +36,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { 
FunctionArgumentDescriptors args{ - {"array", &isArray, nullptr, "Array"}, - {"samples", &isUInt, isColumnConst, "const UInt*"}, + {"array", static_cast(&isArray), nullptr, "Array"}, + {"samples", static_cast(&isUInt), isColumnConst, "const UInt*"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index 5a6a99ef785..d47d1ae98cc 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -1,14 +1,15 @@ -#include -#include -#include -#include -#include -#include -#include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -48,6 +49,11 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } + /// As we parse the function name and deal with arrays we don't want to default NULL handler, which will hide + /// nullability from us (which also means hidden from the aggregate functions) + bool useDefaultImplementationForNulls() const override { return false; } + /// Same for low cardinality. We want to return exactly what the aggregate function returns, no meddling + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; @@ -115,7 +121,8 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume const IAggregateFunction & agg_func = *aggregate_function; std::unique_ptr arena = std::make_unique(); - /// Aggregate functions do not support constant columns. Therefore, we materialize them. + /// Aggregate functions do not support constant or lowcardinality columns. 
Therefore, we materialize them and + /// keep a reference so they are alive until we finish using their nested columns (array data/offset) std::vector materialized_columns; const size_t num_arguments_columns = arguments.size() - 1; @@ -126,6 +133,12 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume for (size_t i = 0; i < num_arguments_columns; ++i) { const IColumn * col = arguments[i + 1].column.get(); + auto col_no_lowcardinality = recursiveRemoveLowCardinality(arguments[i + 1].column); + if (col_no_lowcardinality != arguments[i + 1].column) + { + materialized_columns.emplace_back(col_no_lowcardinality); + col = col_no_lowcardinality.get(); + } const ColumnArray::Offsets * offsets_i = nullptr; if (const ColumnArray * arr = checkAndGetColumn(col)) diff --git a/src/Functions/array/arrayScalarProduct.h b/src/Functions/array/arrayScalarProduct.h deleted file mode 100644 index 374a2d8a194..00000000000 --- a/src/Functions/array/arrayScalarProduct.h +++ /dev/null @@ -1,182 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; -} - - -template -class FunctionArrayScalarProduct : public IFunction -{ -public: - static constexpr auto name = Name::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - -private: - - template - ColumnPtr executeNumber(const ColumnsWithTypeAndName & arguments) const - { - ColumnPtr res; - if ( (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments))) - return res; - - return nullptr; - } - - - template - ColumnPtr executeNumberNumber(const ColumnsWithTypeAndName & arguments) const - { - ColumnPtr col1 = arguments[0].column->convertToFullColumnIfConst(); - ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst(); - if (!col1 || !col2) - return nullptr; - - const ColumnArray * col_array1 = checkAndGetColumn(col1.get()); - const ColumnArray * col_array2 = checkAndGetColumn(col2.get()); - if (!col_array1 || !col_array2) - return nullptr; - - if (!col_array1->hasEqualOffsets(*col_array2)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); - - const ColumnVector * col_nested1 = checkAndGetColumn>(col_array1->getData()); - const ColumnVector * col_nested2 = checkAndGetColumn>(col_array2->getData()); - if (!col_nested1 || !col_nested2) - return nullptr; - - auto col_res = ColumnVector::create(); - - vector( - col_nested1->getData(), - col_nested2->getData(), - col_array1->getOffsets(), - col_res->getData()); - - return col_res; - } - - template - static NO_INLINE void vector( - const PaddedPODArray & data1, - const PaddedPODArray & data2, - const ColumnArray::Offsets & offsets, - PaddedPODArray & result) - { - size_t size = offsets.size(); - result.resize(size); - - ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t array_size = offsets[i] - current_offset; - result[i] = Method::template 
apply(&data1[current_offset], &data2[current_offset], array_size); - current_offset = offsets[i]; - } - } - -public: - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 2; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - // Basic type check - std::vector nested_types(2, nullptr); - for (size_t i = 0; i < getNumberOfArguments(); ++i) - { - const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All arguments for function {} must be an array.", getName()); - - const auto & nested_type = array_type->getNestedType(); - if (!isNativeNumber(nested_type) && !isEnum(nested_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot process values of type {}", - getName(), nested_type->getName()); - nested_types[i] = nested_type; - } - - // Detail type check in Method, then return ReturnType - return Method::getReturnType(nested_types[0], nested_types[1]); - } - - template - ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments) const - { - ColumnPtr res; - if (!((res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)))) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); - - return res; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override - { - switch (result_type->getTypeId()) - { - #define SUPPORTED_TYPE(type) \ - case TypeIndex::type: \ - return executeWithResultType(arguments); \ - break; - - SUPPORTED_TYPE(UInt8) - SUPPORTED_TYPE(UInt16) - SUPPORTED_TYPE(UInt32) - SUPPORTED_TYPE(UInt64) - SUPPORTED_TYPE(Int8) - SUPPORTED_TYPE(Int16) - SUPPORTED_TYPE(Int32) - SUPPORTED_TYPE(Int64) - SUPPORTED_TYPE(Float32) - SUPPORTED_TYPE(Float64) - #undef SUPPORTED_TYPE - - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); - } - } -}; - -} - diff --git a/src/Functions/array/arrayShingles.cpp b/src/Functions/array/arrayShingles.cpp index ade1cb862f7..8932482c69c 100644 --- a/src/Functions/array/arrayShingles.cpp +++ b/src/Functions/array/arrayShingles.cpp @@ -28,8 +28,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"array", &isArray, nullptr, "Array"}, - {"length", &isInteger, nullptr, "Integer"} + {"array", static_cast(&isArray), nullptr, "Array"}, + {"length", static_cast(&isInteger), nullptr, "Integer"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/array/has.cpp b/src/Functions/array/has.cpp index f08a4f29d2d..a17dcdcfbf9 100644 --- a/src/Functions/array/has.cpp +++ b/src/Functions/array/has.cpp @@ -9,4 +9,10 @@ struct NameHas { static constexpr auto name = "has"; }; using FunctionHas = FunctionArrayIndex; REGISTER_FUNCTION(Has) { factory.registerFunction(); } + 
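For contrast with the new implementation, the arrayScalarProduct.h header deleted above resolved the element types at run time by trying one typed execute helper after another until one returned a non-null result. A minimal restatement of that dispatch idiom, with a toy class hierarchy instead of ClickHouse columns:

#include <iostream>
#include <memory>
#include <stdexcept>

// Toy stand-ins for IColumn / ColumnVector<T>.
struct Column { virtual ~Column() = default; };
template <typename T> struct ColumnVector : Column { T value{}; };

// Returns nullptr unless the runtime type matches T, just like the typed execute* helpers did.
template <typename T>
std::unique_ptr<Column> tryExecute(const Column & col)
{
    if (!dynamic_cast<const ColumnVector<T> *>(&col))
        return nullptr;
    return std::make_unique<ColumnVector<T>>();
}

// The cascade: try each supported element type in turn; fail if none matches.
std::unique_ptr<Column> execute(const Column & col)
{
    std::unique_ptr<Column> res;
    if ((res = tryExecute<uint8_t>(col))
        || (res = tryExecute<int32_t>(col))
        || (res = tryExecute<double>(col)))
        return res;
    throw std::runtime_error("Illegal column type");
}

int main()
{
    ColumnVector<double> col;
    std::cout << (execute(col) != nullptr) << '\n'; // 1
}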
+FunctionOverloadResolverPtr createInternalFunctionHasOverloadResolver() +{ + return std::make_unique(std::make_shared()); +} + } diff --git a/src/Functions/array/has.h b/src/Functions/array/has.h new file mode 100644 index 00000000000..226662d4051 --- /dev/null +++ b/src/Functions/array/has.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalFunctionHasOverloadResolver(); + +} diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index 0194cc4871a..c186c0ca7e6 100644 --- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -151,12 +151,12 @@ public: { FunctionArgumentDescriptors mandatory_args { - {"arr", &isArray, nullptr, "Array"}, + {"arr", static_cast(&isArray), nullptr, "Array"}, }; FunctionArgumentDescriptors optional_args { - {"separator", &isString, isColumnConst, "const String"}, + {"separator", static_cast(&isString), isColumnConst, "const String"}, }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 26eaf4f5613..970e6fd6f75 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -210,10 +210,10 @@ private: FunctionArgumentDescriptors optional_args; if constexpr (IsDataTypeDecimal) - mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); if (std::is_same_v || std::is_same_v) - optional_args.push_back({"timezone", &isString, isColumnConst, "const String"}); + optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h index e9880e6e93f..fbbb9d017ee 100644 --- a/src/Functions/countMatches.h +++ b/src/Functions/countMatches.h @@ -35,8 +35,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "String or FixedString"}, - {"pattern", &isString, isColumnConst, "constant String"} + {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, + {"pattern", static_cast(&isString), isColumnConst, "constant String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/coverage.cpp b/src/Functions/coverage.cpp index f4cac26df78..a1a43d0cf58 100644 --- a/src/Functions/coverage.cpp +++ b/src/Functions/coverage.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/date_trunc.cpp b/src/Functions/date_trunc.cpp index c3903fef137..f64848e9185 100644 --- a/src/Functions/date_trunc.cpp +++ b/src/Functions/date_trunc.cpp @@ -55,9 +55,9 @@ public: if (!IntervalKind::tryParseString(datepart_param, datepart_kind)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} doesn't look like datepart name in {}", datepart_param, getName()); - result_type_is_date = (datepart_kind == IntervalKind::Year) - || (datepart_kind == IntervalKind::Quarter) || (datepart_kind == IntervalKind::Month) - || (datepart_kind == IntervalKind::Week); + result_type_is_date = (datepart_kind == IntervalKind::Kind::Year) + || (datepart_kind == 
IntervalKind::Kind::Quarter) || (datepart_kind == IntervalKind::Kind::Month) + || (datepart_kind == IntervalKind::Kind::Week); }; bool second_argument_is_date = false; @@ -68,8 +68,8 @@ public: second_argument_is_date = isDate(arguments[1].type); - if (second_argument_is_date && ((datepart_kind == IntervalKind::Hour) - || (datepart_kind == IntervalKind::Minute) || (datepart_kind == IntervalKind::Second))) + if (second_argument_is_date && ((datepart_kind == IntervalKind::Kind::Hour) + || (datepart_kind == IntervalKind::Kind::Minute) || (datepart_kind == IntervalKind::Kind::Second))) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date of argument for function {}", getName()); }; diff --git a/src/Functions/divide/divide.cpp b/src/Functions/divide/divide.cpp index 6262d42a666..0708964c7d4 100644 --- a/src/Functions/divide/divide.cpp +++ b/src/Functions/divide/divide.cpp @@ -1,5 +1,5 @@ #include "divide.h" -#include +#include #if defined(__x86_64__) namespace SSE2 @@ -26,9 +26,9 @@ template void divideImpl(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size) { #if defined(__x86_64__) - if (DB::Cpu::CpuFlagsCache::have_AVX2) + if (DB::CPU::CPUFlagsCache::have_AVX2) AVX2::divideImpl(a_pos, b, c_pos, size); - else if (DB::Cpu::CpuFlagsCache::have_SSE2) + else if (DB::CPU::CPUFlagsCache::have_SSE2) SSE2::divideImpl(a_pos, b, c_pos, size); #else Generic::divideImpl(a_pos, b, c_pos, size); diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index ad49f32f769..f0c18bf79b9 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -50,11 +50,13 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 2; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"haystack", &isString, nullptr, "String"}, - {"pattern", &isString, isColumnConst, "const String"} + {"haystack", static_cast(&isString), nullptr, "String"}, + {"pattern", static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index c64c9d6ccef..ac12cad1698 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -71,8 +71,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "const String or const FixedString"}, - {"needle", &isStringOrFixedString, isColumnConst, "const String or const FixedString"}, + {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, + {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index e22938f8565..f62352af0bd 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -45,8 +45,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "const String or const FixedString"}, - {"needle", &isStringOrFixedString, isColumnConst, "const String or const 
FixedString"}, + {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, + {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 2f6bc6f9903..92403d2e88e 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -54,7 +54,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"query", &isString, nullptr, "String"} + {"query", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index 12a5fc2cc27..1ac6becfb15 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -18,7 +18,6 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNKNOWN_FORMAT; extern const int BAD_ARGUMENTS; } @@ -40,8 +39,7 @@ public: , arguments_column_names(std::move(arguments_column_names_)) , context(std::move(context_)) { - if (!FormatFactory::instance().getAllFormats().contains(format_name)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", format_name); + FormatFactory::instance().checkFormatName(format_name); } String getName() const override { return name; } diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index a21d0cc25bf..b98c587d172 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -52,7 +52,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{{"days", &isNativeInteger, nullptr, "Integer"}}; + FunctionArgumentDescriptors args{{"days", static_cast(&isNativeInteger), nullptr, "Integer"}}; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/getTypeSerializationStreams.cpp b/src/Functions/getTypeSerializationStreams.cpp index da9fce70ee9..34a4e47947f 100644 --- a/src/Functions/getTypeSerializationStreams.cpp +++ b/src/Functions/getTypeSerializationStreams.cpp @@ -48,7 +48,7 @@ public: SerializationPtr serialization = type->getDefaultSerialization(); auto col_res = ColumnArray::create(ColumnString::create()); ColumnString & col_res_strings = typeid_cast(col_res->getData()); - ColumnVectorHelper::Offsets & col_res_offsets = typeid_cast(col_res->getOffsets()); + ColumnFixedSizeHelper::Offsets & col_res_offsets = typeid_cast(col_res->getOffsets()); serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { col_res_strings.insert(substream_path.toString()); diff --git a/src/Functions/identity.cpp b/src/Functions/identity.cpp index 7174f1fd318..43cca76c801 100644 --- a/src/Functions/identity.cpp +++ b/src/Functions/identity.cpp @@ -9,4 +9,9 @@ REGISTER_FUNCTION(Identity) factory.registerFunction(); } +REGISTER_FUNCTION(ScalarSubqueryResult) +{ + factory.registerFunction(); +} + } diff --git a/src/Functions/identity.h b/src/Functions/identity.h index efee95841f5..c753625caa7 100644 --- a/src/Functions/identity.h +++ b/src/Functions/identity.h @@ -6,11 +6,12 @@ namespace DB { -class FunctionIdentity : public IFunction +template +class FunctionIdentityBase : public IFunction { public: - static constexpr auto name = 
"identity"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -28,4 +29,17 @@ public: } }; +struct IdentityName +{ + static constexpr auto name = "identity"; +}; + +struct ScalarSubqueryResultName +{ + static constexpr auto name = "__scalarSubqueryResult"; +}; + +using FunctionIdentity = FunctionIdentityBase; +using FunctionScalarSubqueryResult = FunctionIdentityBase; + } diff --git a/src/Functions/idna.cpp b/src/Functions/idna.cpp index a73347400c6..c9682b44b2c 100644 --- a/src/Functions/idna.cpp +++ b/src/Functions/idna.cpp @@ -6,16 +6,12 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wnewline-eof" -#endif -# include -# include -# include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnewline-eof" +#include +#include +#include +#pragma clang diagnostic pop namespace DB { @@ -199,4 +195,3 @@ Computes the Unicode representation of ASCII-encoded Internationalized Domain Na } #endif - diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 7306dc4173e..4f75042ad8d 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1266,7 +1266,7 @@ public: bool useDefaultImplementationForNothing() const override { return false; } bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override { - settings.enable_lazy_execution_for_first_argument = false; + settings.arguments_with_disabled_lazy_execution.insert(0); settings.enable_lazy_execution_for_common_descendants_of_arguments = false; settings.force_enable_lazy_execution = false; return true; @@ -1413,4 +1413,9 @@ REGISTER_FUNCTION(If) factory.registerFunction({}, FunctionFactory::CaseInsensitive); } +FunctionOverloadResolverPtr createInternalFunctionIfOverloadResolver(bool allow_experimental_variant_type, bool use_variant_as_common_type) +{ + return std::make_unique(std::make_shared(allow_experimental_variant_type && use_variant_as_common_type)); +} + } diff --git a/src/Functions/if.h b/src/Functions/if.h new file mode 100644 index 00000000000..09a7a6a3e78 --- /dev/null +++ b/src/Functions/if.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalFunctionIfOverloadResolver(bool allow_experimental_variant_type, bool use_variant_as_common_type); + +} diff --git a/src/Functions/keyvaluepair/impl/StateHandlerImpl.h b/src/Functions/keyvaluepair/impl/StateHandlerImpl.h index 687d8d95d42..cf31d30b9dc 100644 --- a/src/Functions/keyvaluepair/impl/StateHandlerImpl.h +++ b/src/Functions/keyvaluepair/impl/StateHandlerImpl.h @@ -403,7 +403,7 @@ struct NoEscapingStateHandler : public StateHandlerImpl }; template - NoEscapingStateHandler(Args && ... args) + explicit NoEscapingStateHandler(Args && ... args) : StateHandlerImpl(std::forward(args)...) {} }; @@ -465,7 +465,7 @@ struct InlineEscapingStateHandler : public StateHandlerImpl }; template - InlineEscapingStateHandler(Args && ... args) + explicit InlineEscapingStateHandler(Args && ... args) : StateHandlerImpl(std::forward(args)...) 
{} }; diff --git a/src/Functions/logical.h b/src/Functions/logical.h new file mode 100644 index 00000000000..d2d07f6cec7 --- /dev/null +++ b/src/Functions/logical.h @@ -0,0 +1,15 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalFunctionOrOverloadResolver(); +FunctionOverloadResolverPtr createInternalFunctionAndOverloadResolver(); +FunctionOverloadResolverPtr createInternalFunctionXorOverloadResolver(); +FunctionOverloadResolverPtr createInternalFunctionNotOverloadResolver(); + +} diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 987cf4eb1a9..c7f3c195578 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -82,17 +82,17 @@ public: if (is_year_month_variant) { FunctionArgumentDescriptors args{ - {mandatory_argument_names_year_month_day[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_month_day[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_month_day[2], &isNumber, nullptr, "Number"} + {mandatory_argument_names_year_month_day[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_month_day[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_month_day[2], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); } else { FunctionArgumentDescriptors args{ - {mandatory_argument_names_year_dayofyear[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_dayofyear[1], &isNumber, nullptr, "Number"} + {mandatory_argument_names_year_dayofyear[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_dayofyear[1], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); } @@ -189,7 +189,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -344,16 +344,16 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[2], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[3], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[4], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[5], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[2], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[3], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[4], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[5], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -425,18 +425,18 @@ public: DataTypePtr 
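The new headers added in this diff (if.h above, logical.h here, and multiIf.h and multiMatchAny.h further down) all follow the same shape: forward-declare the resolver, alias a shared_ptr to it, and expose a `createInternal...OverloadResolver` factory, so callers can build these functions without pulling in the full definitions or a query context. A generic sketch of that header/source split, with made-up names standing in for the real resolver types:

```cpp
// widget.h (sketch): forward declaration plus a factory; callers need no heavy includes.
#include <memory>

class WidgetResolver;                                     // forward declaration only
using WidgetResolverPtr = std::shared_ptr<WidgetResolver>;

WidgetResolverPtr createInternalWidgetResolver(bool enable_fast_path);

// widget.cpp (sketch): the full definition stays private to this translation unit.
#include <iostream>

class WidgetResolver
{
public:
    explicit WidgetResolver(bool fast_path_) : fast_path(fast_path_) {}
    void describe() const { std::cout << "fast_path=" << fast_path << '\n'; }

private:
    bool fast_path;
};

WidgetResolverPtr createInternalWidgetResolver(bool enable_fast_path)
{
    return std::make_shared<WidgetResolver>(enable_fast_path);
}

int main()
{
    createInternalWidgetResolver(true)->describe(); // fast_path=1
}
```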
getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[2], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[3], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[4], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[5], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[2], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[3], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[4], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[5], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isNumber, nullptr, "const Number"}, - {optional_argument_names[1], &isNumber, isColumnConst, "const Number"}, - {optional_argument_names[2], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isNumber), nullptr, "const Number"}, + {optional_argument_names[1], static_cast(&isNumber), isColumnConst, "const Number"}, + {optional_argument_names[2], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -564,11 +564,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -643,12 +643,12 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isNumber, isColumnConst, "const Number"}, - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isNumber), isColumnConst, "const Number"}, + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index cb946b55c73..81304f3afbd 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -3,12 +3,20 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -20,7 +28,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } namespace @@ -40,15 +48,23 @@ class FunctionMultiIf final : 
public FunctionIfBase { public: static constexpr auto name = "multiIf"; - static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + static FunctionPtr create(ContextPtr context_) + { + const auto & settings = context_->getSettingsRef(); + return std::make_shared(settings.allow_execute_multiif_columnar, settings.allow_experimental_variant_type, settings.use_variant_as_common_type); + } - explicit FunctionMultiIf(ContextPtr context_) : context(context_) { } + explicit FunctionMultiIf(bool allow_execute_multiif_columnar_, bool allow_experimental_variant_type_, bool use_variant_as_common_type_) + : allow_execute_multiif_columnar(allow_execute_multiif_columnar_) + , allow_experimental_variant_type(allow_experimental_variant_type_) + , use_variant_as_common_type(use_variant_as_common_type_) + {} String getName() const override { return name; } bool isVariadic() const override { return true; } bool isShortCircuit(ShortCircuitSettings & settings, size_t number_of_arguments) const override { - settings.enable_lazy_execution_for_first_argument = false; + settings.arguments_with_disabled_lazy_execution.insert(0); settings.enable_lazy_execution_for_common_descendants_of_arguments = (number_of_arguments != 3); settings.force_enable_lazy_execution = false; return true; @@ -118,7 +134,7 @@ public: types_of_branches.emplace_back(arg); }); - if (context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type) + if (allow_experimental_variant_type && use_variant_as_common_type) return getLeastSupertypeOrVariant(types_of_branches); return getLeastSupertype(types_of_branches); @@ -141,6 +157,10 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override { + /// Fast path when data is empty + if (input_rows_count == 0) + return result_type->createColumn(); + ColumnsWithTypeAndName arguments = args; executeShortCircuitArguments(arguments); /** We will gather values from columns in branches to result column, @@ -240,66 +260,74 @@ public: } } - const auto & settings = context->getSettingsRef(); const WhichDataType which(removeNullable(result_type)); - bool execute_multiif_columnar - = settings.allow_execute_multiif_columnar && !contains_short && (which.isInt() || which.isUInt() || which.isFloat()); + bool execute_multiif_columnar = allow_execute_multiif_columnar && !contains_short + && instructions.size() <= std::numeric_limits::max() + && (which.isInt() || which.isUInt() || which.isFloat() || which.isDecimal() || which.isDateOrDate32OrDateTimeOrDateTime64() + || which.isEnum() || which.isIPv4() || which.isIPv6()); size_t rows = input_rows_count; if (!execute_multiif_columnar) { MutableColumnPtr res = return_type->createColumn(); + res->reserve(rows); executeInstructions(instructions, rows, res); return std::move(res); } -#define EXECUTE_INSTRUCTIONS_COLUMNAR(TYPE, INDEX) \ +#define EXECUTE_INSTRUCTIONS_COLUMNAR(TYPE, FIELD, INDEX) \ if (which.is##TYPE()) \ { \ - MutableColumnPtr res = ColumnVector::create(rows); \ - MutableColumnPtr null_map = result_type->isNullable() ? 
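FunctionMultiIf no longer keeps a ContextPtr; `create()` reads the relevant settings once and hands plain booleans to the constructor, so execution never reaches back into the context. A condensed standalone sketch of that snapshot-the-settings refactoring (the class and method names here are placeholders; only the setting names come from the diff):

```cpp
#include <iostream>
#include <memory>

struct Settings
{
    bool allow_execute_multiif_columnar = true;
    bool allow_experimental_variant_type = false;
    bool use_variant_as_common_type = false;
};

class MultiBranchFunction
{
public:
    // The factory reads the settings once; no pointer back to the query context survives.
    static std::shared_ptr<MultiBranchFunction> create(const Settings & settings)
    {
        return std::make_shared<MultiBranchFunction>(
            settings.allow_execute_multiif_columnar,
            settings.allow_experimental_variant_type && settings.use_variant_as_common_type);
    }

    MultiBranchFunction(bool columnar_, bool variant_as_result_)
        : columnar(columnar_), variant_as_result(variant_as_result_) {}

    void describe() const { std::cout << "columnar=" << columnar << " variant_as_result=" << variant_as_result << '\n'; }

private:
    const bool columnar;
    const bool variant_as_result;
};

int main()
{
    Settings settings;
    MultiBranchFunction::create(settings)->describe(); // columnar=1 variant_as_result=0
}
```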
ColumnUInt8::create(rows) : nullptr; \ - executeInstructionsColumnar(instructions, rows, res, null_map, result_type->isNullable()); \ - if (!result_type->isNullable()) \ - return std::move(res); \ + MutableColumnPtr res = result_type->createColumn(); \ + if (result_type->isNullable()) \ + { \ + auto & res_nullable = assert_cast(*res); \ + auto & res_data = assert_cast &>(res_nullable.getNestedColumn()).getData(); \ + auto & res_null_map = res_nullable.getNullMapData(); \ + executeInstructionsColumnar(instructions, rows, res_data, &res_null_map); \ + } \ else \ - return ColumnNullable::create(std::move(res), std::move(null_map)); \ + { \ + auto & res_data = assert_cast &>(*res).getData(); \ + executeInstructionsColumnar(instructions, rows, res_data, nullptr); \ + } \ + return std::move(res); \ } #define ENUMERATE_NUMERIC_TYPES(M, INDEX) \ - M(UInt8, INDEX) \ - M(UInt16, INDEX) \ - M(UInt32, INDEX) \ - M(UInt64, INDEX) \ - M(Int8, INDEX) \ - M(Int16, INDEX) \ - M(Int32, INDEX) \ - M(Int64, INDEX) \ - M(UInt128, INDEX) \ - M(UInt256, INDEX) \ - M(Int128, INDEX) \ - M(Int256, INDEX) \ - M(Float32, INDEX) \ - M(Float64, INDEX) \ + M(UInt8, UInt8, INDEX) \ + M(UInt16, UInt16, INDEX) \ + M(UInt32, UInt32, INDEX) \ + M(UInt64, UInt64, INDEX) \ + M(Int8, Int8, INDEX) \ + M(Int16, Int16, INDEX) \ + M(Int32, Int32, INDEX) \ + M(Int64, Int64, INDEX) \ + M(Float32, Float32, INDEX) \ + M(Float64, Float64, INDEX) \ + M(UInt128, UInt128, INDEX) \ + M(UInt256, UInt256, INDEX) \ + M(Int128, Int128, INDEX) \ + M(Int256, Int256, INDEX) \ + M(Decimal32, Decimal32, INDEX) \ + M(Decimal64, Decimal64, INDEX) \ + M(Decimal128, Decimal128, INDEX) \ + M(Decimal256, Decimal256, INDEX) \ + M(Date, UInt16, INDEX) \ + M(Date32, Int32, INDEX) \ + M(DateTime, UInt32, INDEX) \ + M(DateTime64, DateTime64, INDEX) \ + M(Enum8, Int8, INDEX) \ + M(Enum16, Int16, INDEX) \ + M(IPv4, IPv4, INDEX) \ + M(IPv6, IPv6, INDEX) \ throw Exception( \ ErrorCodes::NOT_IMPLEMENTED, "Columnar execution of function {} not implemented for type {}", getName(), result_type->getName()); - size_t num_instructions = instructions.size(); - if (num_instructions <= std::numeric_limits::max()) - { - ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, Int16) - } - else if (num_instructions <= std::numeric_limits::max()) - { - ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, Int32) - } - else if (num_instructions <= std::numeric_limits::max()) - { - ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, Int64) - } - else - throw Exception( - ErrorCodes::LOGICAL_ERROR, "Instruction size({}) of function {} is out of range", getName(), result_type->getName()); + ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, UInt8) } +#undef ENUMERATE_NUMERIC_TYPES +#undef EXECUTE_INSTRUCTIONS_COLUMNAR private: @@ -341,11 +369,11 @@ private: /// We should read source from which instruction on each row? 
template - static void calculateInserts(std::vector & instructions, size_t rows, PaddedPODArray & inserts) + static NO_INLINE void calculateInserts(const std::vector & instructions, size_t rows, PaddedPODArray & inserts) { - for (S i = static_cast(instructions.size() - 1); i >= 0; --i) + for (S i = instructions.size() - 1; i != static_cast(-1); --i) { - auto & instruction = instructions[i]; + const auto & instruction = instructions[i]; if (instruction.condition_always_true) { for (size_t row_i = 0; row_i < rows; ++row_i) @@ -381,60 +409,62 @@ private: } } - template - static void executeInstructionsColumnar(std::vector & instructions, size_t rows, const MutableColumnPtr & res, const MutableColumnPtr & null_map, bool nullable) + template + static NO_INLINE void executeInstructionsColumnar( + const std::vector & instructions, + size_t rows, + PaddedPODArray & res_data, + PaddedPODArray * res_null_map = nullptr) { PaddedPODArray inserts(rows, static_cast(instructions.size())); calculateInserts(instructions, rows, inserts); - PaddedPODArray & res_data = assert_cast &>(*res).getData(); - if (!nullable) + res_data.resize_exact(rows); + if constexpr (nullable_result) { - for (size_t row_i = 0; row_i < rows; ++row_i) + if (!res_null_map) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid result null_map while result type is nullable"); + + res_null_map->resize_exact(rows); + } + + std::vector data_cols(instructions.size(), nullptr); + std::vector null_map_cols(instructions.size(), nullptr); + for (size_t i = 0; i < instructions.size(); ++i) + { + const auto & instruction = instructions[i]; + const IColumn * non_const_col = instructions[i].source_is_constant + ? &assert_cast(*instruction.source).getDataColumn() + : instruction.source.get(); + const ColumnNullable * nullable_col = checkAndGetColumn(non_const_col); + data_cols[i] = nullable_col ? assert_cast &>(nullable_col->getNestedColumn()).getData().data() + : assert_cast &>(*non_const_col).getData().data(); + null_map_cols[i] = nullable_col ? 
assert_cast(nullable_col->getNullMapColumn()).getData().data() : nullptr; + } + + std::unique_ptr> shared_null_map; + if constexpr (nullable_result) + { + for (auto & col : null_map_cols) { - auto & instruction = instructions[inserts[row_i]]; - auto ref = instruction.source->getDataAt(row_i); - res_data[row_i] = *reinterpret_cast(ref.data); + if (!col) + { + if (!shared_null_map) + shared_null_map = std::make_unique>(rows, 0); + + col = shared_null_map->data(); + } } } - else + + for (size_t row_i = 0; row_i < rows; ++row_i) { - PaddedPODArray & null_map_data = assert_cast(*null_map).getData(); - std::vector data_cols(instructions.size()); - std::vector null_map_cols(instructions.size()); - ColumnPtr shared_null_map_col = nullptr; - for (size_t i = 0; i < instructions.size(); ++i) - { - if (instructions[i].source->isNullable()) - { - const ColumnNullable * nullable_col; - if (!instructions[i].source_is_constant) - nullable_col = assert_cast(instructions[i].source.get()); - else - { - const ColumnPtr data_column = assert_cast(*instructions[i].source).getDataColumnPtr(); - nullable_col = assert_cast(data_column.get()); - } - null_map_cols[i] = assert_cast(*nullable_col->getNullMapColumnPtr()).getData().data(); - data_cols[i] = assert_cast &>(*nullable_col->getNestedColumnPtr()).getData().data(); - } - else - { - if (!shared_null_map_col) - { - shared_null_map_col = ColumnUInt8::create(rows, 0); - } - null_map_cols[i] = assert_cast(*shared_null_map_col).getData().data(); - data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); - } - } - for (size_t row_i = 0; row_i < rows; ++row_i) - { - auto & instruction = instructions[inserts[row_i]]; - size_t index = instruction.source_is_constant ? 0 : row_i; - res_data[row_i] = *(data_cols[inserts[row_i]] + index); - null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index); - } + S insert = inserts[row_i]; + const auto & instruction = instructions[insert]; + size_t index = instruction.source_is_constant ? 
0 : row_i; + res_data[row_i] = *(data_cols[insert] + index); + if constexpr (nullable_result) + (*res_null_map)[row_i] = *(null_map_cols[insert] + index); } } @@ -507,7 +537,9 @@ private: executeColumnIfNeeded(arguments[i], true); } - ContextPtr context; + const bool allow_execute_multiif_columnar; + const bool allow_experimental_variant_type; + const bool use_variant_as_common_type; }; } @@ -521,6 +553,11 @@ REGISTER_FUNCTION(MultiIf) factory.registerFunction("caseWithoutExpression"); } +FunctionOverloadResolverPtr createInternalMultiIfOverloadResolver(bool allow_execute_multiif_columnar, bool allow_experimental_variant_type, bool use_variant_as_common_type) +{ + return std::make_unique(std::make_shared(allow_execute_multiif_columnar, allow_experimental_variant_type, use_variant_as_common_type)); +} + } diff --git a/src/Functions/multiIf.h b/src/Functions/multiIf.h new file mode 100644 index 00000000000..617d63b89bc --- /dev/null +++ b/src/Functions/multiIf.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalMultiIfOverloadResolver(bool allow_execute_multiif_columnar, bool allow_experimental_variant_type, bool use_variant_as_common_type); + +} diff --git a/src/Functions/multiMatchAny.cpp b/src/Functions/multiMatchAny.cpp index 6e6abe61898..054a60fce2d 100644 --- a/src/Functions/multiMatchAny.cpp +++ b/src/Functions/multiMatchAny.cpp @@ -22,4 +22,9 @@ REGISTER_FUNCTION(MultiMatchAny) factory.registerFunction(); } +FunctionOverloadResolverPtr createInternalMultiMatchAnyOverloadResolver(bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, bool reject_expensive_hyperscan_regexps) +{ + return std::make_unique(std::make_shared(allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps)); +} + } diff --git a/src/Functions/multiMatchAny.h b/src/Functions/multiMatchAny.h new file mode 100644 index 00000000000..4548ec1d593 --- /dev/null +++ b/src/Functions/multiMatchAny.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalMultiMatchAnyOverloadResolver(bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, bool reject_expensive_hyperscan_regexps); + +} diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 860603dc503..18882177c90 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -489,12 +489,12 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"time", &isString, nullptr, "String"}, - {"format", &isString, nullptr, "String"} + {"time", static_cast(&isString), nullptr, "String"}, + {"format", static_cast(&isString), nullptr, "String"} }; FunctionArgumentDescriptors optional_args{ - {"timezone", &isString, &isColumnConst, "const String"} + {"timezone", static_cast(&isString), &isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp index 159189744bd..107302069b4 100644 --- a/src/Functions/punycode.cpp +++ b/src/Functions/punycode.cpp @@ -6,15 +6,11 @@ #include #include -#ifdef 
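The rewritten columnar path works in two passes: `calculateInserts` records, for every row, which branch (instruction) supplies the value, and `executeInstructionsColumnar` then gathers through raw per-branch data pointers, with a shared all-zero null map standing in for branches whose source is not nullable. A self-contained sketch of that select-then-gather idea, using plain vectors instead of ClickHouse columns and a simplified forward scan in place of the reverse instruction walk:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // Three branches: cond0 ? src0 : (cond1 ? src1 : src_else), evaluated columnar.
    std::vector<uint8_t> cond0 = {1, 0, 0, 0};
    std::vector<uint8_t> cond1 = {0, 1, 0, 1};
    std::vector<int64_t> src0     = {10, 11, 12, 13};
    std::vector<int64_t> src1     = {20, 21, 22, 23};
    std::vector<int64_t> src_else = {-1, -1, -1, -1};

    const size_t rows = cond0.size();

    // Pass 1: for each row, record the index of the branch that wins (like calculateInserts).
    std::vector<uint8_t> inserts(rows, 2); // default to the else-branch
    for (size_t row = 0; row < rows; ++row)
    {
        if (cond0[row]) inserts[row] = 0;
        else if (cond1[row]) inserts[row] = 1;
    }

    // Pass 2: gather through raw pointers per branch (like executeInstructionsColumnar).
    const int64_t * data[] = {src0.data(), src1.data(), src_else.data()};
    std::vector<int64_t> result(rows);
    for (size_t row = 0; row < rows; ++row)
        result[row] = data[inserts[row]][row];

    for (int64_t v : result)
        std::cout << v << ' '; // prints: 10 21 -1 23
    std::cout << '\n';
}
```

In the diff, the columnar path is additionally gated on the instruction count fitting into the per-row index type.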
__clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wnewline-eof" -#endif # include # include -#ifdef __clang__ # pragma clang diagnostic pop -#endif namespace DB { diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index f6bbd2f96f2..cfb42580cb0 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -47,12 +47,12 @@ public: arguments.size()); FunctionArgumentDescriptors args{ - {"haystack", &isString, nullptr, "String"}, - {"pattern", &isString, isColumnConst, "const String"}, + {"haystack", static_cast(&isString), nullptr, "String"}, + {"pattern", static_cast(&isString), isColumnConst, "const String"}, }; if (arguments.size() == 3) - args.emplace_back(FunctionArgumentDescriptor{"index", &isInteger, nullptr, "Integer"}); + args.emplace_back(FunctionArgumentDescriptor{"index", static_cast(&isInteger), nullptr, "Integer"}); validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index c1b553ac6b3..11a2ca37a3b 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -186,8 +186,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"s", &isString, nullptr, "String"}, - {"n", &isInteger, nullptr, "Integer"}, + {"s", static_cast(&isString), nullptr, "String"}, + {"n", static_cast(&isInteger), nullptr, "Integer"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/s2_fwd.h b/src/Functions/s2_fwd.h index 6e0b58ae118..4ed5d4fcc1b 100644 --- a/src/Functions/s2_fwd.h +++ b/src/Functions/s2_fwd.h @@ -1,8 +1,6 @@ #pragma once -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wambiguous-reversed-operator" -#endif #include #include @@ -11,6 +9,4 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index e9276c4aefb..618808b64ed 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -1,15 +1,9 @@ -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wold-style-cast" #pragma clang diagnostic ignored "-Wshadow" #pragma clang diagnostic ignored "-Wimplicit-float-conversion" -#endif - #include - -#ifdef __clang__ #pragma clang diagnostic pop -#endif #include #include @@ -48,8 +42,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"time_series", &isArray, nullptr, "Array"}, - {"period", &isNativeUInt, nullptr, "Unsigned Integer"}, + {"time_series", static_cast(&isArray), nullptr, "Array"}, + {"period", static_cast(&isNativeUInt), nullptr, "Unsigned Integer"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 8a2e276c74a..da04d3b78d3 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -24,9 +24,6 @@ class FunctionSeriesOutliersDetectTukey : public IFunction public: static constexpr auto name = "seriesOutliersDetectTukey"; - static constexpr Float64 min_quartile = 2.0; - static constexpr Float64 max_quartile = 98.0; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } std::string getName() const override { return name; } @@ -48,11 +45,11 @@ public: 
getName(), arguments.size()); - FunctionArgumentDescriptors mandatory_args{{"time_series", &isArray, nullptr, "Array"}}; + FunctionArgumentDescriptors mandatory_args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; FunctionArgumentDescriptors optional_args{ - {"min_percentile", &isNativeNumber, isColumnConst, "Number"}, - {"max_percentile", &isNativeNumber, isColumnConst, "Number"}, - {"k", &isNativeNumber, isColumnConst, "Number"}}; + {"min_percentile", static_cast(&isFloat), isColumnConst, "Number"}, + {"max_percentile", static_cast(&isFloat), isColumnConst, "Number"}, + {"k", static_cast(&isNativeNumber), isColumnConst, "Number"}}; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -73,30 +70,26 @@ public: if (input_rows_count == 0) return ColumnArray::create(ColumnFloat64::create()); - Float64 min_percentile = 0.25; /// default 25th percentile Float64 max_percentile = 0.75; /// default 75th percentile Float64 k = 1.50; if (arguments.size() > 1) { - Float64 p_min = arguments[1].column->getFloat64(0); - if (isnan(p_min) || !isFinite(p_min) || p_min < min_quartile|| p_min > max_quartile) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} must be in range [2.0, 98.0]", getName()); + static constexpr Float64 min_percentile_lower_bound = 0.02; + static constexpr Float64 max_percentile_upper_bound = 0.98; - min_percentile = p_min / 100; + min_percentile = arguments[1].column->getFloat64(0); + if (isnan(min_percentile) || !isFinite(min_percentile) || min_percentile < min_percentile_lower_bound|| min_percentile > max_percentile_upper_bound) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} must be in range [0.02, 0.98]", getName()); - Float64 p_max = arguments[2].column->getFloat64(0); - if (isnan(p_max) || !isFinite(p_max) || p_max < min_quartile || p_max > max_quartile || p_max < min_percentile * 100) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The third argument of function {} must be in range [2.0, 98.0]", getName()); + max_percentile = arguments[2].column->getFloat64(0); + if (isnan(max_percentile) || !isFinite(max_percentile) || max_percentile < min_percentile_lower_bound || max_percentile > max_percentile_upper_bound || max_percentile < min_percentile) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The third argument of function {} must be in range [0.02, 0.98]", getName()); - max_percentile = p_max / 100; - - auto k_val = arguments[3].column->getFloat64(0); - if (k_val < 0.0 || isnan(k_val) || !isFinite(k_val)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argument of function {} must be a positive number", getName()); - - k = k_val; + k = arguments[3].column->getFloat64(0); + if (k < 0.0 || isnan(k) || !isFinite(k)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argument of function {} must be a non-negative number", getName()); } if (executeNumber(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res) @@ -155,7 +148,8 @@ private: src_sorted.assign(src_vec.begin() + prev_src_offset, src_vec.begin() + src_offset); std::sort(src_sorted.begin(), src_sorted.end()); - Float64 q1, q2; + Float64 q1; + Float64 q2; Float64 p1 = len * min_percentile; if (p1 == static_cast(p1)) @@ -216,8 +210,8 @@ seriesOutliersDetectTukey(series, min_percentile, max_percentile, k); **Arguments** - `series` - An array of numeric values. 
-- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25. -- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75. +- `min_quantile` - The minimum quantile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [0.02,0.98]. The default is 0.25. +- `max_quantile` - The maximum quantile to be used to calculate inter-quantile range (IQR). The value must be in range [0.02, 0.98]. The default is 0.75. - `k` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5 At least four data points are required in `series` to detect outliers. @@ -247,7 +241,7 @@ Result: Query: ``` sql -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0; +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 0.2, 0.8, 1.5) AS print_0; ``` Result: diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index 61e3319d810..fbaa2b14e64 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -1,18 +1,14 @@ #include "config.h" #if USE_POCKETFFT -# ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wshadow" -# pragma clang diagnostic ignored "-Wextra-semi-stmt" -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -# endif +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wshadow" +# pragma clang diagnostic ignored "-Wextra-semi-stmt" +# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" # include -# ifdef __clang__ -# pragma clang diagnostic pop -# endif +# pragma clang diagnostic pop # include # include @@ -56,7 +52,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{{"time_series", &isArray, nullptr, "Array"}}; + FunctionArgumentDescriptors args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared(); diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 9f1a3584df8..fd8fb22455b 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -51,12 +51,12 @@ namespace }; - class FunctionTcpPort : public FunctionConstantBase + class FunctionTCPPort : public FunctionConstantBase { public: static constexpr auto name = "tcpPort"; - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTcpPort(ContextPtr context) : FunctionConstantBase(context->getTCPPort(), context->isDistributed()) {} + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionTCPPort(ContextPtr context) : FunctionConstantBase(context->getTCPPort(), context->isDistributed()) {} }; @@ -153,9 +153,9 @@ REGISTER_FUNCTION(ServerUUID) factory.registerFunction(); } -REGISTER_FUNCTION(TcpPort) +REGISTER_FUNCTION(TCPPort) { - factory.registerFunction(); + factory.registerFunction(); } REGISTER_FUNCTION(Timezone) diff --git a/src/Functions/sin.cpp b/src/Functions/sin.cpp index dc75f4800c0..914f431adb4 100644 --- a/src/Functions/sin.cpp 
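The seriesOutliersDetectTukey change switches the optional bounds from percent values in [2.0, 98.0] to quantile fractions in [0.02, 0.98] and requires a non-negative `k`; the detection itself stays the classic Tukey fence test, flagging a point when it falls below Q_low - k*IQR or above Q_high + k*IQR, with IQR = Q_high - Q_low. A rough standalone illustration of that test, using a simple nearest-rank quantile rather than ClickHouse's exact interpolation:

```cpp
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// Nearest-rank style quantile on an already sorted vector; the real function interpolates.
static double quantile(const std::vector<double> & sorted, double q)
{
    size_t idx = static_cast<size_t>(std::floor(q * (sorted.size() - 1)));
    return sorted[idx];
}

int main()
{
    std::vector<double> series = {-3, 2, 15, 3, 5, 6, 4.5, 5, 12, 45, 12, 3.4, 3, 4, 5, 6};
    double min_quantile = 0.25, max_quantile = 0.75, k = 1.5;

    std::vector<double> sorted = series;
    std::sort(sorted.begin(), sorted.end());

    double q_low = quantile(sorted, min_quantile);
    double q_high = quantile(sorted, max_quantile);
    double iqr = q_high - q_low;
    double lower_fence = q_low - k * iqr;
    double upper_fence = q_high + k * iqr;

    for (double x : series)
        std::cout << x << (x < lower_fence || x > upper_fence ? "  <-- outside the Tukey fences" : "") << '\n';
}
```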
+++ b/src/Functions/sin.cpp @@ -13,7 +13,15 @@ using FunctionSin = FunctionMathUnary>; REGISTER_FUNCTION(Sin) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction( + FunctionDocumentation{ + .description = "Returns the sine of the argument.", + .syntax = "sin(x)", + .arguments = {{"x", "The number whose sine will be returned. (U)Int*, Float* or Decimal*."}}, + .returned_value = "The sine of x.", + .examples = {{.name = "simple", .query = "SELECT sin(1.23)", .result = "0.9424888019316975"}}, + .categories{"Mathematical", "Trigonometric"}}, + FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 73d58ca6b5b..84f08dd5440 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -62,32 +62,17 @@ public: { } - /// Get the name of the function. - String getName() const override - { - return name; - } - - /// Do not sleep during query analysis. - bool isSuitableForConstantFolding() const override - { - return false; - } - - size_t getNumberOfArguments() const override - { - return 1; - } - + String getName() const override { return name; } + bool isSuitableForConstantFolding() const override { return false; } /// Do not sleep during query analysis. + size_t getNumberOfArguments() const override { return 1; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { WhichDataType which(arguments[0]); - if (!which.isFloat() - && !which.isNativeUInt()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected Float64", + if (!which.isFloat() && !which.isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected UInt* or Float*", arguments[0]->getName(), getName()); return std::make_shared(); diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 6aafa2cb5cf..f2dd1f1c51d 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -47,7 +47,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"value", &isDateTime, nullptr, "DateTime"} + {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -91,10 +91,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"value", &isInt64, nullptr, "Int64"} + {"value", static_cast(&isInt64), nullptr, "Int64"} }; FunctionArgumentDescriptors optional_args{ - {"time_zone", &isString, nullptr, "String"} + {"time_zone", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -151,7 +151,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"value", &isDateTime64, nullptr, "DateTime64"} + {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -203,10 +203,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"value", &isInt64, nullptr, "Int64"} + {"value", static_cast(&isInt64), nullptr, "Int64"} }; 
FunctionArgumentDescriptors optional_args{ - {"time_zone", &isString, nullptr, "String"} + {"time_zone", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 009bc20e065..03dc0d06719 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -45,7 +45,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"n", &isInteger, nullptr, "Integer"} + {"n", static_cast(&isInteger), nullptr, "Integer"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/splitByChar.cpp b/src/Functions/splitByChar.cpp index d537039dc23..d3d5dc9fe4a 100644 --- a/src/Functions/splitByChar.cpp +++ b/src/Functions/splitByChar.cpp @@ -40,6 +40,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByNonAlpha.cpp b/src/Functions/splitByNonAlpha.cpp index 467e7b0b5c3..4486a33aa88 100644 --- a/src/Functions/splitByNonAlpha.cpp +++ b/src/Functions/splitByNonAlpha.cpp @@ -42,6 +42,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 77328205c01..430089f14ee 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -44,6 +44,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByString.cpp b/src/Functions/splitByString.cpp index 7d6803b2f27..5c97f9841e7 100644 --- a/src/Functions/splitByString.cpp +++ b/src/Functions/splitByString.cpp @@ -39,6 +39,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByWhitespace.cpp b/src/Functions/splitByWhitespace.cpp index 168e429c6f5..cf21a218b15 100644 --- a/src/Functions/splitByWhitespace.cpp +++ b/src/Functions/splitByWhitespace.cpp @@ -30,6 +30,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index cd3875e2607..6679646fef4 
100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -98,7 +98,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"sqid", &isString, nullptr, "String"} + {"sqid", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -122,7 +122,7 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { std::string_view sqid = col_non_const->getDataAt(i).toView(); - std::vector integers = sqids.decode(sqid); + std::vector integers = sqids.decode(String(sqid)); res_nested_data.insert(integers.begin(), integers.end()); res_offsets_data.push_back(integers.size()); } diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index e3dfdf3de5e..e809914f5f0 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -189,6 +189,7 @@ REGISTER_FUNCTION(Substring) factory.registerFunction>({}, FunctionFactory::CaseInsensitive); factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive); // MySQL alias factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// MySQL alias + factory.registerAlias("byteSlice", "substring", FunctionFactory::CaseInsensitive); /// resembles PostgreSQL's get_byte function, similar to ClickHouse's bitSlice factory.registerFunction>({}, FunctionFactory::CaseSensitive); } diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index 48012c1376f..fbca08b0968 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -41,10 +41,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"timestamp", &isStringOrFixedString, nullptr, "String or FixedString"} + {"timestamp", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; FunctionArgumentDescriptors optional_args{ - {"time", &isString, nullptr, "String"} + {"time", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index cc2de8df0d4..fc621b272de 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -39,8 +39,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args = { - {"Value", &isNumber, nullptr, "Number"}, - {"precision", &isNativeInteger, &isColumnConst, "const Integer"} + {"Value", static_cast(&isNumber), nullptr, "Number"}, + {"precision", static_cast(&isNativeInteger), &isColumnConst, "const Integer"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); diff --git a/src/Functions/toMillisecond.cpp b/src/Functions/toMillisecond.cpp new file mode 100644 index 00000000000..aaef517c996 --- /dev/null +++ b/src/Functions/toMillisecond.cpp @@ -0,0 +1,33 @@ +#include +#include +#include +#include + +namespace DB +{ + +using FunctionToMillisecond = FunctionDateOrDateTimeToSomething; + +REGISTER_FUNCTION(ToMillisecond) +{ + factory.registerFunction( + + + FunctionDocumentation{ + .description=R"( +Returns the millisecond component (0-999) of a date with time. 
+ )", + .syntax="toMillisecond(value)", + .arguments={{"value", "DateTime or DateTime64"}}, + .returned_value="The millisecond in the minute (0 - 59) of the given date/time", + .examples{ + {"toMillisecond", "SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3)", "456"}}, + .categories{"Dates and Times"} + } + ); + + /// MySQL compatibility alias. + factory.registerAlias("MILLISECOND", "toMillisecond", FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index ea0ad139481..7f25a317466 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -11,7 +10,6 @@ #include #include #include -#include namespace DB @@ -73,21 +71,21 @@ public: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { - case IntervalKind::Nanosecond: - case IntervalKind::Microsecond: - case IntervalKind::Millisecond: + case IntervalKind::Kind::Nanosecond: + case IntervalKind::Kind::Microsecond: + case IntervalKind::Kind::Millisecond: result_type = ResultType::DateTime64; break; - case IntervalKind::Second: - case IntervalKind::Minute: - case IntervalKind::Hour: - case IntervalKind::Day: /// weird why Day leads to DateTime but too afraid to change it + case IntervalKind::Kind::Second: + case IntervalKind::Kind::Minute: + case IntervalKind::Kind::Hour: + case IntervalKind::Kind::Day: /// weird why Day leads to DateTime but too afraid to change it result_type = ResultType::DateTime; break; - case IntervalKind::Week: - case IntervalKind::Month: - case IntervalKind::Quarter: - case IntervalKind::Year: + case IntervalKind::Kind::Week: + case IntervalKind::Kind::Month: + case IntervalKind::Kind::Quarter: + case IntervalKind::Kind::Year: result_type = ResultType::Date; break; } @@ -133,11 +131,11 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (interval_type->getKind() == IntervalKind::Nanosecond) + if (interval_type->getKind() == IntervalKind::Kind::Nanosecond) scale = 9; - else if (interval_type->getKind() == IntervalKind::Microsecond) + else if (interval_type->getKind() == IntervalKind::Kind::Microsecond) scale = 6; - else if (interval_type->getKind() == IntervalKind::Millisecond) + else if (interval_type->getKind() == IntervalKind::Kind::Millisecond) scale = 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); @@ -206,28 +204,28 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { - case IntervalKind::Nanosecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Microsecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Millisecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Second: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Minute: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Hour: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Day: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Week: - return execute(time_data_type, time_column, num_units, 
result_type, time_zone, scale); - case IntervalKind::Month: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Quarter: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); - case IntervalKind::Year: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Nanosecond: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Microsecond: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Millisecond: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Second: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Minute: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Hour: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Day: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Week: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Month: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Quarter: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Kind::Year: + return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); } std::unreachable(); diff --git a/src/Functions/trap.cpp b/src/Functions/trap.cpp index 99430f039a4..6ce696fedb5 100644 --- a/src/Functions/trap.cpp +++ b/src/Functions/trap.cpp @@ -177,7 +177,7 @@ public: } else if (mode == "logical error") { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: trap"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trap"); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown trap mode"); diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 33b0e9f6039..de4a6fb0a5c 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -1,9 +1,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -1364,11 +1364,11 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (getReturnTypeImpl(arguments)->isNullable()) - { - return DataTypeNullable(std::make_shared()) - .createColumnConstWithDefaultValue(input_rows_count); - } + /// TODO: cosineDistance does not support nullable arguments + /// https://github.com/ClickHouse/ClickHouse/pull/27933#issuecomment-916670286 + auto return_type = getReturnTypeImpl(arguments); + if (return_type->isNullable()) + return return_type->createColumnConstWithDefaultValue(input_rows_count); FunctionDotProduct dot(context); ColumnWithTypeAndName dot_result{dot.executeImpl(arguments, DataTypePtr(), input_rows_count), diff --git a/src/Functions/visitParamExtractBool.cpp b/src/Functions/visitParamExtractBool.cpp index 31763fe54ce..2c413ec13bb 100644 --- a/src/Functions/visitParamExtractBool.cpp +++ b/src/Functions/visitParamExtractBool.cpp @@ -21,7 +21,35 @@ using FunctionSimpleJSONExtractBool = 
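The churn from `IntervalKind::Nanosecond` to `IntervalKind::Kind::Nanosecond` in toStartOfInterval is consistent with the enumerators now living in a nested scoped enum inside the IntervalKind wrapper, so every switch label has to be fully qualified. A small standalone analogue of that layout (illustrative type, not the real IntervalKind):

```cpp
#include <iostream>

struct IntervalLike
{
    enum class Kind { Second, Minute, Hour, Day }; // nested scoped enumeration

    explicit IntervalLike(Kind kind_) : kind(kind_) {}
    Kind getKind() const { return kind; }

private:
    Kind kind;
};

static const char * resultTypeFor(IntervalLike interval)
{
    switch (interval.getKind()) // every label is fully qualified: Wrapper::Kind::Enumerator
    {
        case IntervalLike::Kind::Second:
        case IntervalLike::Kind::Minute:
        case IntervalLike::Kind::Hour:
            return "DateTime";
        case IntervalLike::Kind::Day:
            return "Date";
    }
    return "unknown";
}

int main()
{
    std::cout << resultTypeFor(IntervalLike(IntervalLike::Kind::Hour)) << '\n'; // DateTime
    std::cout << resultTypeFor(IntervalLike(IntervalLike::Kind::Day)) << '\n';  // Date
}
```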
FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description = "Parses a true/false value from the value of the field named field_name. The result is UInt8.", + .syntax = "simpleJSONExtractBool(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value + = R"(It returns 1 if the value of the field is true, 0 otherwise. This means this function will return 0 including (and not only) in the following cases: + - If the field doesn't exists. + - If the field contains true as a string, e.g.: {"field":"true"}. + - If the field contains 1 as a numerical value.)", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":false,"bar":true}'); +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json; +SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +1 +0 +0)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractBool", "simpleJSONExtractBool"); } diff --git a/src/Functions/visitParamExtractFloat.cpp b/src/Functions/visitParamExtractFloat.cpp index 6f6d5274050..fc839142cc7 100644 --- a/src/Functions/visitParamExtractFloat.cpp +++ b/src/Functions/visitParamExtractFloat.cpp @@ -11,7 +11,36 @@ using FunctionSimpleJSONExtractFloat = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description + = "Parses Float64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the " + "beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.", + .syntax = "simpleJSONExtractFloat(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +-4000 +0 +-3.4 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractFloat", "simpleJSONExtractFloat"); } diff --git a/src/Functions/visitParamExtractInt.cpp b/src/Functions/visitParamExtractInt.cpp index e020c43e8b4..4588fc55c52 100644 --- a/src/Functions/visitParamExtractInt.cpp +++ b/src/Functions/visitParamExtractInt.cpp @@ -11,7 +11,36 @@ using FunctionSimpleJSONExtractInt = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description + = "Parses Int64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the " + "beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.", + .syntax = "simpleJSONExtractInt(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. 
String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +-4 +0 +-3 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractInt", "simpleJSONExtractInt"); } diff --git a/src/Functions/visitParamExtractRaw.cpp b/src/Functions/visitParamExtractRaw.cpp index 74a83170545..3cdc5001e13 100644 --- a/src/Functions/visitParamExtractRaw.cpp +++ b/src/Functions/visitParamExtractRaw.cpp @@ -61,7 +61,35 @@ using FunctionSimpleJSONExtractRaw = FunctionsStringSearchToString(); + factory.registerFunction(FunctionDocumentation{ + .description = "Returns the value of the field named field_name as a String, including separators.", + .syntax = "simpleJSONExtractRaw(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value + = "It returns the value of the field as a String including separators if the field exists, or an empty String otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"( +"-4e3" +-3.4 +5 +{"def":[1,2,3]})"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractRaw", "simpleJSONExtractRaw"); } diff --git a/src/Functions/visitParamExtractString.cpp b/src/Functions/visitParamExtractString.cpp index 50d5f345189..8dae10638f8 100644 --- a/src/Functions/visitParamExtractString.cpp +++ b/src/Functions/visitParamExtractString.cpp @@ -22,7 +22,35 @@ using FunctionSimpleJSONExtractString = FunctionsStringSearchToString(); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Parses String in double quotes from the value of the field named field_name. + + There is currently no support for code points in the format \uXXXX\uYYYY that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).)", + .syntax = "simpleJSONExtractString(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns the value of a field as a String, including separators. The value is unescaped. 
It returns an empty " + "String: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263a"}'); +INSERT INTO jsons VALUES ('{"foo":"hello}'); + +SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(\n\0 + +☺ +)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractString", "simpleJSONExtractString"); } diff --git a/src/Functions/visitParamExtractUInt.cpp b/src/Functions/visitParamExtractUInt.cpp index fb58e417f34..777df9fdd24 100644 --- a/src/Functions/visitParamExtractUInt.cpp +++ b/src/Functions/visitParamExtractUInt.cpp @@ -12,7 +12,36 @@ using FunctionSimpleJSONExtractUInt = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description + = "Parses UInt64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the " + "beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.", + .syntax = "simpleJSONExtractUInt(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"4e3"}'); +INSERT INTO jsons VALUES ('{"foo":3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +4 +0 +3 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractUInt", "simpleJSONExtractUInt"); } diff --git a/src/Functions/visitParamHas.cpp b/src/Functions/visitParamHas.cpp index 1ed1f1d16e7..09fec782980 100644 --- a/src/Functions/visitParamHas.cpp +++ b/src/Functions/visitParamHas.cpp @@ -21,7 +21,28 @@ using FunctionSimpleJSONHas = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description = "Checks whether there is a field named field_name. The result is UInt8.", + .syntax = "simpleJSONHas(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. 
String literal."}}, + .returned_value = "It returns 1 if the field exists, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONHas(json, 'foo') FROM jsons; +SELECT simpleJSONHas(json, 'bar') FROM jsons;)", + .result = R"(1 +0)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamHas", "simpleJSONHas"); } diff --git a/src/IO/Archives/ArchiveUtils.h b/src/IO/Archives/ArchiveUtils.h index 810b9d8d730..1b66be005a2 100644 --- a/src/IO/Archives/ArchiveUtils.h +++ b/src/IO/Archives/ArchiveUtils.h @@ -4,11 +4,9 @@ #if USE_LIBARCHIVE -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreserved-macro-identifier" #include #include #endif -#endif diff --git a/src/IO/Archives/IArchiveReader.h b/src/IO/Archives/IArchiveReader.h index 84a1dc21f5b..ee516d2655b 100644 --- a/src/IO/Archives/IArchiveReader.h +++ b/src/IO/Archives/IArchiveReader.h @@ -56,6 +56,7 @@ public: /// It's possible to convert a file enumerator to a read buffer and vice versa. virtual std::unique_ptr readFile(std::unique_ptr enumerator) = 0; virtual std::unique_ptr nextFile(std::unique_ptr read_buffer) = 0; + virtual std::unique_ptr currentFile(std::unique_ptr read_buffer) = 0; virtual std::vector getAllFiles() = 0; virtual std::vector getAllFiles(NameFilter filter) = 0; diff --git a/src/IO/Archives/IArchiveWriter.h b/src/IO/Archives/IArchiveWriter.h index cccc6dc953b..c8f0c609da1 100644 --- a/src/IO/Archives/IArchiveWriter.h +++ b/src/IO/Archives/IArchiveWriter.h @@ -22,6 +22,8 @@ public: /// of the function `writeFile()` should be destroyed before next call of `writeFile()`. virtual std::unique_ptr writeFile(const String & filename) = 0; + virtual std::unique_ptr writeFile(const String & filename, size_t size) = 0; + /// Returns true if there is an active instance of WriteBuffer returned by writeFile(). /// This function should be used mostly for debugging purposes. 
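The two-argument writeFile(filename, size) declared above exists because tar-style formats written through libarchive record an entry's size in its header before any data follows. A minimal standalone libarchive sketch (not ClickHouse code; the output path and payload are made up for illustration) showing that ordering:

// Standalone sketch (plain libarchive): the entry size must be declared in the header
// *before* any data is written, which is why an up-front size hint is useful.
#include <archive.h>
#include <archive_entry.h>
#include <string>

int main()
{
    const std::string payload = "hello, archive\n";   // illustrative content

    struct archive * a = archive_write_new();
    archive_write_set_format_pax_restricted(a);       // plain .tar
    archive_write_open_filename(a, "out.tar");        // illustrative path

    struct archive_entry * e = archive_entry_new();
    archive_entry_set_pathname(e, "hello.txt");
    archive_entry_set_size(e, static_cast<la_int64_t>(payload.size())); // size goes into the header
    archive_entry_set_filetype(e, AE_IFREG);
    archive_entry_set_perm(e, 0644);
    archive_write_header(a, e);                        // header is emitted here
    archive_write_data(a, payload.data(), payload.size());
    archive_entry_free(e);

    archive_write_close(a);
    archive_write_free(a);
    return 0;
}

The zip writer, by contrast, does not need the hint, which is presumably why its size-taking overload later in this diff simply forwards to the one-argument version.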
virtual bool isWritingFile() const = 0; diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index 94e68045575..bec7f587180 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -1,11 +1,9 @@ +#include #include #include #include #include -#include - -#include namespace DB { @@ -14,35 +12,58 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_UNPACK_ARCHIVE; - extern const int LOGICAL_ERROR; - extern const int CANNOT_READ_ALL_DATA; - extern const int UNSUPPORTED_METHOD; +extern const int CANNOT_UNPACK_ARCHIVE; +extern const int LOGICAL_ERROR; +extern const int CANNOT_READ_ALL_DATA; +extern const int UNSUPPORTED_METHOD; } +class LibArchiveReader::StreamInfo +{ +public: + explicit StreamInfo(std::unique_ptr read_buffer_) : read_buffer(std::move(read_buffer_)) { } + + static ssize_t read(struct archive *, void * client_data, const void ** buff) + { + auto * read_stream = reinterpret_cast(client_data); + *buff = reinterpret_cast(read_stream->buf); + return read_stream->read_buffer->read(read_stream->buf, DBMS_DEFAULT_BUFFER_SIZE); + } + + std::unique_ptr read_buffer; + char buf[DBMS_DEFAULT_BUFFER_SIZE]; +}; + class LibArchiveReader::Handle { public: explicit Handle(std::string path_to_archive_, bool lock_on_reading_) - : path_to_archive(path_to_archive_), lock_on_reading(lock_on_reading_) + : path_to_archive(std::move(path_to_archive_)), lock_on_reading(lock_on_reading_) { - current_archive = open(path_to_archive); + current_archive = openWithPath(path_to_archive); + } + + explicit Handle(std::string path_to_archive_, bool lock_on_reading_, const ReadArchiveFunction & archive_read_function_) + : path_to_archive(std::move(path_to_archive_)), archive_read_function(archive_read_function_), lock_on_reading(lock_on_reading_) + { + read_stream = std::make_unique(archive_read_function()); + current_archive = openWithReader(read_stream.get()); } Handle(const Handle &) = delete; Handle(Handle && other) noexcept - : current_archive(other.current_archive) + : read_stream(std::move(other.read_stream)) + , current_archive(other.current_archive) , current_entry(other.current_entry) + , archive_read_function(std::move(other.archive_read_function)) , lock_on_reading(other.lock_on_reading) + { other.current_archive = nullptr; other.current_entry = nullptr; } - ~Handle() - { - close(current_archive); - } + ~Handle() { close(current_archive); } bool locateFile(const std::string & filename) { @@ -64,10 +85,14 @@ public: break; if (filter(archive_entry_pathname(current_entry))) + { + valid = true; return true; + } } checkError(err); + valid = false; return false; } @@ -81,17 +106,19 @@ public: } while (err == ARCHIVE_RETRY); checkError(err); - return err == ARCHIVE_OK; + valid = err == ARCHIVE_OK; + return valid; } std::vector getAllFiles(NameFilter filter) { - auto * archive = open(path_to_archive); - SCOPE_EXIT( - close(archive); - ); + std::unique_ptr rs + = archive_read_function ? std::make_unique(archive_read_function()) : nullptr; + auto * archive = rs ? 
openWithReader(rs.get()) : openWithPath(path_to_archive); - struct archive_entry * entry = nullptr; + SCOPE_EXIT(close(archive);); + + Entry entry = nullptr; std::vector files; int error = readNextHeader(archive, &entry); @@ -112,6 +139,8 @@ public: const String & getFileName() const { chassert(current_entry); + if (!valid) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file"); if (!file_name) file_name.emplace(archive_entry_pathname(current_entry)); @@ -121,6 +150,8 @@ public: const FileInfo & getFileInfo() const { chassert(current_entry); + if (!valid) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file"); if (!file_info) { file_info.emplace(); @@ -132,13 +163,21 @@ public: return *file_info; } - struct archive * current_archive; - struct archive_entry * current_entry = nullptr; + la_ssize_t readData(void * buf, size_t len) { return archive_read_data(current_archive, buf, len); } + + const char * getArchiveError() { return archive_error_string(current_archive); } + private: + using Archive = struct archive *; + using Entry = struct archive_entry *; + void checkError(int error) const { if (error == ARCHIVE_FATAL) - throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Failed to read archive while fetching all files: {}", archive_error_string(current_archive)); + throw Exception( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, + "Failed to read archive while fetching all files: {}", + archive_error_string(current_archive)); } void resetFileInfo() @@ -147,15 +186,29 @@ private: file_info.reset(); } - static struct archive * open(const String & path_to_archive) + Archive openWithReader(StreamInfo * read_stream_) { auto * archive = archive_read_new(); try { - archive_read_support_filter_all(archive); - archive_read_support_format_all(archive); - if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK) - throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive {}: {}", quoteString(path_to_archive), archive_error_string(archive)); + // Support for bzip2, gzip, lzip, xz, zstd and lz4 + archive_read_support_filter_bzip2(archive); + archive_read_support_filter_gzip(archive); + archive_read_support_filter_xz(archive); + archive_read_support_filter_lz4(archive); + archive_read_support_filter_zstd(archive); + archive_read_support_filter_lzma(archive); + // Support tar, 7zip and zip + archive_read_support_format_tar(archive); + archive_read_support_format_7zip(archive); + archive_read_support_format_zip(archive); + + if (archive_read_open(archive, read_stream_, nullptr, StreamInfo::read, nullptr) != ARCHIVE_OK) + throw Exception( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, + "Couldn't open archive {}: {}", + quoteString(path_to_archive), + archive_error_string(archive)); } catch (...) 
{ @@ -166,7 +219,39 @@ private: return archive; } - static void close(struct archive * archive) + Archive openWithPath(const String & path_to_archive_) + { + auto * archive = archive_read_new(); + try + { + // Support for bzip2, gzip, lzip, xz, zstd and lz4 + archive_read_support_filter_bzip2(archive); + archive_read_support_filter_gzip(archive); + archive_read_support_filter_xz(archive); + archive_read_support_filter_lz4(archive); + archive_read_support_filter_zstd(archive); + archive_read_support_filter_lzma(archive); + // Support tar, 7zip and zip + archive_read_support_format_tar(archive); + archive_read_support_format_7zip(archive); + archive_read_support_format_zip(archive); + if (archive_read_open_filename(archive, path_to_archive_.c_str(), 10240) != ARCHIVE_OK) + throw Exception( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, + "Couldn't open archive {}: {}", + quoteString(path_to_archive), + archive_error_string(archive)); + } + catch (...) + { + close(archive); + throw; + } + + return archive; + } + + static void close(Archive archive) { if (archive) { @@ -184,7 +269,12 @@ private: return archive_read_next_header(archive, entry); } - const String path_to_archive; + String path_to_archive; + std::unique_ptr read_stream; + Archive current_archive; + Entry current_entry = nullptr; + bool valid = true; + IArchiveReader::ReadArchiveFunction archive_read_function; /// for some archive types when we are reading headers static variables are used /// which are not thread-safe @@ -198,7 +288,7 @@ private: class LibArchiveReader::FileEnumeratorImpl : public FileEnumerator { public: - explicit FileEnumeratorImpl(Handle handle_) : handle(std::move(handle_)) {} + explicit FileEnumeratorImpl(Handle handle_) : handle(std::move(handle_)) { } const String & getFileName() const override { return handle.getFileName(); } const FileInfo & getFileInfo() const override { return handle.getFileInfo(); } @@ -206,6 +296,7 @@ public: /// Releases owned handle to pass it to a read buffer. 
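For readers less familiar with the libarchive calls used by openWithPath() and getAllFiles() above, here is a standalone sketch (not ClickHouse code) that enables a few filters and formats explicitly and lists entry names; the helper name is illustrative, the 10240 block size mirrors the value used above:

// Standalone sketch (plain libarchive): open an archive with explicitly enabled
// filters/formats and collect the entry names without decompressing the bodies.
#include <archive.h>
#include <archive_entry.h>
#include <cstdio>
#include <string>
#include <vector>

std::vector<std::string> listEntries(const std::string & path)  // illustrative helper
{
    std::vector<std::string> names;
    struct archive * a = archive_read_new();
    archive_read_support_filter_gzip(a);
    archive_read_support_filter_zstd(a);
    archive_read_support_format_tar(a);
    archive_read_support_format_zip(a);

    if (archive_read_open_filename(a, path.c_str(), 10240) != ARCHIVE_OK)
    {
        std::fprintf(stderr, "open failed: %s\n", archive_error_string(a));
        archive_read_free(a);
        return names;
    }

    struct archive_entry * entry = nullptr;
    while (archive_read_next_header(a, &entry) == ARCHIVE_OK)
    {
        names.emplace_back(archive_entry_pathname(entry));
        archive_read_data_skip(a);   // skip the body, only the header is needed
    }

    archive_read_free(a);            // also closes the archive
    return names;
}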
Handle releaseHandle() && { return std::move(handle); } + private: Handle handle; }; @@ -217,36 +308,33 @@ public: : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) , handle(std::move(handle_)) , path_to_archive(std::move(path_to_archive_)) - {} + { + } off_t seek(off_t /* off */, int /* whence */) override { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Seek is not supported when reading from archive"); } - bool checkIfActuallySeekable() override { return false; } - off_t getPosition() override - { - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive"); - } - + off_t getPosition() override { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive"); } String getFileName() const override { return handle.getFileName(); } size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } - Handle releaseHandle() && - { - return std::move(handle); - } + Handle releaseHandle() && { return std::move(handle); } private: bool nextImpl() override { - auto bytes_read = archive_read_data(handle.current_archive, internal_buffer.begin(), static_cast(internal_buffer.size())); - + auto bytes_read = handle.readData(internal_buffer.begin(), internal_buffer.size()); if (bytes_read < 0) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to read file {} from {}: {}", handle.getFileName(), path_to_archive, archive_error_string(handle.current_archive)); + throw Exception( + ErrorCodes::CANNOT_READ_ALL_DATA, + "Failed to read file {} from {}: {}", + handle.getFileName(), + path_to_archive, + handle.getArchiveError()); if (!bytes_read) return false; @@ -265,7 +353,17 @@ private: LibArchiveReader::LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_) : archive_name(std::move(archive_name_)), lock_on_reading(lock_on_reading_), path_to_archive(std::move(path_to_archive_)) -{} +{ +} + +LibArchiveReader::LibArchiveReader( + std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_, const ReadArchiveFunction & archive_read_function_) + : archive_name(std::move(archive_name_)) + , lock_on_reading(lock_on_reading_) + , path_to_archive(std::move(path_to_archive_)) + , archive_read_function(archive_read_function_) +{ +} LibArchiveReader::~LibArchiveReader() = default; @@ -276,21 +374,25 @@ const std::string & LibArchiveReader::getPath() const bool LibArchiveReader::fileExists(const String & filename) { - Handle handle(path_to_archive, lock_on_reading); + Handle handle = acquireHandle(); return handle.locateFile(filename); } LibArchiveReader::FileInfo LibArchiveReader::getFileInfo(const String & filename) { - Handle handle(path_to_archive, lock_on_reading); + Handle handle = acquireHandle(); if (!handle.locateFile(filename)) - throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: file not found", path_to_archive); + throw Exception( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, + "Couldn't unpack archive {}: File {} was not found in archive", + path_to_archive, + quoteString(filename)); return handle.getFileInfo(); } std::unique_ptr LibArchiveReader::firstFile() { - Handle handle(path_to_archive, lock_on_reading); + Handle handle = acquireHandle(); if (!handle.nextFile()) return nullptr; @@ -299,17 +401,28 @@ std::unique_ptr LibArchiveReader::firstFile() std::unique_ptr LibArchiveReader::readFile(const String & filename, bool throw_on_not_found) { - return readFile([&](const std::string & file) { return file == 
filename; }, throw_on_not_found); + Handle handle = acquireHandle(); + if (!handle.locateFile(filename)) + { + if (throw_on_not_found) + throw Exception( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, + "Couldn't unpack archive {}: File {} was not found in archive", + path_to_archive, + quoteString(filename)); + return nullptr; + } + return std::make_unique(std::move(handle), path_to_archive); } std::unique_ptr LibArchiveReader::readFile(NameFilter filter, bool throw_on_not_found) { - Handle handle(path_to_archive, lock_on_reading); + Handle handle = acquireHandle(); if (!handle.locateFile(filter)) { if (throw_on_not_found) throw Exception( - ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive); + ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: No file satisfying filter in archive", path_to_archive); return nullptr; } return std::make_unique(std::move(handle), path_to_archive); @@ -328,13 +441,24 @@ std::unique_ptr LibArchiveReader::nextFile(std { if (!dynamic_cast(read_buffer.get())) throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()"); - auto read_buffer_from_libarchive = std::unique_ptr(static_cast(read_buffer.release())); + auto read_buffer_from_libarchive + = std::unique_ptr(static_cast(read_buffer.release())); auto handle = std::move(*read_buffer_from_libarchive).releaseHandle(); if (!handle.nextFile()) return nullptr; return std::make_unique(std::move(handle)); } +std::unique_ptr LibArchiveReader::currentFile(std::unique_ptr read_buffer) +{ + if (!dynamic_cast(read_buffer.get())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()"); + auto read_buffer_from_libarchive + = std::unique_ptr(static_cast(read_buffer.release())); + auto handle = std::move(*read_buffer_from_libarchive).releaseHandle(); + return std::make_unique(std::move(handle)); +} + std::vector LibArchiveReader::getAllFiles() { return getAllFiles({}); @@ -342,13 +466,22 @@ std::vector LibArchiveReader::getAllFiles() std::vector LibArchiveReader::getAllFiles(NameFilter filter) { - Handle handle(path_to_archive, lock_on_reading); + Handle handle = acquireHandle(); return handle.getAllFiles(filter); } -void LibArchiveReader::setPassword(const String & /*password_*/) +void LibArchiveReader::setPassword(const String & password_) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not set password to {} archive", archive_name); + if (password_.empty()) + return; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set password to {} archive", archive_name); +} + +LibArchiveReader::Handle LibArchiveReader::acquireHandle() +{ + std::lock_guard lock{mutex}; + return archive_read_function ? Handle{path_to_archive, lock_on_reading, archive_read_function} + : Handle{path_to_archive, lock_on_reading}; } #endif diff --git a/src/IO/Archives/LibArchiveReader.h b/src/IO/Archives/LibArchiveReader.h index 3dadd710089..148d5dd17f2 100644 --- a/src/IO/Archives/LibArchiveReader.h +++ b/src/IO/Archives/LibArchiveReader.h @@ -1,8 +1,9 @@ #pragma once -#include "config.h" - +#include #include +#include +#include "config.h" namespace DB @@ -40,6 +41,7 @@ public: /// It's possible to convert a file enumerator to a read buffer and vice versa. 
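readFile() above boils down to two libarchive steps: walk the headers until the wanted name is found, then pull the entry body in pieces. A standalone sketch of that flow (not ClickHouse code; helper name and buffer size are illustrative):

// Standalone sketch (plain libarchive): locate one entry by name and stream its
// contents in fixed-size pieces, roughly what locateFile() plus the read buffer's
// nextImpl() do together above.
#include <archive.h>
#include <archive_entry.h>
#include <cstdio>
#include <cstring>

bool dumpEntry(const char * archive_path, const char * wanted)   // illustrative helper
{
    struct archive * a = archive_read_new();
    archive_read_support_filter_all(a);
    archive_read_support_format_all(a);
    if (archive_read_open_filename(a, archive_path, 10240) != ARCHIVE_OK)
        return false;

    struct archive_entry * entry = nullptr;
    bool found = false;
    while (archive_read_next_header(a, &entry) == ARCHIVE_OK)
    {
        if (std::strcmp(archive_entry_pathname(entry), wanted) != 0)
            continue;
        found = true;
        char buf[16 * 1024];
        for (;;)
        {
            la_ssize_t n = archive_read_data(a, buf, sizeof(buf));  // negative means error
            if (n <= 0)
                break;
            std::fwrite(buf, 1, static_cast<size_t>(n), stdout);
        }
        break;
    }
    archive_read_free(a);
    return found;
}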
std::unique_ptr readFile(std::unique_ptr enumerator) override; std::unique_ptr nextFile(std::unique_ptr read_buffer) override; + std::unique_ptr currentFile(std::unique_ptr read_buffer) override; std::vector getAllFiles() override; std::vector getAllFiles(NameFilter filter) override; @@ -51,26 +53,44 @@ protected: /// Constructs an archive's reader that will read from a file in the local filesystem. LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_); + LibArchiveReader( + std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_, const ReadArchiveFunction & archive_read_function_); + private: class ReadBufferFromLibArchive; class Handle; class FileEnumeratorImpl; + class StreamInfo; + + Handle acquireHandle(); const std::string archive_name; const bool lock_on_reading; const String path_to_archive; + const ReadArchiveFunction archive_read_function; + mutable std::mutex mutex; }; class TarArchiveReader : public LibArchiveReader { public: - explicit TarArchiveReader(std::string path_to_archive) : LibArchiveReader("tar", /*lock_on_reading_=*/ true, std::move(path_to_archive)) { } + explicit TarArchiveReader(std::string path_to_archive) : LibArchiveReader("tar", /*lock_on_reading_=*/true, std::move(path_to_archive)) + { + } + + explicit TarArchiveReader(std::string path_to_archive, const ReadArchiveFunction & archive_read_function) + : LibArchiveReader("tar", /*lock_on_reading_=*/true, std::move(path_to_archive), archive_read_function) + { + } }; class SevenZipArchiveReader : public LibArchiveReader { public: - explicit SevenZipArchiveReader(std::string path_to_archive) : LibArchiveReader("7z", /*lock_on_reading_=*/ false, std::move(path_to_archive)) { } + explicit SevenZipArchiveReader(std::string path_to_archive) + : LibArchiveReader("7z", /*lock_on_reading_=*/false, std::move(path_to_archive)) + { + } }; #endif diff --git a/src/IO/Archives/LibArchiveWriter.cpp b/src/IO/Archives/LibArchiveWriter.cpp new file mode 100644 index 00000000000..55bc4c1f88c --- /dev/null +++ b/src/IO/Archives/LibArchiveWriter.cpp @@ -0,0 +1,248 @@ +#include + +#include +#include +#include +#include + +#include + +#if USE_LIBARCHIVE + +// This implementation follows the ZipArchiveWriter implementation as closely as possible.
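LibArchiveWriter below can target either a file or a caller-supplied WriteBuffer; the latter works by handing libarchive a write callback (StreamInfo::memory_write) via archive_write_open2. A standalone sketch of the same technique with a std::string as the sink (names are illustrative, not ClickHouse code):

// Standalone sketch (plain libarchive): send archive output to a callback instead of
// a file. The real code points the callback at a WriteBuffer; here it is a std::string.
#include <archive.h>
#include <archive_entry.h>
#include <string>

static la_ssize_t appendToString(struct archive *, void * client_data, const void * buff, size_t length)
{
    auto * out = static_cast<std::string *>(client_data);
    out->append(static_cast<const char *>(buff), length);
    return static_cast<la_ssize_t>(length);   // report everything as written
}

std::string buildTarInMemory()                 // illustrative helper
{
    std::string sink;
    struct archive * a = archive_write_new();
    archive_write_set_format_pax_restricted(a);
    archive_write_set_bytes_per_block(a, 0);   // don't pad output to block boundaries
    archive_write_open2(a, &sink, nullptr, appendToString, nullptr, nullptr);

    const std::string payload = "in-memory entry\n";
    struct archive_entry * e = archive_entry_new();
    archive_entry_set_pathname(e, "note.txt");
    archive_entry_set_size(e, static_cast<la_int64_t>(payload.size()));
    archive_entry_set_filetype(e, AE_IFREG);
    archive_entry_set_perm(e, 0644);
    archive_write_header(a, e);
    archive_write_data(a, payload.data(), payload.size());
    archive_entry_free(e);

    archive_write_close(a);
    archive_write_free(a);
    return sink;
}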
+ +namespace DB +{ +namespace ErrorCodes +{ +extern const int CANNOT_PACK_ARCHIVE; +extern const int NOT_IMPLEMENTED; +} + +namespace +{ +void checkResultCodeImpl(int code, const String & filename) +{ + if (code == ARCHIVE_OK) + return; + throw Exception( + ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't pack archive: LibArchive Code = {}, filename={}", code, quoteString(filename)); +} +} + +// this is a thin wrapper for libarchive to be able to write the archive to a WriteBuffer +class LibArchiveWriter::StreamInfo +{ +public: + explicit StreamInfo(std::unique_ptr archive_write_buffer_) : archive_write_buffer(std::move(archive_write_buffer_)) { } + static ssize_t memory_write(struct archive *, void * client_data, const void * buff, size_t length) + { + auto * stream_info = reinterpret_cast(client_data); + stream_info->archive_write_buffer->write(reinterpret_cast(buff), length); + return length; + } + + std::unique_ptr archive_write_buffer; +}; + +class LibArchiveWriter::WriteBufferFromLibArchive : public WriteBufferFromFileBase +{ +public: + WriteBufferFromLibArchive(std::shared_ptr archive_writer_, const String & filename_, const size_t & size_) + : WriteBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0), archive_writer(archive_writer_), filename(filename_), size(size_) + { + startWritingFile(); + archive = archive_writer_->getArchive(); + entry = nullptr; + } + + ~WriteBufferFromLibArchive() override + { + try + { + closeFile(/* throw_if_error= */ false); + endWritingFile(); + } + catch (...) + { + tryLogCurrentException("WriteBufferFromTarArchive"); + } + } + + void finalizeImpl() override + { + next(); + closeFile(/* throw_if_error=*/true); + endWritingFile(); + } + + void sync() override { next(); } + std::string getFileName() const override { return filename; } + +private: + void nextImpl() override + { + if (!offset()) + return; + if (entry == nullptr) + writeEntry(); + ssize_t to_write = offset(); + ssize_t written = archive_write_data(archive, working_buffer.begin(), offset()); + if (written != to_write) + { + throw Exception( + ErrorCodes::CANNOT_PACK_ARCHIVE, + "Couldn't pack tar archive: Failed to write all bytes, {} of {}, filename={}", + written, + to_write, + quoteString(filename)); + } + } + + void writeEntry() + { + expected_size = getSize(); + entry = archive_entry_new(); + archive_entry_set_pathname(entry, filename.c_str()); + archive_entry_set_size(entry, expected_size); + archive_entry_set_filetype(entry, static_cast<__LA_MODE_T>(0100000)); + archive_entry_set_perm(entry, 0644); + checkResult(archive_write_header(archive, entry)); + } + + size_t getSize() const + { + if (size) + return size; + else + return offset(); + } + + void closeFile(bool throw_if_error) + { + if (entry) + { + archive_entry_free(entry); + entry = nullptr; + } + if (throw_if_error and bytes != expected_size) + { + throw Exception( + ErrorCodes::CANNOT_PACK_ARCHIVE, + "Couldn't pack tar archive: Wrote {} of expected {} , filename={}", + bytes, + expected_size, + quoteString(filename)); + } + } + + void endWritingFile() + { + if (auto archive_writer_ptr = archive_writer.lock()) + archive_writer_ptr->endWritingFile(); + } + + void startWritingFile() + { + if (auto archive_writer_ptr = archive_writer.lock()) + archive_writer_ptr->startWritingFile(); + } + + void checkResult(int code) { checkResultCodeImpl(code, filename); } + + std::weak_ptr archive_writer; + const String filename; + Entry entry; + Archive archive; + size_t size; + size_t expected_size; +}; + +LibArchiveWriter::LibArchiveWriter(const 
String & path_to_archive_, std::unique_ptr archive_write_buffer_) + : path_to_archive(path_to_archive_) +{ + if (archive_write_buffer_) + stream_info = std::make_unique(std::move(archive_write_buffer_)); +} + +void LibArchiveWriter::createArchive() +{ + std::lock_guard lock{mutex}; + archive = archive_write_new(); + setFormatAndSettings(); + if (stream_info) + { + //This allows use to write directly to a writebuffer rather than an intermediate buffer in libarchive. + //This has to be set otherwise zstd breaks due to extra bytes being written at the end of the archive. + archive_write_set_bytes_per_block(archive, 0); + archive_write_open2(archive, stream_info.get(), nullptr, &StreamInfo::memory_write, nullptr, nullptr); + } + else + archive_write_open_filename(archive, path_to_archive.c_str()); +} + +LibArchiveWriter::~LibArchiveWriter() +{ + chassert((finalized || std::uncaught_exceptions() || std::current_exception()) && "LibArchiveWriter is not finalized in destructor."); + if (archive) + archive_write_free(archive); +} + +std::unique_ptr LibArchiveWriter::writeFile(const String & filename, size_t size) +{ + return std::make_unique(std::static_pointer_cast(shared_from_this()), filename, size); +} + +std::unique_ptr LibArchiveWriter::writeFile(const String & filename) +{ + return std::make_unique(std::static_pointer_cast(shared_from_this()), filename, 0); +} + +bool LibArchiveWriter::isWritingFile() const +{ + std::lock_guard lock{mutex}; + return is_writing_file; +} + +void LibArchiveWriter::endWritingFile() +{ + std::lock_guard lock{mutex}; + is_writing_file = false; +} + +void LibArchiveWriter::startWritingFile() +{ + std::lock_guard lock{mutex}; + if (std::exchange(is_writing_file, true)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot write two files to a tar archive in parallel"); +} + +void LibArchiveWriter::finalize() +{ + std::lock_guard lock{mutex}; + if (finalized) + return; + if (archive) + archive_write_close(archive); + if (stream_info) + { + stream_info->archive_write_buffer->finalize(); + stream_info.reset(); + } + finalized = true; +} + +void LibArchiveWriter::setPassword(const String & password_) +{ + if (password_.empty()) + return; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Setting a password is not currently supported for libarchive"); +} + +LibArchiveWriter::Archive LibArchiveWriter::getArchive() +{ + std::lock_guard lock{mutex}; + return archive; +} +} +#endif diff --git a/src/IO/Archives/LibArchiveWriter.h b/src/IO/Archives/LibArchiveWriter.h new file mode 100644 index 00000000000..f54a8ce2367 --- /dev/null +++ b/src/IO/Archives/LibArchiveWriter.h @@ -0,0 +1,77 @@ +#pragma once + +#include "config.h" + +#if USE_LIBARCHIVE +# include +# include +# include +# include + + +namespace DB +{ +class WriteBufferFromFileBase; + +/// Interface for writing an archive. +class LibArchiveWriter : public IArchiveWriter +{ +public: + /// Constructs an archive that will be written as a file in the local filesystem. + explicit LibArchiveWriter(const String & path_to_archive_, std::unique_ptr archive_write_buffer_); + + /// Call finalize() before destructing IArchiveWriter. + ~LibArchiveWriter() override; + + /// Starts writing a file to the archive. The function returns a write buffer, + /// any data written to that buffer will be compressed and then put to the archive. + /// You can keep only one such buffer at a time, a buffer returned by previous call + /// of the function `writeFile()` should be destroyed before next call of `writeFile()`. 
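startWritingFile()/endWritingFile() above enforce that only one WriteBuffer per archive is active at a time. A minimal standalone sketch of that claim/release idiom, with an illustrative class name:

// Minimal standalone sketch of the "only one file may be written at a time" guard.
#include <mutex>
#include <stdexcept>
#include <utility>

class SingleWriterGuard
{
public:
    void start()
    {
        std::lock_guard lock{mutex};
        if (std::exchange(is_writing_file, true))        // was already claimed
            throw std::logic_error("Cannot write two files to the archive in parallel");
    }

    void end()
    {
        std::lock_guard lock{mutex};
        is_writing_file = false;
    }

    bool busy() const
    {
        std::lock_guard lock{mutex};
        return is_writing_file;
    }

private:
    bool is_writing_file = false;
    mutable std::mutex mutex;
};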
+ std::unique_ptr writeFile(const String & filename) override; + /// LibArchive needs to know the size of the file being written. If the file size is not + /// passed in the the archive writer tries to infer the size by looking at the available + /// data in the buffer, if next is called before all data is written to the buffer + /// an exception is thrown. + std::unique_ptr writeFile(const String & filename, size_t size) override; + + /// Returns true if there is an active instance of WriteBuffer returned by writeFile(). + /// This function should be used mostly for debugging purposes. + bool isWritingFile() const override; + + /// Finalizes writing of the archive. This function must be always called at the end of writing. + /// (Unless an error appeared and the archive is in fact no longer needed.) + void finalize() override; + + /// Sets compression method and level. + /// Changing them will affect next file in the archive. + //void setCompression(const String & compression_method_, int compression_level_) override; + + /// Sets password. If the password is not empty it will enable encryption in the archive. + void setPassword(const String & password) override; + +protected: + using Archive = struct archive *; + using Entry = struct archive_entry *; + + /// derived classes must call createArchive. CreateArchive calls setFormatAndSettings. + void createArchive(); + virtual void setFormatAndSettings() = 0; + + Archive archive = nullptr; + String path_to_archive; + +private: + class WriteBufferFromLibArchive; + class StreamInfo; + + Archive getArchive(); + void startWritingFile(); + void endWritingFile(); + + std::unique_ptr stream_info TSA_GUARDED_BY(mutex) = nullptr; + bool is_writing_file TSA_GUARDED_BY(mutex) = false; + bool finalized TSA_GUARDED_BY(mutex) = false; + mutable std::mutex mutex; +}; +} +#endif diff --git a/src/IO/Archives/TarArchiveWriter.cpp b/src/IO/Archives/TarArchiveWriter.cpp new file mode 100644 index 00000000000..d390af89537 --- /dev/null +++ b/src/IO/Archives/TarArchiveWriter.cpp @@ -0,0 +1,42 @@ +#include + +#if USE_LIBARCHIVE +namespace DB +{ +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int CANNOT_PACK_ARCHIVE; +} +void TarArchiveWriter::setCompression(const String & compression_method_, int compression_level_) +{ + // throw an error unless setCompression is passed the default value + if (compression_method_.empty() && compression_level_ == -1) + return; + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Using compression_method and compression_level options are not supported for tar archives"); +} + +void TarArchiveWriter::setFormatAndSettings() +{ + archive_write_set_format_pax_restricted(archive); + inferCompressionFromPath(); +} + +void TarArchiveWriter::inferCompressionFromPath() +{ + if (path_to_archive.ends_with(".tar.gz") || path_to_archive.ends_with(".tgz")) + archive_write_add_filter_gzip(archive); + else if (path_to_archive.ends_with(".tar.bz2")) + archive_write_add_filter_bzip2(archive); + else if (path_to_archive.ends_with(".tar.lzma")) + archive_write_add_filter_lzma(archive); + else if (path_to_archive.ends_with(".tar.zst") || path_to_archive.ends_with(".tzst")) + archive_write_add_filter_zstd(archive); + else if (path_to_archive.ends_with(".tar.xz")) + archive_write_add_filter_xz(archive); + else if (!path_to_archive.ends_with(".tar")) + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression format"); +} +} +#endif diff --git a/src/IO/Archives/TarArchiveWriter.h b/src/IO/Archives/TarArchiveWriter.h new 
file mode 100644 index 00000000000..b04ec4083af --- /dev/null +++ b/src/IO/Archives/TarArchiveWriter.h @@ -0,0 +1,26 @@ +#pragma once + +#include "config.h" + +#if USE_LIBARCHIVE + +# include +namespace DB +{ +using namespace std::literals; + +class TarArchiveWriter : public LibArchiveWriter +{ +public: + explicit TarArchiveWriter(const String & path_to_archive_, std::unique_ptr archive_write_buffer_) + : LibArchiveWriter(path_to_archive_, std::move(archive_write_buffer_)) + { + createArchive(); + } + + void setCompression(const String & compression_method_, int compression_level_) override; + void setFormatAndSettings() override; + void inferCompressionFromPath(); +}; +} +#endif diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index 8c9c37e4ae0..2a9b7a43519 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -583,6 +583,15 @@ std::unique_ptr ZipArchiveReader::nextFile(std return std::make_unique(std::move(handle)); } +std::unique_ptr ZipArchiveReader::currentFile(std::unique_ptr read_buffer) +{ + if (!dynamic_cast(read_buffer.get())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()"); + auto read_buffer_from_zip = std::unique_ptr(static_cast(read_buffer.release())); + auto handle = std::move(*read_buffer_from_zip).releaseHandle(); + return std::make_unique(std::move(handle)); +} + std::vector ZipArchiveReader::getAllFiles() { return getAllFiles({}); diff --git a/src/IO/Archives/ZipArchiveReader.h b/src/IO/Archives/ZipArchiveReader.h index a8788064fec..4b1910839eb 100644 --- a/src/IO/Archives/ZipArchiveReader.h +++ b/src/IO/Archives/ZipArchiveReader.h @@ -47,6 +47,7 @@ public: /// It's possible to convert a file enumerator to a read buffer and vice versa. std::unique_ptr readFile(std::unique_ptr enumerator) override; std::unique_ptr nextFile(std::unique_ptr read_buffer) override; + std::unique_ptr currentFile(std::unique_ptr read_buffer) override; std::vector getAllFiles() override; std::vector getAllFiles(NameFilter filter) override; diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index 8cb4a2e0bd6..0cfe921f33f 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -274,6 +274,11 @@ std::unique_ptr ZipArchiveWriter::writeFile(const Strin return std::make_unique(std::static_pointer_cast(shared_from_this()), filename); } +std::unique_ptr ZipArchiveWriter::writeFile(const String & filename, [[maybe_unused]] size_t size) +{ + return ZipArchiveWriter::writeFile(filename); +} + bool ZipArchiveWriter::isWritingFile() const { std::lock_guard lock{mutex}; diff --git a/src/IO/Archives/ZipArchiveWriter.h b/src/IO/Archives/ZipArchiveWriter.h index 891da1a2e75..b2b77dce7e1 100644 --- a/src/IO/Archives/ZipArchiveWriter.h +++ b/src/IO/Archives/ZipArchiveWriter.h @@ -32,6 +32,9 @@ public: /// of the function `writeFile()` should be destroyed before next call of `writeFile()`. std::unique_ptr writeFile(const String & filename) override; + std::unique_ptr writeFile(const String & filename, size_t size) override; + + /// Returns true if there is an active instance of WriteBuffer returned by writeFile(). /// This function should be used mostly for debugging purposes. 
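TarArchiveWriter::inferCompressionFromPath() earlier in this diff picks the libarchive filter purely from the file suffix. A standalone sketch of the same dispatch that returns an enum instead of mutating an archive handle (names are illustrative):

// Standalone sketch of the suffix dispatch done by inferCompressionFromPath().
#include <optional>
#include <string>

enum class TarCompression { None, Gzip, Bzip2, Lzma, Zstd, Xz };

std::optional<TarCompression> inferTarCompression(const std::string & path)
{
    if (path.ends_with(".tar.gz") || path.ends_with(".tgz"))
        return TarCompression::Gzip;
    if (path.ends_with(".tar.bz2"))
        return TarCompression::Bzip2;
    if (path.ends_with(".tar.lzma"))
        return TarCompression::Lzma;
    if (path.ends_with(".tar.zst") || path.ends_with(".tzst"))
        return TarCompression::Zstd;
    if (path.ends_with(".tar.xz"))
        return TarCompression::Xz;
    if (path.ends_with(".tar"))
        return TarCompression::None;
    return std::nullopt;   // unknown suffix; the real code throws CANNOT_PACK_ARCHIVE here
}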
bool isWritingFile() const override; diff --git a/src/IO/Archives/createArchiveReader.cpp b/src/IO/Archives/createArchiveReader.cpp index 0c998971de1..782602091ac 100644 --- a/src/IO/Archives/createArchiveReader.cpp +++ b/src/IO/Archives/createArchiveReader.cpp @@ -1,6 +1,6 @@ -#include -#include #include +#include +#include #include @@ -8,8 +8,8 @@ namespace DB { namespace ErrorCodes { - extern const int CANNOT_UNPACK_ARCHIVE; - extern const int SUPPORT_IS_DISABLED; +extern const int CANNOT_UNPACK_ARCHIVE; +extern const int SUPPORT_IS_DISABLED; } @@ -25,16 +25,8 @@ std::shared_ptr createArchiveReader( [[maybe_unused]] size_t archive_size) { using namespace std::literals; - static constexpr std::array tar_extensions - { - ".tar"sv, - ".tar.gz"sv, - ".tgz"sv, - ".tar.zst"sv, - ".tzst"sv, - ".tar.xz"sv, - ".tar.bz2"sv - }; + static constexpr std::array tar_extensions{ + ".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv, ".tar.bz2"sv, ".tar.lzma"sv}; if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) { @@ -48,7 +40,7 @@ std::shared_ptr createArchiveReader( tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); })) { #if USE_LIBARCHIVE - return std::make_shared(path_to_archive); + return std::make_shared(path_to_archive, archive_read_function); #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled"); #endif diff --git a/src/IO/Archives/createArchiveWriter.cpp b/src/IO/Archives/createArchiveWriter.cpp index 807fe66e6a9..9a169587088 100644 --- a/src/IO/Archives/createArchiveWriter.cpp +++ b/src/IO/Archives/createArchiveWriter.cpp @@ -1,5 +1,7 @@ -#include +#include +#include #include +#include #include #include @@ -8,8 +10,8 @@ namespace DB { namespace ErrorCodes { - extern const int CANNOT_PACK_ARCHIVE; - extern const int SUPPORT_IS_DISABLED; +extern const int CANNOT_PACK_ARCHIVE; +extern const int SUPPORT_IS_DISABLED; } @@ -19,20 +21,30 @@ std::shared_ptr createArchiveWriter(const String & path_to_archi } -std::shared_ptr createArchiveWriter( - const String & path_to_archive, - [[maybe_unused]] std::unique_ptr archive_write_buffer) +std::shared_ptr +createArchiveWriter(const String & path_to_archive, [[maybe_unused]] std::unique_ptr archive_write_buffer) { + using namespace std::literals; + static constexpr std::array tar_extensions{ + ".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.bz2"sv, ".tar.lzma"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv}; if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) { #if USE_MINIZIP return std::make_shared(path_to_archive, std::move(archive_write_buffer)); #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled"); +#endif + } + else if (std::any_of( + tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); })) + { +#if USE_LIBARCHIVE + return std::make_shared(path_to_archive, std::move(archive_write_buffer)); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled"); #endif } else throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Cannot determine the type of archive {}", path_to_archive); } - } diff --git a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp index 6b2ef29d054..2a979f500f7 100644 --- a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp +++ b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp @@ -6,7 +6,10 
@@ namespace DB bool hasRegisteredArchiveFileExtension(const String & path) { - return path.ends_with(".zip") || path.ends_with(".zipx"); + using namespace std::literals; + static constexpr std::array archive_extensions{ + ".zip"sv, ".zipx"sv, ".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.bz2"sv, ".tar.lzma"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv}; + return std::any_of( + archive_extensions.begin(), archive_extensions.end(), [&](const auto extension) { return path.ends_with(extension); }); } - } diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index 279a399caad..815a7b2774e 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -54,6 +54,9 @@ public: struct Result { + /// The read data is at [buf + offset, buf + size), where `buf` is from Request struct. + /// (Notice that `offset` is included in `size`.) + /// size /// Less than requested amount of data can be returned. /// If size is zero - the file has ended. @@ -66,7 +69,7 @@ public: std::unique_ptr execution_watch = {}; - operator std::tuple() { return {size, offset}; } + explicit operator std::tuple() { return {size, offset}; } }; /// Submit request and obtain a handle. This method don't perform any waits. diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 83814f42693..1433f8d18ba 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h index 4c0a467b155..e98f00270e2 100644 --- a/src/IO/BufferBase.h +++ b/src/IO/BufferBase.h @@ -43,7 +43,7 @@ public: inline void resize(size_t size) { end_pos = begin_pos + size; } inline bool empty() const { return size() == 0; } - inline void swap(Buffer & other) + inline void swap(Buffer & other) noexcept { std::swap(begin_pos, other.begin_pos); std::swap(end_pos, other.end_pos); @@ -60,6 +60,9 @@ public: BufferBase(Position ptr, size_t size, size_t offset) : pos(ptr + offset), working_buffer(ptr, ptr + size), internal_buffer(ptr, ptr + size) {} + /// Assign the buffers and pos. + /// Be careful when calling this from ReadBuffer::nextImpl() implementations: `offset` is + /// effectively ignored because ReadBuffer::next() reassigns `pos`. 
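The set() documented above re-points a buffer at new memory, which is how CachedInMemoryReadBufferFromFile below makes its nested reader fill cache-owned memory directly. A deliberately simplified standalone illustration of the window-plus-cursor model (not the real BufferBase):

// Simplified illustration: a region [begin, end) plus a cursor `pos` inside it.
// set() re-points the window; note that a ReadBuffer's next() may reassign pos afterwards.
#include <cassert>
#include <cstddef>

struct BufferWindow
{
    char * begin = nullptr;
    char * end = nullptr;
    char * pos = nullptr;

    void set(char * ptr, size_t size, size_t offset)
    {
        begin = ptr;
        end = ptr + size;
        pos = ptr + offset;
    }

    size_t available() const { return static_cast<size_t>(end - pos); }
};

int main()
{
    char storage[16];
    BufferWindow w;
    w.set(storage, sizeof(storage), 4);
    assert(w.available() == 12);
    return 0;
}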
void set(Position ptr, size_t size, size_t offset) { internal_buffer = Buffer(ptr, ptr + size); @@ -82,7 +85,7 @@ public: /// How many bytes are available for read/write inline size_t available() const { return size_t(working_buffer.end() - pos); } - inline void swap(BufferBase & other) + inline void swap(BufferBase & other) noexcept { internal_buffer.swap(other.internal_buffer); working_buffer.swap(other.working_buffer); diff --git a/src/IO/CachedInMemoryReadBufferFromFile.cpp b/src/IO/CachedInMemoryReadBufferFromFile.cpp new file mode 100644 index 00000000000..ceaf0ca4752 --- /dev/null +++ b/src/IO/CachedInMemoryReadBufferFromFile.cpp @@ -0,0 +1,186 @@ +#include "CachedInMemoryReadBufferFromFile.h" +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNEXPECTED_END_OF_FILE; + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int SEEK_POSITION_OUT_OF_BOUND; +} + +CachedInMemoryReadBufferFromFile::CachedInMemoryReadBufferFromFile( + FileChunkAddress cache_key_, PageCachePtr cache_, std::unique_ptr in_, const ReadSettings & settings_) + : ReadBufferFromFileBase(0, nullptr, 0, in_->getFileSize()), cache_key(cache_key_), cache(cache_), settings(settings_), in(std::move(in_)) + , read_until_position(file_size.value()) +{ + cache_key.offset = 0; +} + +String CachedInMemoryReadBufferFromFile::getFileName() const +{ + return in->getFileName(); +} + +off_t CachedInMemoryReadBufferFromFile::seek(off_t off, int whence) +{ + if (whence != SEEK_SET) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); + + size_t offset = static_cast(off); + if (offset > file_size.value()) + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", off); + + if (offset >= file_offset_of_buffer_end - working_buffer.size() && offset <= file_offset_of_buffer_end) + { + pos = working_buffer.end() - (file_offset_of_buffer_end - offset); + chassert(getPosition() == off); + return off; + } + + resetWorkingBuffer(); + + file_offset_of_buffer_end = offset; + chunk.reset(); + + chassert(getPosition() == off); + return off; +} + +off_t CachedInMemoryReadBufferFromFile::getPosition() +{ + return file_offset_of_buffer_end - available(); +} + +size_t CachedInMemoryReadBufferFromFile::getFileOffsetOfBufferEnd() const +{ + return file_offset_of_buffer_end; +} + +void CachedInMemoryReadBufferFromFile::setReadUntilPosition(size_t position) +{ + read_until_position = position; + if (position < static_cast(getPosition())) + { + resetWorkingBuffer(); + chunk.reset(); + } + else if (position < file_offset_of_buffer_end) + { + size_t diff = file_offset_of_buffer_end - position; + working_buffer.resize(working_buffer.size() - diff); + file_offset_of_buffer_end -= diff; + } +} + +void CachedInMemoryReadBufferFromFile::setReadUntilEnd() +{ + setReadUntilPosition(file_size.value()); +} + +bool CachedInMemoryReadBufferFromFile::nextImpl() +{ + chassert(read_until_position <= file_size.value()); + if (file_offset_of_buffer_end >= read_until_position) + return false; + + if (chunk.has_value() && file_offset_of_buffer_end >= cache_key.offset + cache->chunkSize()) + { + chassert(file_offset_of_buffer_end == cache_key.offset + cache->chunkSize()); + chunk.reset(); + } + + if (!chunk.has_value()) + { + cache_key.offset = file_offset_of_buffer_end / cache->chunkSize() * cache->chunkSize(); + chunk = cache->getOrSet(cache_key.hash(), settings.read_from_page_cache_if_exists_otherwise_bypass_cache, 
settings.page_cache_inject_eviction); + + size_t chunk_size = std::min(cache->chunkSize(), file_size.value() - cache_key.offset); + + std::unique_lock download_lock(chunk->getChunk()->state.download_mutex); + + if (!chunk->isPrefixPopulated(chunk_size)) + { + /// A few things could be improved here, which may or may not be worth the added complexity: + /// * If the next file chunk is in cache, use in->setReadUntilPosition() to limit the read to + /// just one chunk. More generally, look ahead in the cache to count how many next chunks + /// need to be downloaded. (Up to some limit? And avoid changing `in`'s until-position if + /// it's already reasonable; otherwise we'd increase it by one chunk every chunk, discarding + /// a half-completed HTTP request every time.) + /// * If only a subset of pages are missing from this chunk, download only them, + /// with some threshold for avoiding short seeks. + /// In particular, if a previous download failed in the middle of the chunk, we could + /// resume from that position instead of from the beginning of the chunk. + /// (It's also possible in principle that a proper subset of chunk's pages was reclaimed + /// by the OS. But, for performance purposes, we should completely ignore that, because + /// (a) PageCache normally uses 2 MiB transparent huge pages and has just one such page + /// per chunk, and (b) even with 4 KiB pages partial chunk eviction is extremely rare.) + /// * If our [position, read_until_position) covers only part of the chunk, we could download + /// just that part. (Which would be bad if someone else needs the rest of the chunk and has + /// to do a whole new HTTP request to get it. Unclear what the policy should be.) + /// * Instead of doing in->next() in a loop until we get the whole chunk, we could return the + /// results as soon as in->next() produces them. + /// (But this would make the download_mutex situation much more complex, similar to the + /// FileSegment::State::PARTIALLY_DOWNLOADED and FileSegment::setRemoteFileReader() stuff.) + + Buffer prev_in_buffer = in->internalBuffer(); + SCOPE_EXIT({ in->set(prev_in_buffer.begin(), prev_in_buffer.size()); }); + + size_t pos = 0; + while (pos < chunk_size) + { + char * piece_start = chunk->getChunk()->data + pos; + size_t piece_size = chunk_size - pos; + in->set(piece_start, piece_size); + if (pos == 0) + in->seek(cache_key.offset, SEEK_SET); + else + chassert(!in->available()); + + if (in->eof()) + throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "File {} ended after {} bytes, but we expected {}", + getFileName(), cache_key.offset + pos, file_size.value()); + + chassert(in->position() >= piece_start && in->buffer().end() <= piece_start + piece_size); + chassert(in->getPosition() == static_cast(cache_key.offset + pos)); + + size_t n = in->available(); + chassert(n); + if (in->position() != piece_start) + memmove(piece_start, in->position(), n); + in->position() += n; + pos += n; + } + + chunk->markPrefixPopulated(chunk_size); + } + } + + nextimpl_working_buffer_offset = file_offset_of_buffer_end - cache_key.offset; + working_buffer = Buffer( + chunk->getChunk()->data, + chunk->getChunk()->data + std::min(chunk->getChunk()->size, read_until_position - cache_key.offset)); + pos = working_buffer.begin() + nextimpl_working_buffer_offset; + + if (!internal_buffer.empty()) + { + /// We were given an external buffer to read into. Copy the data into it. 
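The surrounding loop fills one cache chunk by aligning the current offset down to a chunk boundary and reading pieces until the prefix is complete. A standalone sketch of just that arithmetic and loop, assuming a pread-style callable instead of a real ReadBuffer or PageCache:

// Standalone sketch: align down to a chunk boundary, then read until the chunk is full.
#include <algorithm>
#include <cstddef>
#include <functional>
#include <stdexcept>
#include <vector>

using ReadAt = std::function<size_t(size_t offset, char * buf, size_t len)>; // returns 0 on EOF

std::vector<char> fillChunk(size_t file_offset, size_t file_size, size_t chunk_size, const ReadAt & read_at)
{
    const size_t chunk_start = file_offset / chunk_size * chunk_size;      // align down
    const size_t to_fill = std::min(chunk_size, file_size - chunk_start);  // last chunk may be short

    std::vector<char> chunk(to_fill);
    size_t pos = 0;
    while (pos < to_fill)
    {
        size_t n = read_at(chunk_start + pos, chunk.data() + pos, to_fill - pos);
        if (n == 0)
            throw std::runtime_error("file ended before the expected chunk size was read");
        pos += n;
    }
    return chunk;
}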
+ /// Would be nice to avoid this copy, somehow, maybe by making ReadBufferFromRemoteFSGather + /// and AsynchronousBoundedReadBuffer explicitly aware of the page cache. + size_t n = std::min(available(), internal_buffer.size()); + memcpy(internal_buffer.begin(), pos, n); + working_buffer = Buffer(internal_buffer.begin(), internal_buffer.begin() + n); + pos = working_buffer.begin(); + nextimpl_working_buffer_offset = 0; + } + + file_offset_of_buffer_end += available(); + + return true; +} + +} diff --git a/src/IO/CachedInMemoryReadBufferFromFile.h b/src/IO/CachedInMemoryReadBufferFromFile.h new file mode 100644 index 00000000000..300c2e82386 --- /dev/null +++ b/src/IO/CachedInMemoryReadBufferFromFile.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class CachedInMemoryReadBufferFromFile : public ReadBufferFromFileBase +{ +public: + /// `in_` must support using external buffer. I.e. we assign its internal_buffer before each next() + /// call and expect the read data to be put into that buffer. + /// `in_` should be seekable and should be able to read the whole file from 0 to in_->getFileSize(); + /// if you set `in_`'s read-until-position bypassing CachedInMemoryReadBufferFromFile then + /// CachedInMemoryReadBufferFromFile will break. + CachedInMemoryReadBufferFromFile(FileChunkAddress cache_key_, PageCachePtr cache_, std::unique_ptr in_, const ReadSettings & settings_); + + String getFileName() const override; + off_t seek(off_t off, int whence) override; + off_t getPosition() override; + size_t getFileOffsetOfBufferEnd() const override; + bool supportsRightBoundedReads() const override { return true; } + void setReadUntilPosition(size_t position) override; + void setReadUntilEnd() override; + +private: + FileChunkAddress cache_key; // .offset is offset of `chunk` start + PageCachePtr cache; + ReadSettings settings; + std::unique_ptr in; + + size_t file_offset_of_buffer_end = 0; + size_t read_until_position; + + std::optional chunk; + + bool nextImpl() override; +}; + +} diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index ecc0d64580b..c4b636103fe 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -20,7 +20,7 @@ ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithoutFailover(const Setti .withConnectionTimeout(settings.connect_timeout) .withSendTimeout(settings.send_timeout) .withReceiveTimeout(settings.receive_timeout) - .withTcpKeepAliveTimeout(settings.tcp_keep_alive_timeout) + .withTCPKeepAliveTimeout(settings.tcp_keep_alive_timeout) .withHandshakeTimeout(settings.handshake_timeout_ms) .withHedgedConnectionTimeout(settings.hedged_connection_timeout_ms) .withReceiveDataTimeout(settings.receive_data_timeout_ms); @@ -40,8 +40,8 @@ ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings .withConnectionTimeout(settings.http_connection_timeout) .withSendTimeout(settings.http_send_timeout) .withReceiveTimeout(settings.http_receive_timeout) - .withHttpKeepAliveTimeout(http_keep_alive_timeout) - .withTcpKeepAliveTimeout(settings.tcp_keep_alive_timeout) + .withHTTPKeepAliveTimeout(http_keep_alive_timeout) + .withTCPKeepAliveTimeout(settings.tcp_keep_alive_timeout) .withHandshakeTimeout(settings.handshake_timeout_ms); } @@ -141,4 +141,19 @@ ConnectionTimeouts ConnectionTimeouts::getAdaptiveTimeouts(const String & method .withReceiveTimeout(saturate(recv, receive_timeout)); } +void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts) +{ + 
session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); + session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); +} + +ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session) +{ + return ConnectionTimeouts() + .withConnectionTimeout(session.getConnectionTimeout()) + .withSendTimeout(session.getSendTimeout()) + .withReceiveTimeout(session.getReceiveTimeout()) + .withHTTPKeepAliveTimeout(session.getKeepAliveTimeout()); +} + } diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index 6967af08204..2b2ab0e7ab8 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -4,6 +4,7 @@ #include #include +#include #include namespace DB @@ -16,8 +17,8 @@ struct Settings; M(secure_connection_timeout, withSecureConnectionTimeout) \ M(send_timeout, withSendTimeout) \ M(receive_timeout, withReceiveTimeout) \ - M(tcp_keep_alive_timeout, withTcpKeepAliveTimeout) \ - M(http_keep_alive_timeout, withHttpKeepAliveTimeout) \ + M(tcp_keep_alive_timeout, withTCPKeepAliveTimeout) \ + M(http_keep_alive_timeout, withHTTPKeepAliveTimeout) \ M(hedged_connection_timeout, withHedgedConnectionTimeout) \ M(receive_data_timeout, withReceiveDataTimeout) \ M(handshake_timeout, withHandshakeTimeout) \ @@ -111,4 +112,7 @@ inline ConnectionTimeouts & ConnectionTimeouts::withConnectionTimeout(Poco::Time return *this; } +void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts); +ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session); + } diff --git a/src/IO/DoubleConverter.h b/src/IO/DoubleConverter.h index 18cbe4e3a1d..45721da5248 100644 --- a/src/IO/DoubleConverter.h +++ b/src/IO/DoubleConverter.h @@ -1,17 +1,13 @@ #pragma once -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdouble-promotion" -#endif #include #include #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif namespace DB diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index c4468a1b896..09f7724d613 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -2,13 +2,7 @@ #include #include -#include -#include #include -#include -#include -#include -#include #include "config.h" @@ -25,338 +19,18 @@ #include -#include -#include +#include #include #include -namespace ProfileEvents -{ - extern const Event CreatedHTTPConnections; -} - namespace DB { + namespace ErrorCodes { extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; - extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; - extern const int UNSUPPORTED_URI_SCHEME; - extern const int LOGICAL_ERROR; -} - - -namespace -{ - Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const ProxyConfiguration & proxy_configuration) - { - Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config; - - poco_proxy_config.host = proxy_configuration.host; - poco_proxy_config.port = proxy_configuration.port; - poco_proxy_config.protocol = ProxyConfiguration::protocolToString(proxy_configuration.protocol); - poco_proxy_config.tunnel = proxy_configuration.tunneling; - poco_proxy_config.originalRequestProtocol = ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol); - - return poco_proxy_config; - } - - template - requires std::derived_from - class HTTPSessionAdapter : public Session - { - static_assert(std::has_virtual_destructor_v, "The base class must have a virtual destructor"); - - public: - 
HTTPSessionAdapter(const std::string & host, UInt16 port) : Session(host, port), log{getLogger("HTTPSessionAdapter")} { } - ~HTTPSessionAdapter() override = default; - - protected: - void reconnect() override - { - // First of all will try to establish connection with last used addr. - if (!Session::getResolvedHost().empty()) - { - try - { - Session::reconnect(); - return; - } - catch (...) - { - Session::close(); - LOG_TRACE( - log, - "Last ip ({}) is unreachable for {}:{}. Will try another resolved address.", - Session::getResolvedHost(), - Session::getHost(), - Session::getPort()); - } - } - - const auto endpoinds = DNSResolver::instance().resolveHostAll(Session::getHost()); - - for (auto it = endpoinds.begin();;) - { - try - { - Session::setResolvedHost(it->toString()); - Session::reconnect(); - - LOG_TRACE( - log, - "Created HTTP(S) session with {}:{} ({}:{})", - Session::getHost(), - Session::getPort(), - it->toString(), - Session::getPort()); - - break; - } - catch (...) - { - Session::close(); - if (++it == endpoinds.end()) - { - Session::setResolvedHost(""); - throw; - } - LOG_TRACE( - log, - "Failed to create connection with {}:{}, Will try another resolved address. {}", - Session::getResolvedHost(), - Session::getPort(), - getCurrentExceptionMessage(false)); - } - } - } - LoggerPtr log; - }; - - bool isHTTPS(const Poco::URI & uri) - { - if (uri.getScheme() == "https") - return true; - else if (uri.getScheme() == "http") - return false; - else - throw Exception(ErrorCodes::UNSUPPORTED_URI_SCHEME, "Unsupported scheme in URI '{}'", uri.toString()); - } - - HTTPSessionPtr makeHTTPSessionImpl( - const std::string & host, - UInt16 port, - bool https, - bool keep_alive, - DB::ProxyConfiguration proxy_configuration = {}) - { - HTTPSessionPtr session; - - if (!proxy_configuration.host.empty()) - { - bool is_proxy_http_and_is_tunneling_off = DB::ProxyConfiguration::Protocol::HTTP == proxy_configuration.protocol - && !proxy_configuration.tunneling; - - // If it is an HTTPS request, proxy server is HTTP and user opted for tunneling off, we must not create an HTTPS request. - // The desired flow is: HTTP request to the proxy server, then proxy server will initiate an HTTPS request to the target server. - // There is a weak link in the security, but that's what the user opted for. - if (https && is_proxy_http_and_is_tunneling_off) - { - https = false; - } - } - - if (https) - { -#if USE_SSL - session = std::make_shared>(host, port); -#else - throw Exception(ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME, "ClickHouse was built without HTTPS support"); -#endif - } - else - { - session = std::make_shared>(host, port); - } - - ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections); - - /// doesn't work properly without patch - session->setKeepAlive(keep_alive); - - if (!proxy_configuration.host.empty()) - { - session->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration)); - } - - return session; - } - - class SingleEndpointHTTPSessionPool : public PoolBase - { - private: - const std::string host; - const UInt16 port; - const bool https; - ProxyConfiguration proxy_config; - - using Base = PoolBase; - - ObjectPtr allocObject() override - { - /// Pool is global, we shouldn't attribute this memory to query/user. 
- MemoryTrackerSwitcher switcher{&total_memory_tracker}; - - auto session = makeHTTPSessionImpl(host, port, https, true, proxy_config); - return session; - } - - public: - SingleEndpointHTTPSessionPool( - const std::string & host_, - UInt16 port_, - bool https_, - ProxyConfiguration proxy_config_, - size_t max_pool_size_, - bool wait_on_pool_size_limit) - : Base( - static_cast(max_pool_size_), - getLogger("HTTPSessionPool"), - wait_on_pool_size_limit ? BehaviourOnLimit::Wait : BehaviourOnLimit::AllocateNewBypassingPool) - , host(host_) - , port(port_) - , https(https_) - , proxy_config(proxy_config_) - { - } - }; - - class HTTPSessionPool : private boost::noncopyable - { - public: - struct Key - { - String target_host; - UInt16 target_port; - bool is_target_https; - ProxyConfiguration proxy_config; - bool wait_on_pool_size_limit; - - bool operator ==(const Key & rhs) const - { - return std::tie( - target_host, - target_port, - is_target_https, - proxy_config.host, - proxy_config.port, - proxy_config.protocol, - proxy_config.tunneling, - proxy_config.original_request_protocol, - wait_on_pool_size_limit) - == std::tie( - rhs.target_host, - rhs.target_port, - rhs.is_target_https, - rhs.proxy_config.host, - rhs.proxy_config.port, - rhs.proxy_config.protocol, - rhs.proxy_config.tunneling, - rhs.proxy_config.original_request_protocol, - rhs.wait_on_pool_size_limit); - } - }; - - private: - using PoolPtr = std::shared_ptr; - using Entry = SingleEndpointHTTPSessionPool::Entry; - - struct Hasher - { - size_t operator()(const Key & k) const - { - SipHash s; - s.update(k.target_host); - s.update(k.target_port); - s.update(k.is_target_https); - s.update(k.proxy_config.host); - s.update(k.proxy_config.port); - s.update(k.proxy_config.protocol); - s.update(k.proxy_config.tunneling); - s.update(k.proxy_config.original_request_protocol); - s.update(k.wait_on_pool_size_limit); - return s.get64(); - } - }; - - std::mutex mutex; - std::unordered_map endpoints_pool; - - protected: - HTTPSessionPool() = default; - - public: - static auto & instance() - { - static HTTPSessionPool instance; - return instance; - } - - Entry getSession( - const Poco::URI & uri, - const ProxyConfiguration & proxy_config, - const ConnectionTimeouts & timeouts, - size_t max_connections_per_endpoint, - bool wait_on_pool_size_limit) - { - std::unique_lock lock(mutex); - const std::string & host = uri.getHost(); - UInt16 port = uri.getPort(); - bool https = isHTTPS(uri); - - HTTPSessionPool::Key key{host, port, https, proxy_config, wait_on_pool_size_limit}; - auto pool_ptr = endpoints_pool.find(key); - if (pool_ptr == endpoints_pool.end()) - std::tie(pool_ptr, std::ignore) = endpoints_pool.emplace( - key, - std::make_shared( - host, - port, - https, - proxy_config, - max_connections_per_endpoint, - wait_on_pool_size_limit)); - - /// Some routines held session objects until the end of its lifetime. Also this routines may create another sessions in this time frame. - /// If some other session holds `lock` because it waits on another lock inside `pool_ptr->second->get` it isn't possible to create any - /// new session and thus finish routine, return session to the pool and unlock the thread waiting inside `pool_ptr->second->get`. - /// To avoid such a deadlock we unlock `lock` before entering `pool_ptr->second->get`. 
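The deadlock note above (from the removed `HTTPSessionPool::getSession`) boils down to one rule: release the registry mutex before blocking on the per-endpoint pool, so that threads returning sessions are never stuck behind a thread that is waiting for one. A hedged sketch of the pattern with illustrative types:

```cpp
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

template <typename Pool>
auto getFromEndpointPool(std::mutex & registry_mutex,
                         std::unordered_map<std::string, std::shared_ptr<Pool>> & registry,
                         const std::string & key)
{
    std::unique_lock lock(registry_mutex);
    auto it = registry.find(key);
    if (it == registry.end())
        it = registry.emplace(key, std::make_shared<Pool>()).first;
    auto pool = it->second;

    /// Release the registry lock before the potentially blocking get():
    /// a thread blocked inside get() while still holding the registry lock
    /// would prevent other threads from returning sessions to the pool.
    lock.unlock();
    return pool->get();
}
```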
- lock.unlock(); - - auto retry_timeout = timeouts.connection_timeout.totalMilliseconds(); - auto session = pool_ptr->second->get(retry_timeout); - - const auto & session_data = session->sessionData(); - if (session_data.empty() || !Poco::AnyCast(&session_data)) - { - /// Reset session if it is not reusable. See comment for HTTPSessionReuseTag. - session->reset(); - } - session->attachSessionData({}); - - setTimeouts(*session, timeouts); - - return session; - } - }; -} - -void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts) -{ - session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); - session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); } void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout) @@ -370,28 +44,13 @@ void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_ } HTTPSessionPtr makeHTTPSession( + HTTPConnectionGroupType group, const Poco::URI & uri, const ConnectionTimeouts & timeouts, - ProxyConfiguration proxy_configuration -) + ProxyConfiguration proxy_configuration) { - const std::string & host = uri.getHost(); - UInt16 port = uri.getPort(); - bool https = isHTTPS(uri); - - auto session = makeHTTPSessionImpl(host, port, https, false, proxy_configuration); - setTimeouts(*session, timeouts); - return session; -} - -PooledHTTPSessionPtr makePooledHTTPSession( - const Poco::URI & uri, - const ConnectionTimeouts & timeouts, - size_t per_endpoint_pool_size, - bool wait_on_pool_size_limit, - ProxyConfiguration proxy_config) -{ - return HTTPSessionPool::instance().getSession(uri, proxy_config, timeouts, per_endpoint_pool_size, wait_on_pool_size_limit); + auto connection_pool = HTTPConnectionPools::instance().getPool(group, uri, proxy_configuration); + return connection_pool->getConnection(timeouts); } bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status) { return status == Poco::Net::HTTPResponse::HTTP_MOVED_PERMANENTLY || status == Poco::Net::HTTPResponse::HTTP_FOUND || status == Poco::Net::HTTPResponse::HTTP_SEE_OTHER || status == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT; } @@ -400,11 +59,11 @@ std::istream * receiveResponse( Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, const bool allow_redirects) { auto & istr = session.receiveResponse(response); - assertResponseIsOk(request, response, istr, allow_redirects); + assertResponseIsOk(request.getURI(), response, istr, allow_redirects); return &istr; } -void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects) +void assertResponseIsOk(const String & uri, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects) { auto status = response.getStatus(); @@ -422,7 +81,7 @@ void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPR body.exceptions(std::ios::failbit); body << istr.rdbuf(); - throw HTTPException(code, request.getURI(), status, response.getReason(), body.str()); + throw HTTPException(code, uri, status, response.getReason(), body.str()); } } @@ -440,24 +99,4 @@ Exception HTTPException::makeExceptionMessage( uri, static_cast(http_status), reason, body); } -void markSessionForReuse(Poco::Net::HTTPSession & session) -{ - const auto & session_data = session.sessionData(); - if (!session_data.empty() && !Poco::AnyCast(&session_data)) - throw Exception( - 
ErrorCodes::LOGICAL_ERROR, "Data of an unexpected type ({}) is attached to the session", session_data.type().name()); - - session.attachSessionData(HTTPSessionReuseTag{}); -} - -void markSessionForReuse(HTTPSessionPtr session) -{ - markSessionForReuse(*session); -} - -void markSessionForReuse(PooledHTTPSessionPtr session) -{ - markSessionForReuse(static_cast(*session)); -} - } diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index c9968fc6915..e27269e2559 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -7,9 +7,9 @@ #include #include #include -#include -#include #include +#include +#include #include @@ -36,7 +36,7 @@ public: HTTPException * clone() const override { return new HTTPException(*this); } void rethrow() const override { throw *this; } - int getHTTPStatus() const { return http_status; } + Poco::Net::HTTPResponse::HTTPStatus getHTTPStatus() const { return http_status; } private: Poco::Net::HTTPResponse::HTTPStatus http_status{}; @@ -52,55 +52,18 @@ private: const char * className() const noexcept override { return "DB::HTTPException"; } }; -using PooledHTTPSessionPtr = PoolBase::Entry; // SingleEndpointHTTPSessionPool::Entry using HTTPSessionPtr = std::shared_ptr; -/// If a session have this tag attached, it will be reused without calling `reset()` on it. -/// All pooled sessions don't have this tag attached after being taken from a pool. -/// If the request and the response were fully written/read, the client code should add this tag -/// explicitly by calling `markSessionForReuse()`. -/// -/// Note that HTTP response may contain extra bytes after the last byte of the payload. Specifically, -/// when chunked encoding is used, there's an empty chunk at the end. Those extra bytes must also be -/// read before the session can be reused. So we usually put an `istr->ignore(INT64_MAX)` call -/// before `markSessionForReuse()`. -struct HTTPSessionReuseTag -{ -}; - -void markSessionForReuse(Poco::Net::HTTPSession & session); -void markSessionForReuse(HTTPSessionPtr session); -void markSessionForReuse(PooledHTTPSessionPtr session); - - void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); /// Create session object to perform requests and set required parameters. HTTPSessionPtr makeHTTPSession( + HTTPConnectionGroupType group, const Poco::URI & uri, const ConnectionTimeouts & timeouts, ProxyConfiguration proxy_config = {} ); -/// As previous method creates session, but takes it from pool, without and with proxy uri. -/// -/// The max_connections_per_endpoint parameter makes it look like the pool size can be different for -/// different requests (whatever that means), but actually we just assign the endpoint's connection -/// pool size when we see the endpoint for the first time, then we never change it. -/// We should probably change how this configuration works, and how this pooling works in general: -/// * Make the per_endpoint_pool_size be a global server setting instead of per-disk or per-query. -/// * Have boolean per-disk/per-query settings for enabling/disabling pooling. -/// * Add a limit on the number of endpoints and the total number of sessions across all endpoints. -/// * Enable pooling by default everywhere. In particular StorageURL and StorageS3. -/// (Enabling it for StorageURL is scary without the previous item - the user may query lots of -/// different endpoints. So currently pooling is mainly used for S3.) 
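With `makePooledHTTPSession()` and the per-call pool sizing parameters gone, callers are expected to go through the group-aware `makeHTTPSession()` declared in this header; the pool lookup now happens inside `HTTPConnectionPools`. A usage sketch under stated assumptions: it relies on the ClickHouse tree, and the enumerator name `HTTPConnectionGroupType::HTTP` is an assumption, since the enum's values are not part of this hunk.

```cpp
// Sketch only: assumes ClickHouse headers; the group enumerator name is illustrative.
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <Poco/URI.h>

DB::HTTPSessionPtr openSession(const Poco::URI & uri, const DB::ConnectionTimeouts & timeouts)
{
    /// The connection group replaces the old per-endpoint pool parameters
    /// (per_endpoint_pool_size, wait_on_pool_size_limit) that the removed
    /// makePooledHTTPSession() used to take.
    return DB::makeHTTPSession(DB::HTTPConnectionGroupType::HTTP, uri, timeouts, /*proxy_config=*/ {});
}
```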
-PooledHTTPSessionPtr makePooledHTTPSession( - const Poco::URI & uri, - const ConnectionTimeouts & timeouts, - size_t per_endpoint_pool_size, - bool wait_on_pool_size_limit = true, - ProxyConfiguration proxy_config = {}); - bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status); /** Used to receive response (response headers and possibly body) @@ -112,7 +75,6 @@ std::istream * receiveResponse( Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects); void assertResponseIsOk( - const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, bool allow_redirects = false); + const String & uri, Poco::Net::HTTPResponse & response, std::istream & istr, bool allow_redirects = false); -void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts); } diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp index e14112f8d19..84c7ac86227 100644 --- a/src/IO/LimitReadBuffer.cpp +++ b/src/IO/LimitReadBuffer.cpp @@ -1,5 +1,4 @@ #include - #include @@ -15,7 +14,7 @@ namespace ErrorCodes bool LimitReadBuffer::nextImpl() { - assert(position() >= in->position()); + chassert(position() >= in->position()); /// Let underlying buffer calculate read bytes in `next()` call. in->position() = position(); @@ -39,20 +38,18 @@ bool LimitReadBuffer::nextImpl() if (exact_limit && bytes != *exact_limit) throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected EOF, got {} of {} bytes", bytes, *exact_limit); /// Clearing the buffer with existing data. - set(in->position(), 0); + BufferBase::set(in->position(), 0, 0); + return false; } - working_buffer = in->buffer(); - - if (limit - bytes < working_buffer.size()) - working_buffer.resize(limit - bytes); + BufferBase::set(in->position(), std::min(in->available(), limit - bytes), 0); return true; } -LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, +LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_) : ReadBuffer(in_ ? 
in_->position() : nullptr, 0) , in(in_) @@ -62,24 +59,20 @@ LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, boo , exact_limit(exact_limit_) , exception_message(std::move(exception_message_)) { - assert(in); + chassert(in); - size_t remaining_bytes_in_buffer = in->buffer().end() - in->position(); - if (remaining_bytes_in_buffer > limit) - remaining_bytes_in_buffer = limit; - - working_buffer = Buffer(in->position(), in->position() + remaining_bytes_in_buffer); + BufferBase::set(in->position(), std::min(in->available(), limit), 0); } -LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, +LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_) : LimitReadBuffer(&in_, false, limit_, throw_exception_, exact_limit_, exception_message_) { } -LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, +LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_) : LimitReadBuffer(in_.release(), true, limit_, throw_exception_, exact_limit_, exception_message_) { diff --git a/src/IO/LimitReadBuffer.h b/src/IO/LimitReadBuffer.h index 15885c1d850..b869f2935fb 100644 --- a/src/IO/LimitReadBuffer.h +++ b/src/IO/LimitReadBuffer.h @@ -13,22 +13,24 @@ namespace DB class LimitReadBuffer : public ReadBuffer { public: - LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, + LimitReadBuffer(ReadBuffer & in_, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_ = {}); - LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, std::optional exact_limit_, + LimitReadBuffer(std::unique_ptr in_, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_ = {}); ~LimitReadBuffer() override; private: ReadBuffer * in; - bool owns_in; + const bool owns_in; - UInt64 limit; - bool throw_exception; - std::optional exact_limit; - std::string exception_message; + const size_t limit; + const bool throw_exception; + const std::optional exact_limit; + const std::string exception_message; - LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_); + LoggerPtr log; + + LimitReadBuffer(ReadBuffer * in_, bool owns, size_t limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_); bool nextImpl() override; }; diff --git a/src/IO/MMapReadBufferFromFileDescriptor.cpp b/src/IO/MMapReadBufferFromFileDescriptor.cpp index 9b1c132cc01..f27828f71b2 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.cpp +++ b/src/IO/MMapReadBufferFromFileDescriptor.cpp @@ -92,7 +92,7 @@ size_t MMapReadBufferFromFileDescriptor::getFileSize() return getSizeFromFileDescriptor(getFD(), getFileName()); } -size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) +size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) const { if (offset >= mapped.getLength()) return 0; diff --git a/src/IO/MMapReadBufferFromFileDescriptor.h b/src/IO/MMapReadBufferFromFileDescriptor.h index 2a039e04971..f774538374a 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.h +++ b/src/IO/MMapReadBufferFromFileDescriptor.h @@ -40,7 +40,7 @@ public: size_t getFileSize() 
override; - size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) override; + size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) const override; bool supportsReadAt() override { return true; } }; diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index e76b40f77b7..daac1190399 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/IO/Progress.h b/src/IO/Progress.h index 0188f636f42..d0afc9d845f 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index b45bc8f3dbc..056e25a5fbe 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -63,21 +63,23 @@ public: */ bool next() { - assert(!hasPendingData()); - assert(position() <= working_buffer.end()); + chassert(!hasPendingData()); + chassert(position() <= working_buffer.end()); bytes += offset(); bool res = nextImpl(); if (!res) + { working_buffer = Buffer(pos, pos); + } else { - pos = working_buffer.begin() + nextimpl_working_buffer_offset; - assert(position() != working_buffer.end()); + pos = working_buffer.begin() + std::min(nextimpl_working_buffer_offset, working_buffer.size()); + chassert(position() < working_buffer.end()); } nextimpl_working_buffer_offset = 0; - assert(position() <= working_buffer.end()); + chassert(position() <= working_buffer.end()); return res; } @@ -225,11 +227,22 @@ public: * - seek() to a position above the until position (even if you setReadUntilPosition() to a * higher value right after the seek!), * - * Typical implementations discard any current buffers and connections, even if the position is - * adjusted only a little. + * Implementations are recommended to: + * - Allow the read-until-position to go below current position, e.g.: + * // Read block [300, 400) + * setReadUntilPosition(400); + * seek(300); + * next(); + * // Read block [100, 200) + * setReadUntilPosition(200); // oh oh, this is below the current position, but should be allowed + * seek(100); // but now everything's fine again + * next(); + * // (Swapping the order of seek and setReadUntilPosition doesn't help: then it breaks if the order of blocks is reversed.) + * - Check if new read-until-position value is equal to the current value and do nothing in this case, + * so that the caller doesn't have to. * - * Typical usage is to call it right after creating the ReadBuffer, before it started doing any - * work. + * Typical implementations discard any current buffers and connections when the + * read-until-position changes even by a small (nonzero) amount. 
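Several `readBigAt()`/`readImpl()` signatures in this hunk gain `const`: a positional read goes through `pread()`-style access, so it neither moves the file offset nor mutates the buffer state, which is also why the surrounding comments call it thread safe. A plausible standalone illustration of such a const positional read (plain POSIX, not the ClickHouse wrapper):

```cpp
#include <cerrno>
#include <cstddef>
#include <system_error>
#include <unistd.h>

class PositionalReader
{
public:
    explicit PositionalReader(int fd_) : fd(fd_) {}

    /// Reads up to n bytes at `offset` without moving the fd position,
    /// so it can be const and called from several threads concurrently.
    size_t readBigAt(char * to, size_t n, size_t offset) const
    {
        size_t total = 0;
        while (total < n)
        {
            ssize_t res = ::pread(fd, to + total, n - total, static_cast<off_t>(offset + total));
            if (res == 0)
                break; // EOF
            if (res < 0)
            {
                if (errno == EINTR)
                    continue;
                throw std::system_error(errno, std::generic_category(), "pread failed");
            }
            total += static_cast<size_t>(res);
        }
        return total;
    }

private:
    int fd;
};
```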
*/ virtual void setReadUntilPosition(size_t /* position */) {} diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 3211f8eeb35..57442a15853 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -49,7 +49,7 @@ std::string ReadBufferFromFileDescriptor::getFileName() const } -size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) +size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) const { chassert(min_bytes <= max_bytes); @@ -265,7 +265,7 @@ bool ReadBufferFromFileDescriptor::checkIfActuallySeekable() return res == 0 && S_ISREG(stat.st_mode); } -size_t ReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) +size_t ReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) const { chassert(use_pread); return readImpl(to, n, n, offset); diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 4762998c67b..db256ef91c7 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -34,7 +34,7 @@ protected: /// Doesn't seek (`offset` must match fd's position if !use_pread). /// Stops after min_bytes or eof. Returns 0 if eof. /// Thread safe. - size_t readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset); + size_t readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) const; public: explicit ReadBufferFromFileDescriptor( @@ -73,7 +73,7 @@ public: bool checkIfActuallySeekable() override; - size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) override; + size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) const override; bool supportsReadAt() override { return use_pread; } }; diff --git a/src/IO/ReadBufferFromIStream.cpp b/src/IO/ReadBufferFromIStream.cpp index 52546f1703d..bc90ec7ed15 100644 --- a/src/IO/ReadBufferFromIStream.cpp +++ b/src/IO/ReadBufferFromIStream.cpp @@ -5,52 +5,44 @@ namespace DB { -namespace ErrorCodes -{ - extern const int CANNOT_READ_FROM_ISTREAM; -} - bool ReadBufferFromIStream::nextImpl() { if (eof) return false; + chassert(internal_buffer.begin() != nullptr); + chassert(!internal_buffer.empty()); + size_t bytes_read = 0; char * read_to = internal_buffer.begin(); /// It is necessary to read in a loop, since socket usually returns only data available at the moment. while (bytes_read < internal_buffer.size()) { - try + const auto bytes_read_last_time = stream_buf.readFromDevice(read_to, internal_buffer.size() - bytes_read); + if (bytes_read_last_time <= 0) { - const auto bytes_read_last_time = stream_buf.readFromDevice(read_to, internal_buffer.size() - bytes_read); - if (bytes_read_last_time <= 0) - { - eof = true; - break; - } + eof = true; + break; + } - bytes_read += bytes_read_last_time; - read_to += bytes_read_last_time; - } - catch (...) 
- { - throw Exception( - ErrorCodes::CANNOT_READ_FROM_ISTREAM, - "Cannot read from istream at offset {}: {}", - count(), - getCurrentExceptionMessage(/*with_stacktrace=*/true)); - } + bytes_read += bytes_read_last_time; + read_to += bytes_read_last_time; } if (bytes_read) + { + working_buffer = internal_buffer; working_buffer.resize(bytes_read); + } return bytes_read; } ReadBufferFromIStream::ReadBufferFromIStream(std::istream & istr_, size_t size) - : BufferWithOwnMemory(size), istr(istr_), stream_buf(dynamic_cast(*istr.rdbuf())) + : BufferWithOwnMemory(size) + , istr(istr_) + , stream_buf(dynamic_cast(*istr.rdbuf())) { } diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 4529771e7b2..491ff253066 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -34,61 +34,6 @@ namespace ProfileEvents extern const Event RemoteReadThrottlerSleepMicroseconds; } -namespace -{ -DB::PooledHTTPSessionPtr getSession(Aws::S3::Model::GetObjectResult & read_result) -{ - if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody())) - return static_cast(session_aware_stream->getSession()); - - if (dynamic_cast *>(&read_result.GetBody())) - return {}; - - /// accept result from S# mock in gtest_writebuffer_s3.cpp - if (dynamic_cast(&read_result.GetBody())) - return {}; - - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); -} - -void resetSession(Aws::S3::Model::GetObjectResult & read_result) -{ - if (auto session = getSession(read_result); !session.isNull()) - { - auto & http_session = static_cast(*session); - http_session.reset(); - } -} - -void resetSessionIfNeeded(bool read_all_range_successfully, std::optional & read_result) -{ - if (!read_result) - return; - - if (!read_all_range_successfully) - { - /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object this delete - /// operation will hang until GetObject's session idle timeouts. So we have to call `reset()` on GetObject's session session immediately. - resetSession(*read_result); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); - } - else if (auto session = getSession(*read_result); !session.isNull()) - { - if (!session->getProxyHost().empty()) - { - /// Reset proxified sessions because proxy can change for every request. See ProxyConfigurationResolver. - resetSession(*read_result); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); - } - else - { - DB::markSessionForReuse(session); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); - } - } -} -} - namespace DB { namespace ErrorCodes @@ -228,7 +173,7 @@ bool ReadBufferFromS3::nextImpl() } -size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) +size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) const { size_t initial_n = n; size_t sleep_time_with_backoff_milliseconds = 100; @@ -240,29 +185,6 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromS3Microseconds); std::optional result; - /// Connection is reusable if we've read the full response. 
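The reworked `ReadBufferFromIStream::nextImpl` above keeps reading from the underlying stream buffer until the destination is full or the device reports EOF, because a socket usually returns only the data currently available. A generic standalone sketch of that loop using `std::streambuf` (the ClickHouse version talks to Poco's buffered stream via `readFromDevice` instead):

```cpp
#include <cstddef>
#include <istream>

/// Fill `to[0, size)` from the stream's buffer; returns the number of bytes read.
/// Stops early only on EOF, because a single read may return less than requested.
size_t readFully(std::istream & in, char * to, size_t size)
{
    std::streambuf & buf = *in.rdbuf();
    size_t bytes_read = 0;
    while (bytes_read < size)
    {
        std::streamsize n = buf.sgetn(to + bytes_read, static_cast<std::streamsize>(size - bytes_read));
        if (n <= 0)
            break; // EOF or no more data from the device
        bytes_read += static_cast<size_t>(n);
    }
    return bytes_read;
}
```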
- bool session_is_reusable = false; - SCOPE_EXIT( - { - if (!result.has_value()) - return; - if (session_is_reusable) - { - auto session = getSession(*result); - if (!session.isNull()) - { - DB::markSessionForReuse(session); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); - } - else - session_is_reusable = false; - } - if (!session_is_reusable) - { - resetSession(*result); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); - } - }); try { @@ -276,9 +198,8 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons if (read_settings.remote_throttler) read_settings.remote_throttler->add(bytes_copied, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); - /// Read remaining bytes after the end of the payload, see HTTPSessionReuseTag. + /// Read remaining bytes after the end of the payload istr.ignore(INT64_MAX); - session_is_reusable = true; } catch (Poco::Exception & e) { @@ -451,21 +372,8 @@ bool ReadBufferFromS3::atEndOfRequestedRangeGuess() return false; } -ReadBufferFromS3::~ReadBufferFromS3() -{ - try - { - resetSessionIfNeeded(readAllRangeSuccessfully(), read_result); - } - catch (...) - { - tryLogCurrentException(log); - } -} - std::unique_ptr ReadBufferFromS3::initialize(size_t attempt) { - resetSessionIfNeeded(readAllRangeSuccessfully(), read_result); read_all_range_successfully = false; /** @@ -534,10 +442,6 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t attempt, si } } -bool ReadBufferFromS3::readAllRangeSuccessfully() const -{ - return read_until_position ? offset == read_until_position : read_all_range_successfully; -} } #endif diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index f28c23a71d7..003c88df7d2 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -55,7 +55,7 @@ public: bool restricted_seek_ = false, std::optional file_size = std::nullopt); - ~ReadBufferFromS3() override; + ~ReadBufferFromS3() override = default; bool nextImpl() override; @@ -74,7 +74,7 @@ public: String getFileName() const override { return bucket + "/" + key; } - size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) override; + size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) const override; bool supportsReadAt() override { return true; } @@ -90,8 +90,6 @@ private: Aws::S3::Model::GetObjectResult sendRequest(size_t attempt, size_t range_begin, std::optional range_end_incl) const; - bool readAllRangeSuccessfully() const; - ReadSettings read_settings; bool use_external_buffer; diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index c397689d6ad..6a0cac35878 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -61,6 +61,7 @@ enum class RemoteFSReadMethod }; class MMappedFileCache; +class PageCache; struct ReadSettings { @@ -98,9 +99,13 @@ struct ReadSettings bool enable_filesystem_cache = true; bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; bool enable_filesystem_cache_log = false; - /// Don't populate cache when the read is not part of query execution (e.g. background thread). 
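The `istr.ignore(INT64_MAX)` call kept in `readBigAt` above matters for keep-alive: with chunked encoding the response carries trailing bytes after the payload (the final empty chunk), and a connection is only safe to reuse once its stream has been read to EOF. A minimal sketch of draining a response stream before handing a connection back, using plain iostreams:

```cpp
#include <istream>
#include <limits>

/// After the payload has been consumed, skip any trailing bytes
/// (e.g. the final empty chunk of a chunked response) so the
/// keep-alive connection is left in a clean, reusable state.
void drainForReuse(std::istream & response_body)
{
    response_body.ignore(std::numeric_limits<std::streamsize>::max());
}
```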
- bool avoid_readthrough_cache_outside_query_context = true; size_t filesystem_cache_segments_batch_size = 20; + size_t filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = 1000; + + bool use_page_cache_for_disks_without_file_cache = false; + bool read_from_page_cache_if_exists_otherwise_bypass_cache = false; + bool page_cache_inject_eviction = false; + std::shared_ptr page_cache; size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); bool skip_download_if_exceeds_query_cache = true; @@ -116,7 +121,7 @@ struct ReadSettings // Resource to be used during reading ResourceLink resource_link; - size_t http_max_tries = 1; + size_t http_max_tries = 10; size_t http_retry_initial_backoff_ms = 100; size_t http_retry_max_backoff_ms = 1600; bool http_skip_not_found_url_for_globs = true; diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index a95d42ec7f3..fdc8ef04d2e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -1,13 +1,65 @@ #include "ReadWriteBufferFromHTTP.h" #include +#include +#include + namespace ProfileEvents { extern const Event ReadBufferSeekCancelConnection; - extern const Event ReadWriteBufferFromHTTPPreservedSessions; } + +namespace +{ + +bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept +{ + static constexpr std::array non_retriable_errors{ + Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_IMPLEMENTED, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED}; + + return std::all_of( + non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; }); +} + +Poco::URI getUriAfterRedirect(const Poco::URI & prev_uri, Poco::Net::HTTPResponse & response) +{ + chassert(DB::isRedirect(response.getStatus())); + + auto location = response.get("Location"); + auto location_uri = Poco::URI(location); + if (!location_uri.isRelative()) + return location_uri; + /// Location header contains relative path. So we need to concatenate it + /// with path from the original URI and normalize it. 
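Two related pieces above shape the new retry behaviour: `ReadSettings::http_max_tries` goes from 1 to 10 (with 100 ms initial and 1600 ms maximum backoff), and `isRetriableError()` treats every status outside a fixed deny-list (400, 401, 403, 404, 405, 501) as retriable. A compact standalone equivalent of that classification:

```cpp
#include <algorithm>
#include <array>

/// Retriable unless the status is one of the permanent client/server errors
/// listed in isRetriableError() above.
bool isRetriableHTTPStatus(int http_status)
{
    static constexpr std::array<int, 6> non_retriable{400, 401, 403, 404, 405, 501};
    return std::none_of(non_retriable.begin(), non_retriable.end(),
                        [&](int status) { return status == http_status; });
}
```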
+ auto path = std::filesystem::weakly_canonical(std::filesystem::path(prev_uri.getPath()) / location); + location_uri = prev_uri; + location_uri.setPath(path); + return location_uri; +} + +class ReadBufferFromSessionResponse : public DB::ReadBufferFromIStream +{ +private: + DB::HTTPSessionPtr session; + +public: + ReadBufferFromSessionResponse(DB::HTTPSessionPtr && session_, std::istream & rstr, size_t size) + : ReadBufferFromIStream(rstr, size) + , session(std::move(session_)) + { + } +}; + +} + + namespace DB { @@ -21,94 +73,29 @@ namespace ErrorCodes extern const int UNKNOWN_FILE_SIZE; } -template -UpdatableSession::UpdatableSession(const Poco::URI & uri, UInt64 max_redirects_, std::shared_ptr session_factory_) - : max_redirects{max_redirects_} - , initial_uri(uri) - , session_factory(std::move(session_factory_)) +std::unique_ptr ReadWriteBufferFromHTTP::CallResult::transformToReadBuffer(size_t buf_size) && { - session = session_factory->buildNewSession(uri); + chassert(session); + return std::make_unique(std::move(session), *response_stream, buf_size); } -template -typename UpdatableSession::SessionPtr UpdatableSession::getSession() { return session; } - -template -void UpdatableSession::updateSession(const Poco::URI & uri) -{ - ++redirects; - if (redirects <= max_redirects) - session = session_factory->buildNewSession(uri); - else - throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, - "Too many redirects while trying to access {}." - " You can {} redirects by changing the setting 'max_http_get_redirects'." - " Example: `SET max_http_get_redirects = 10`." - " Redirects are restricted to prevent possible attack when a malicious server redirects to an internal resource, bypassing the authentication or firewall.", - initial_uri.toString(), max_redirects ? "increase the allowed maximum number of" : "allow"); -} - -template -typename UpdatableSession::SessionPtr UpdatableSession::createDetachedSession(const Poco::URI & uri) -{ - return session_factory->buildNewSession(uri); -} - -template -std::shared_ptr> UpdatableSession::clone(const Poco::URI & uri) -{ - return std::make_shared>(uri, max_redirects, session_factory); -} - - -namespace detail -{ - -static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept -{ - static constexpr std::array non_retriable_errors{ - Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_IMPLEMENTED, - Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED}; - - return std::all_of( - non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; }); -} - -static Poco::URI getUriAfterRedirect(const Poco::URI & prev_uri, Poco::Net::HTTPResponse & response) -{ - auto location = response.get("Location"); - auto location_uri = Poco::URI(location); - if (!location_uri.isRelative()) - return location_uri; - /// Location header contains relative path. So we need to concatenate it - /// with path from the original URI and normalize it. 
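`getUriAfterRedirect()` above handles a relative `Location` header by joining it with the previous URI's path and normalizing the result. A standalone sketch mirroring that joining step (Poco and `std::filesystem`, as in the added code):

```cpp
#include <filesystem>
#include <string>
#include <Poco/URI.h>

/// Resolve a possibly relative Location value against the URI that produced the redirect.
Poco::URI resolveLocation(const Poco::URI & prev_uri, const std::string & location)
{
    Poco::URI location_uri(location);
    if (!location_uri.isRelative())
        return location_uri; // absolute URL, use as is

    /// Relative path: concatenate with the previous path and normalize it,
    /// the same way the helper above does with weakly_canonical.
    auto path = std::filesystem::weakly_canonical(std::filesystem::path(prev_uri.getPath()) / location);
    Poco::URI resolved = prev_uri;
    resolved.setPath(path.string());
    return resolved;
}
```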
- auto path = std::filesystem::weakly_canonical(std::filesystem::path(prev_uri.getPath()) / location); - location_uri = prev_uri; - location_uri.setPath(path); - return location_uri; -} - -template -bool ReadWriteBufferFromHTTPBase::withPartialContent(const HTTPRange & range) const +bool ReadWriteBufferFromHTTP::withPartialContent() const { /** * Add range header if we have some passed range * or if we want to retry GET request on purpose. */ - return range.begin || range.end || retry_with_range_header; + return read_range.begin || read_range.end || getOffset() > 0; } -template -size_t ReadWriteBufferFromHTTPBase::getOffset() const { return read_range.begin.value_or(0) + offset_from_begin_pos; } - -template -void ReadWriteBufferFromHTTPBase::prepareRequest(Poco::Net::HTTPRequest & request, Poco::URI uri_, std::optional range) const +size_t ReadWriteBufferFromHTTP::getOffset() const { - request.setHost(uri_.getHost()); // use original, not resolved host name in header + return read_range.begin.value_or(0) + offset_from_begin_pos; +} + +void ReadWriteBufferFromHTTP::prepareRequest(Poco::Net::HTTPRequest & request, std::optional range) const +{ + request.setHost(initial_uri.getHost()); // use original, not resolved host name in header if (out_stream_callback) request.setChunkedTransferEncoding(true); @@ -125,7 +112,6 @@ void ReadWriteBufferFromHTTPBase::prepareRequest(Poco::Net: range_header_value = fmt::format("bytes={}-{}", *range->begin, *range->end); else range_header_value = fmt::format("bytes={}-", *range->begin); - LOG_TEST(log, "Adding header: Range: {}", range_header_value); request.set("Range", range_header_value); } @@ -133,45 +119,7 @@ void ReadWriteBufferFromHTTPBase::prepareRequest(Poco::Net: credentials.authenticate(request); } -template -std::istream * ReadWriteBufferFromHTTPBase::callImpl( - UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_, bool for_object_info) -{ - // With empty path poco will send "POST HTTP/1.1" its bug. 
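`prepareRequest()` above emits the `Range` header as `bytes=<begin>-<end>` when the end of the range is known and as the open-ended `bytes=<begin>-` otherwise. A small sketch of that formatting (std::optional bounds, fmt as used elsewhere in this file):

```cpp
#include <optional>
#include <string>
#include <fmt/format.h>

struct HTTPRangeBounds
{
    std::optional<size_t> begin;
    std::optional<size_t> end;
};

/// "bytes=100-199" for a closed range, "bytes=100-" when only the start is known.
std::string formatRangeHeader(const HTTPRangeBounds & range)
{
    if (range.end)
        return fmt::format("bytes={}-{}", range.begin.value_or(0), *range.end);
    return fmt::format("bytes={}-", range.begin.value_or(0));
}
```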
- if (uri_.getPath().empty()) - uri_.setPath("/"); - - std::optional range; - if (!for_object_info) - { - if (withPartialContent(read_range)) - range = HTTPRange{getOffset(), read_range.end}; - } - - Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); - prepareRequest(request, uri_, range); - - LOG_TRACE(log, "Sending request to {}", uri_.toString()); - - auto sess = current_session->getSession(); - auto & stream_out = sess->sendRequest(request); - - if (out_stream_callback) - out_stream_callback(stream_out); - - auto result_istr = receiveResponse(*sess, request, response, true); - response.getCookies(cookies); - - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (!for_object_info) - content_encoding = response.get("Content-Encoding", ""); - - return result_istr; -} - -template -size_t ReadWriteBufferFromHTTPBase::getFileSize() +size_t ReadWriteBufferFromHTTP::getFileSize() { if (!file_info) file_info = getFileInfo(); @@ -179,243 +127,288 @@ size_t ReadWriteBufferFromHTTPBase::getFileSize() if (file_info->file_size) return *file_info->file_size; - throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", uri.toString()); + throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", initial_uri.toString()); } -template -bool ReadWriteBufferFromHTTPBase::supportsReadAt() +bool ReadWriteBufferFromHTTP::supportsReadAt() { if (!file_info) file_info = getFileInfo(); return method == Poco::Net::HTTPRequest::HTTP_GET && file_info->seekable; } -template -bool ReadWriteBufferFromHTTPBase::checkIfActuallySeekable() +bool ReadWriteBufferFromHTTP::checkIfActuallySeekable() { if (!file_info) file_info = getFileInfo(); return file_info->seekable; } -template -String ReadWriteBufferFromHTTPBase::getFileName() const { return uri.toString(); } - -template -void ReadWriteBufferFromHTTPBase::getHeadResponse(Poco::Net::HTTPResponse & response) +String ReadWriteBufferFromHTTP::getFileName() const { - for (size_t i = 0; i < settings.http_max_tries; ++i) - { - try - { - callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD, true, true); - break; - } - catch (const Poco::Exception & e) - { - if (i == settings.http_max_tries - 1 || e.code() == ErrorCodes::TOO_MANY_REDIRECTS || !isRetriableError(response.getStatus())) - throw; - - LOG_ERROR(log, "Failed to make HTTP_HEAD request to {}. Error: {}", uri.toString(), e.displayText()); - } - } + return initial_uri.toString(); } -template -void ReadWriteBufferFromHTTPBase::setupExternalBuffer() +void ReadWriteBufferFromHTTP::getHeadResponse(Poco::Net::HTTPResponse & response) { - /** - * use_external_buffer -- means we read into the buffer which - * was passed to us from somewhere else. We do not check whether - * previously returned buffer was read or not (no hasPendingData() check is needed), - * because this branch means we are prefetching data, - * each nextImpl() call we can fill a different buffer. 
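`getFileSize()`, `supportsReadAt()` and `checkIfActuallySeekable()` above are all answered from one HEAD probe performed through `getHeadResponse()`. A hedged standalone sketch of such a probe with plain Poco; the real code reuses the buffer's pooled session, credentials and retry loop, and its seekability criteria live in `parseFileInfo`, which is not part of this hunk, so the `Accept-Ranges` check here is an assumption for illustration only.

```cpp
#include <optional>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>

struct RemoteFileInfo
{
    std::optional<size_t> file_size;
    bool seekable = false; // i.e. the server is expected to honour Range requests
};

RemoteFileInfo probeFile(const Poco::URI & uri)
{
    Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort());
    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_HEAD,
                                   uri.getPathAndQuery().empty() ? "/" : uri.getPathAndQuery(),
                                   Poco::Net::HTTPRequest::HTTP_1_1);
    session.sendRequest(request);

    Poco::Net::HTTPResponse response;
    session.receiveResponse(response); // HEAD: the returned stream carries no body

    RemoteFileInfo info;
    if (response.hasContentLength())
        info.file_size = static_cast<size_t>(response.getContentLength());
    info.seekable = response.get("Accept-Ranges", "") == "bytes" && info.file_size.has_value();
    return info;
}
```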
- */ - impl->set(internal_buffer.begin(), internal_buffer.size()); - assert(working_buffer.begin() != nullptr); - assert(!internal_buffer.empty()); + doWithRetries( + [&] () + { + callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD, {}); + }, + /*on_retry=*/ nullptr, + /*mute_logging=*/ true); } -template -ReadWriteBufferFromHTTPBase::ReadWriteBufferFromHTTPBase( - UpdatableSessionPtr session_, - Poco::URI uri_, - const Poco::Net::HTTPBasicCredentials & credentials_, +ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( + const HTTPConnectionGroupType & connection_group_, + const Poco::URI & uri_, const std::string & method_, - OutStreamCallback out_stream_callback_, - size_t buffer_size_, - const ReadSettings & settings_, - HTTPHeaderEntries http_header_entries_, + ProxyConfiguration proxy_config_, + ReadSettings read_settings_, + ConnectionTimeouts timeouts_, + const Poco::Net::HTTPBasicCredentials & credentials_, const RemoteHostFilter * remote_host_filter_, - bool delay_initialization, + size_t buffer_size_, + size_t max_redirects_, + OutStreamCallback out_stream_callback_, bool use_external_buffer_, bool http_skip_not_found_url_, - std::optional file_info_, - ProxyConfiguration proxy_config_) + HTTPHeaderEntries http_header_entries_, + bool delay_initialization, + std::optional file_info_) : SeekableReadBuffer(nullptr, 0) - , uri {uri_} - , method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET} - , session {session_} - , out_stream_callback {out_stream_callback_} - , credentials {credentials_} - , http_header_entries {std::move(http_header_entries_)} - , remote_host_filter {remote_host_filter_} - , buffer_size {buffer_size_} - , use_external_buffer {use_external_buffer_} - , file_info(file_info_) + , connection_group(connection_group_) + , initial_uri(uri_) + , method(!method_.empty() ? method_ : out_stream_callback_ ? 
Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET) + , proxy_config(std::move(proxy_config_)) + , read_settings(std::move(read_settings_)) + , timeouts(std::move(timeouts_)) + , credentials(credentials_) + , remote_host_filter(remote_host_filter_) + , buffer_size(buffer_size_) + , max_redirects(max_redirects_) + , use_external_buffer(use_external_buffer_) , http_skip_not_found_url(http_skip_not_found_url_) - , settings {settings_} + , out_stream_callback(std::move(out_stream_callback_)) + , redirects(0) + , http_header_entries {std::move(http_header_entries_)} + , file_info(file_info_) , log(getLogger("ReadWriteBufferFromHTTP")) - , proxy_config(proxy_config_) { - if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0 - || settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms) + current_uri = initial_uri; + + if (current_uri.getPath().empty()) + current_uri.setPath("/"); + + if (read_settings.http_max_tries <= 0 || read_settings.http_retry_initial_backoff_ms <= 0 + || read_settings.http_retry_initial_backoff_ms >= read_settings.http_retry_max_backoff_ms) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Invalid setting for http backoff, " "must be http_max_tries >= 1 (current is {}) and " "0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})", - settings.http_max_tries, - settings.http_retry_initial_backoff_ms, - settings.http_retry_max_backoff_ms); + read_settings.http_max_tries, + read_settings.http_retry_initial_backoff_ms, + read_settings.http_retry_max_backoff_ms); // Configure User-Agent if it not already set. const std::string user_agent = "User-Agent"; - auto iter = std::find_if( - http_header_entries.begin(), - http_header_entries.end(), - [&user_agent](const HTTPHeaderEntry & entry) { return entry.name == user_agent; }); + auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), + [&user_agent] (const HTTPHeaderEntry & entry) { return entry.name == user_agent; }); if (iter == http_header_entries.end()) { - http_header_entries.emplace_back("User-Agent", fmt::format("ClickHouse/{}", VERSION_STRING)); + http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}", VERSION_STRING)); } + if (!delay_initialization && use_external_buffer) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid setting for ReadWriteBufferFromHTTP" + "delay_initialization is false and use_external_buffer it true."); + if (!delay_initialization) { - initialize(); - if (exception) - std::rethrow_exception(exception); + next(); } } -template -void ReadWriteBufferFromHTTPBase::callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors, bool for_object_info) +ReadWriteBufferFromHTTP::CallResult ReadWriteBufferFromHTTP::callImpl( + Poco::Net::HTTPResponse & response, const Poco::URI & uri_, const std::string & method_, const std::optional & range, bool allow_redirects) const { - UpdatableSessionPtr current_session = nullptr; + if (remote_host_filter) + remote_host_filter->checkURL(uri_); - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (for_object_info) - current_session = session->clone(uri); - else - current_session = session; + Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); + prepareRequest(request, range); - call(current_session, response, method_, throw_on_all_errors, for_object_info); - 
saved_uri_redirect = uri; + auto session = makeHTTPSession(connection_group, uri_, timeouts, proxy_config); + + auto & stream_out = session->sendRequest(request); + if (out_stream_callback) + out_stream_callback(stream_out); + + auto & resp_stream = session->receiveResponse(response); + + assertResponseIsOk(current_uri.toString(), response, resp_stream, allow_redirects); + + return ReadWriteBufferFromHTTP::CallResult(std::move(session), resp_stream); +} + +ReadWriteBufferFromHTTP::CallResult ReadWriteBufferFromHTTP::callWithRedirects( + Poco::Net::HTTPResponse & response, const String & method_, const std::optional & range) +{ + auto result = callImpl(response, current_uri, method_, range, true); while (isRedirect(response.getStatus())) { - Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response); - saved_uri_redirect = uri_redirect; - if (remote_host_filter) - remote_host_filter->checkURL(uri_redirect); + Poco::URI uri_redirect = getUriAfterRedirect(current_uri, response); + ++redirects; + if (redirects > max_redirects) + throw Exception( + ErrorCodes::TOO_MANY_REDIRECTS, + "Too many redirects while trying to access {}." + " You can {} redirects by changing the setting 'max_http_get_redirects'." + " Example: `SET max_http_get_redirects = 10`." + " Redirects are restricted to prevent possible attack when a malicious server redirects to an internal resource, bypassing the authentication or firewall.", + initial_uri.toString(), max_redirects ? "increase the allowed maximum number of" : "allow"); - current_session->updateSession(uri_redirect); + current_uri = uri_redirect; - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - auto result_istr = callImpl(current_session, uri_redirect, response, method, for_object_info); - if (!for_object_info) - istr = result_istr; + result = callImpl(response, uri_redirect, method_, range, true); } + + return result; } -template -void ReadWriteBufferFromHTTPBase::call(UpdatableSessionPtr & current_session, Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors, bool for_object_info) + +void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, + std::function on_retry, + bool mute_logging) const { - try + [[maybe_unused]] auto milliseconds_to_wait = read_settings.http_retry_initial_backoff_ms; + + bool is_retriable = true; + std::exception_ptr exception = nullptr; + + for (size_t attempt = 1; attempt <= read_settings.http_max_tries; ++attempt) { - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - auto result_istr = callImpl(current_session, saved_uri_redirect ? *saved_uri_redirect : uri, response, method_, for_object_info); - if (!for_object_info) - istr = result_istr; - } - catch (...) 
- { - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (for_object_info) - throw; + [[maybe_unused]] bool last_attempt = attempt + 1 > read_settings.http_max_tries; - if (throw_on_all_errors) - throw; + String error_message; - auto http_status = response.getStatus(); - - if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url) + try { - initialization_error = InitializeError::SKIP_NOT_FOUND_URL; + callable(); + return; } - else if (!isRetriableError(http_status)) + catch (Poco::Net::NetException & e) { - initialization_error = InitializeError::NON_RETRYABLE_ERROR; + error_message = e.displayText(); exception = std::current_exception(); } + catch (DB::NetException & e) + { + error_message = e.displayText(); + exception = std::current_exception(); + } + catch (DB::HTTPException & e) + { + if (!isRetriableError(e.getHTTPStatus())) + is_retriable = false; + + error_message = e.displayText(); + exception = std::current_exception(); + } + catch (DB::Exception & e) + { + is_retriable = false; + + error_message = e.displayText(); + exception = std::current_exception(); + } + catch (Poco::Exception & e) + { + if (e.code() == POCO_EMFILE) + is_retriable = false; + + error_message = e.displayText(); + exception = std::current_exception(); + } + + chassert(exception); + + if (last_attempt || !is_retriable) + { + if (!mute_logging) + LOG_ERROR(log, + "Failed to make request to '{}'. Error: '{}'. " + "Failed at try {}/{}.", + initial_uri.toString(), error_message, + attempt, read_settings.http_max_tries); + + std::rethrow_exception(exception); + } else { - throw; + if (on_retry) + on_retry(); + + if (!mute_logging) + LOG_INFO(log, + "Failed to make request to `{}`. Error: {}. " + "Failed at try {}/{}. " + "Will retry with current backoff wait is {}/{} ms.", + initial_uri.toString(), error_message, + attempt + 1, read_settings.http_max_tries, + milliseconds_to_wait, read_settings.http_retry_max_backoff_ms); + + sleepForMilliseconds(milliseconds_to_wait); + milliseconds_to_wait = std::min(milliseconds_to_wait * 2, read_settings.http_retry_max_backoff_ms); } } } -template -void ReadWriteBufferFromHTTPBase::initialize() + +std::unique_ptr ReadWriteBufferFromHTTP::initialize() { Poco::Net::HTTPResponse response; - call(session, response, method); - if (initialization_error != InitializeError::NONE) - return; + std::optional range; + if (withPartialContent()) + range = HTTPRange{getOffset(), read_range.end}; - while (isRedirect(response.getStatus())) - { - Poco::URI uri_redirect = getUriAfterRedirect(saved_uri_redirect.value_or(uri), response); - if (remote_host_filter) - remote_host_filter->checkURL(uri_redirect); + auto result = callWithRedirects(response, method, range); - session->updateSession(uri_redirect); - - istr = callImpl(session, uri_redirect, response, method); - saved_uri_redirect = uri_redirect; - } - - if (response.hasContentLength()) - LOG_DEBUG(log, "Received response with content length: {}", response.getContentLength()); - - if (withPartialContent(read_range) && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT) + if (range.has_value() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT) { /// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0. 
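`doWithRetries()` above centralizes the retry policy: the wait starts at `http_retry_initial_backoff_ms`, doubles after every failed attempt, and is capped at `http_retry_max_backoff_ms`, for at most `http_max_tries` attempts. A standalone sketch of that backoff loop around a generic callable:

```cpp
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <functional>
#include <thread>

/// Run `action`, retrying with doubling backoff.
/// With the new ReadSettings defaults this is 10 tries, 100 ms -> 1600 ms.
void retryWithBackoff(const std::function<void()> & action,
                      size_t max_tries = 10,
                      size_t initial_backoff_ms = 100,
                      size_t max_backoff_ms = 1600)
{
    size_t wait_ms = initial_backoff_ms;
    for (size_t attempt = 1; attempt <= max_tries; ++attempt)
    {
        try
        {
            action();
            return;
        }
        catch (...)
        {
            if (attempt == max_tries)
                throw; // out of attempts, propagate the last error
            std::this_thread::sleep_for(std::chrono::milliseconds(wait_ms));
            wait_ms = std::min(wait_ms * 2, max_backoff_ms);
        }
    }
}
```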
if (getOffset() != 0) { - if (!exception) + /// Retry 200OK + if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK) { - exception = std::make_exception_ptr(Exception( + String reason = fmt::format( + "Cannot read with range: [{}, {}] (response status: {}, reason: {}), will retry", + *read_range.begin, read_range.end ? toString(*read_range.end) : "-", + toString(response.getStatus()), response.getReason()); + + /// it is retriable error + throw HTTPException( + ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, + current_uri.toString(), + Poco::Net::HTTPResponse::HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, + reason, + ""); + } + else + throw Exception( ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, "Cannot read with range: [{}, {}] (response status: {}, reason: {})", *read_range.begin, read_range.end ? toString(*read_range.end) : "-", - toString(response.getStatus()), response.getReason())); - } - - /// Retry 200OK - if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK) - initialization_error = InitializeError::RETRYABLE_ERROR; - else - initialization_error = InitializeError::NON_RETRYABLE_ERROR; - - return; + toString(response.getStatus()), response.getReason()); } else if (read_range.end) { @@ -425,257 +418,141 @@ void ReadWriteBufferFromHTTPBase::initialize() } } + response.getCookies(cookies); + content_encoding = response.get("Content-Encoding", ""); + // Remember file size. It'll be used to report eof in next nextImpl() call. if (!read_range.end && response.hasContentLength()) - file_info = parseFileInfo(response, withPartialContent(read_range) ? getOffset() : 0); + file_info = parseFileInfo(response, range.has_value() ? getOffset() : 0); - impl = std::make_unique(*istr, buffer_size); - - if (use_external_buffer) - setupExternalBuffer(); + return std::move(result).transformToReadBuffer(use_external_buffer ? 0 : buffer_size); } -template -bool ReadWriteBufferFromHTTPBase::nextImpl() +bool ReadWriteBufferFromHTTP::nextImpl() { - if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL) - return false; - assert(initialization_error == InitializeError::NONE); - if (next_callback) next_callback(count()); - if ((read_range.end && getOffset() > read_range.end.value()) || - (file_info && file_info->file_size && getOffset() >= file_info->file_size.value())) - { - /// Response was fully read. - markSessionForReuse(session->getSession()); - ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); - return false; - } + bool next_result = false; - if (impl) - { - if (use_external_buffer) - { - setupExternalBuffer(); - } - else - { - /** - * impl was initialized before, pass position() to it to make - * sure there is no pending data which was not read. 
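`transformToReadBuffer()` used at the end of `initialize()` above, together with the `ReadBufferFromSessionResponse` helper earlier in this file, ties the lifetime of the pooled session to the read buffer: the buffer holds the `shared_ptr`, so the connection is released back only when reading is finished. A reduced, self-contained sketch of that ownership pattern with an illustrative fake session:

```cpp
#include <iterator>
#include <memory>
#include <sstream>
#include <string>

/// Stand-in for a pooled HTTP session whose response is exposed as an istream.
struct FakeSession
{
    std::istringstream response{"response body"};
};

/// Reader that owns the session it reads from: the session is destroyed
/// (i.e. the connection returned) only when the reader itself goes away.
class SessionOwningReader
{
public:
    explicit SessionOwningReader(std::shared_ptr<FakeSession> session_)
        : session(std::move(session_)), in(session->response)
    {
    }

    std::string readAll() { return {std::istreambuf_iterator<char>(in), {}}; }

private:
    std::shared_ptr<FakeSession> session; // kept alive for the lifetime of the reader
    std::istream & in;
};
```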
- */ - if (!working_buffer.empty()) - impl->position() = position(); - } - } - - bool result = false; - size_t milliseconds_to_wait = settings.http_retry_initial_backoff_ms; - bool last_attempt = false; - - auto on_retriable_error = [&]() - { - retry_with_range_header = true; - impl.reset(); - auto http_session = session->getSession(); - http_session->reset(); - if (!last_attempt) - { - sleepForMilliseconds(milliseconds_to_wait); - milliseconds_to_wait = std::min(milliseconds_to_wait * 2, settings.http_retry_max_backoff_ms); - } - }; - - for (size_t i = 0;; ++i) - { - if (last_attempt) - break; - last_attempt = i + 1 >= settings.http_max_tries; - - exception = nullptr; - initialization_error = InitializeError::NONE; - - try + doWithRetries( + /*callable=*/ [&] () { if (!impl) { - initialize(); - - if (initialization_error == InitializeError::NON_RETRYABLE_ERROR) + try { - assert(exception); - break; + impl = initialize(); } - else if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL) + catch (HTTPException & e) { - return false; - } - else if (initialization_error == InitializeError::RETRYABLE_ERROR) - { - LOG_TRACE( - log, - "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. " - "(Current backoff wait is {}/{} ms)", - uri.toString(), i + 1, settings.http_max_tries, getOffset(), - read_range.end ? toString(*read_range.end) : "unknown", - milliseconds_to_wait, settings.http_retry_max_backoff_ms); + if (http_skip_not_found_url && e.getHTTPStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND) + { + next_result = false; + has_not_found_url = true; + return; + } - assert(exception); - on_retriable_error(); - continue; + throw; } - assert(!exception); - if (use_external_buffer) { - setupExternalBuffer(); + impl->set(internal_buffer.begin(), internal_buffer.size()); + } + else + { + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); } } - result = impl->next(); - exception = nullptr; - break; - } - catch (const Poco::Exception & e) + if (use_external_buffer) + { + impl->set(internal_buffer.begin(), internal_buffer.size()); + } + else + { + impl->position() = position(); + } + + next_result = impl->next(); + + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); + + offset_from_begin_pos += working_buffer.size(); + }, + /*on_retry=*/ [&] () { - /// Too many open files or redirects - non-retryable. - if (e.code() == POCO_EMFILE || e.code() == ErrorCodes::TOO_MANY_REDIRECTS) - throw; + impl.reset(); + }); - /** Retry request unconditionally if nothing has been read yet. - * Otherwise if it is GET method retry with range header. - */ - bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET; - if (!can_retry_request) - throw; - - LOG_WARNING( - log, - "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. " - "Error: {}. (Current backoff wait is {}/{} ms)", - uri.toString(), - i + 1, - settings.http_max_tries, - getOffset(), - read_range.end ? toString(*read_range.end) : "unknown", - e.displayText(), - milliseconds_to_wait, - settings.http_retry_max_backoff_ms); - - on_retriable_error(); - exception = std::current_exception(); - } - } - - if (exception) - std::rethrow_exception(exception); - - if (!result) - { - /// Eof is reached, i.e response was fully read. 
- markSessionForReuse(session->getSession()); - ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); - return false; - } - - internal_buffer = impl->buffer(); - working_buffer = internal_buffer; - offset_from_begin_pos += working_buffer.size(); - return true; + return next_result; } -template -size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) +size_t ReadWriteBufferFromHTTP::readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) const { /// Caller must have checked supportsReadAt(). - /// This ensures we've sent at least one HTTP request and populated saved_uri_redirect. + /// This ensures we've sent at least one HTTP request and populated current_uri. chassert(file_info && file_info->seekable); - Poco::URI uri_ = saved_uri_redirect.value_or(uri); - if (uri_.getPath().empty()) - uri_.setPath("/"); - size_t initial_n = n; - size_t milliseconds_to_wait = settings.http_retry_initial_backoff_ms; + size_t total_bytes_copied = 0; + size_t bytes_copied = 0; + bool is_canceled = false; - for (size_t attempt = 0; n > 0; ++attempt) - { - bool last_attempt = attempt + 1 >= settings.http_max_tries; - - Poco::Net::HTTPRequest request(method, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); - prepareRequest(request, uri_, HTTPRange { .begin = offset, .end = offset + n - 1}); - - LOG_TRACE(log, "Sending request to {} for range [{}, {})", uri_.toString(), offset, offset + n); - - auto sess = session->createDetachedSession(uri_); - - Poco::Net::HTTPResponse response; - std::istream * result_istr; - size_t bytes_copied = 0; - - try + doWithRetries( + /*callable=*/ [&] () { - sess->sendRequest(request); - result_istr = receiveResponse(*sess, request, response, /*allow_redirects*/ false); + auto range = HTTPRange{offset, offset + n - 1}; + + Poco::Net::HTTPResponse response; + auto result = callImpl(response, current_uri, method, range, false); if (response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT && (offset != 0 || offset + n < *file_info->file_size)) - throw Exception( - ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, - "Expected 206 Partial Content, got {} when reading {} range [{}, {})", - toString(response.getStatus()), uri_.toString(), offset, offset + n); - - copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &bytes_copied); - if (bytes_copied == n) { - result_istr->ignore(UINT64_MAX); - /// Response was fully read. - markSessionForReuse(*sess); - ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); + String reason = fmt::format( + "When reading with readBigAt {}." 
+ "Cannot read with range: [{}, {}] (response status: {}, reason: {}), will retry", + initial_uri.toString(), + *range.begin, *range.end, + toString(response.getStatus()), response.getReason()); + + throw HTTPException( + ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, + current_uri.toString(), + Poco::Net::HTTPResponse::HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, + reason, + ""); } - } - catch (const Poco::Exception & e) + + copyFromIStreamWithProgressCallback(*result.response_stream, to, n, progress_callback, &bytes_copied, &is_canceled); + + offset += bytes_copied; + total_bytes_copied += bytes_copied; + to += bytes_copied; + n -= bytes_copied; + bytes_copied = 0; + }, + /*on_retry=*/ [&] () { - LOG_ERROR( - log, - "HTTP request (positioned) to `{}` with range [{}, {}) failed at try {}/{}: {}", - uri_.toString(), offset, offset + n, attempt + 1, settings.http_max_tries, - e.what()); + offset += bytes_copied; + total_bytes_copied += bytes_copied; + to += bytes_copied; + n -= bytes_copied; + bytes_copied = 0; + }); - /// Decide whether to retry. - - if (last_attempt) - throw; - - /// Too many open files - non-retryable. - if (e.code() == POCO_EMFILE) - throw; - - if (const auto * h = dynamic_cast(&e); - h && !isRetriableError(static_cast(h->getHTTPStatus()))) - throw; - - sleepForMilliseconds(milliseconds_to_wait); - milliseconds_to_wait = std::min(milliseconds_to_wait * 2, settings.http_retry_max_backoff_ms); - } - - /// Make sure retries don't re-read the bytes that we've already reported to progress_callback. - offset += bytes_copied; - to += bytes_copied; - n -= bytes_copied; - } - - return initial_n; + chassert(total_bytes_copied == initial_n || is_canceled); + return total_bytes_copied; } -template -off_t ReadWriteBufferFromHTTPBase::getPosition() { return getOffset() - available(); } +off_t ReadWriteBufferFromHTTP::getPosition() +{ + return getOffset() - available(); +} -template -off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int whence) +off_t ReadWriteBufferFromHTTP::seek(off_t offset_, int whence) { if (whence != SEEK_SET) throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); @@ -688,8 +565,8 @@ off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset) { pos = working_buffer.end() - (current_offset - offset_); - assert(pos >= working_buffer.begin()); - assert(pos < working_buffer.end()); + chassert(pos >= working_buffer.begin()); + chassert(pos < working_buffer.end()); return getPosition(); } @@ -700,7 +577,7 @@ off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int if (offset_ > position) { size_t diff = offset_ - position; - if (diff < settings.remote_read_min_bytes_for_seek) + if (diff < read_settings.remote_read_min_bytes_for_seek) { ignore(diff); return offset_; @@ -709,6 +586,7 @@ off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int if (!atEndOfRequestedRangeGuess()) ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection); + impl.reset(); } @@ -719,8 +597,8 @@ off_t ReadWriteBufferFromHTTPBase::seek(off_t offset_, int return offset_; } -template -void ReadWriteBufferFromHTTPBase::setReadUntilPosition(size_t until) + +void ReadWriteBufferFromHTTP::setReadUntilPosition(size_t until) { until = std::max(until, 1ul); if (read_range.end && *read_range.end + 1 == until) @@ -736,8 +614,7 @@ void ReadWriteBufferFromHTTPBase::setReadUntilPosition(size } } -template -void 
ReadWriteBufferFromHTTPBase::setReadUntilEnd() +void ReadWriteBufferFromHTTP::setReadUntilEnd() { if (!read_range.end) return; @@ -752,11 +629,9 @@ void ReadWriteBufferFromHTTPBase::setReadUntilEnd() } } -template -bool ReadWriteBufferFromHTTPBase::supportsRightBoundedReads() const { return true; } +bool ReadWriteBufferFromHTTP::supportsRightBoundedReads() const { return true; } -template -bool ReadWriteBufferFromHTTPBase::atEndOfRequestedRangeGuess() +bool ReadWriteBufferFromHTTP::atEndOfRequestedRangeGuess() { if (!impl) return true; @@ -767,8 +642,7 @@ bool ReadWriteBufferFromHTTPBase::atEndOfRequestedRangeGues return false; } -template -std::string ReadWriteBufferFromHTTPBase::getResponseCookie(const std::string & name, const std::string & def) const +std::string ReadWriteBufferFromHTTP::getResponseCookie(const std::string & name, const std::string & def) const { for (const auto & cookie : cookies) if (cookie.getName() == name) @@ -776,19 +650,19 @@ std::string ReadWriteBufferFromHTTPBase::getResponseCookie( return def; } -template -void ReadWriteBufferFromHTTPBase::setNextCallback(NextCallback next_callback_) +void ReadWriteBufferFromHTTP::setNextCallback(NextCallback next_callback_) { next_callback = next_callback_; /// Some data maybe already read next_callback(count()); } -template -const std::string & ReadWriteBufferFromHTTPBase::getCompressionMethod() const { return content_encoding; } +const std::string & ReadWriteBufferFromHTTP::getCompressionMethod() const +{ + return content_encoding; +} -template -std::optional ReadWriteBufferFromHTTPBase::tryGetLastModificationTime() +std::optional ReadWriteBufferFromHTTP::tryGetLastModificationTime() { if (!file_info) { @@ -805,12 +679,11 @@ std::optional ReadWriteBufferFromHTTPBase::tryGetLa return file_info->last_modified; } -template -HTTPFileInfo ReadWriteBufferFromHTTPBase::getFileInfo() +ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo() { /// May be disabled in case the user knows in advance that the server doesn't support HEAD requests. /// Allows to avoid making unnecessary requests in such cases. 
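// getFileInfo() above probes the server, with a HEAD request unless
// http_make_head_request is disabled (next lines), and parseFileInfo() further
// below turns the response headers into an HTTPFileInfo. The standalone sketch
// below only illustrates which headers that relies on; the struct and function
// names are invented for the example, and the exact seekability rule is an
// assumption rather than the real parseFileInfo() logic.

#include <Poco/Net/HTTPResponse.h>
#include <optional>
#include <string>

struct FileInfoSketch
{
    std::optional<size_t> file_size;
    std::optional<std::string> last_modified_raw;   /// the real code converts this to a timestamp
    bool seekable = false;
};

inline FileInfoSketch parseFileInfoSketch(const Poco::Net::HTTPResponse & response)
{
    FileInfoSketch info;
    if (response.hasContentLength())
        info.file_size = static_cast<size_t>(response.getContentLength());
    if (std::string last_modified = response.get("Last-Modified", ""); !last_modified.empty())
        info.last_modified_raw = last_modified;
    /// Treat a server that honours range requests as seekable.
    info.seekable = response.get("Accept-Ranges", "") == "bytes"
        || response.getStatus() == Poco::Net::HTTPResponse::HTTP_PARTIAL_CONTENT;
    return info;
}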
- if (!settings.http_make_head_request) + if (!read_settings.http_make_head_request) return HTTPFileInfo{}; Poco::Net::HTTPResponse response; @@ -832,11 +705,11 @@ HTTPFileInfo ReadWriteBufferFromHTTPBase::getFileInfo() throw; } + return parseFileInfo(response, 0); } -template -HTTPFileInfo ReadWriteBufferFromHTTPBase::parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin) +ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin) { HTTPFileInfo res; @@ -868,79 +741,3 @@ HTTPFileInfo ReadWriteBufferFromHTTPBase::parseFileInfo(con } } - -SessionFactory::SessionFactory(const ConnectionTimeouts & timeouts_, ProxyConfiguration proxy_config_) - : timeouts(timeouts_), proxy_config(proxy_config_) {} - -SessionFactory::SessionType SessionFactory::buildNewSession(const Poco::URI & uri) -{ - return makeHTTPSession(uri, timeouts, proxy_config); -} - -ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( - Poco::URI uri_, - const std::string & method_, - OutStreamCallback out_stream_callback_, - const ConnectionTimeouts & timeouts, - const Poco::Net::HTTPBasicCredentials & credentials_, - const UInt64 max_redirects, - size_t buffer_size_, - const ReadSettings & settings_, - const HTTPHeaderEntries & http_header_entries_, - const RemoteHostFilter * remote_host_filter_, - bool delay_initialization_, - bool use_external_buffer_, - bool skip_not_found_url_, - std::optional file_info_, - ProxyConfiguration proxy_config_) - : Parent( - std::make_shared(uri_, max_redirects, std::make_shared(timeouts, proxy_config_)), - uri_, - credentials_, - method_, - out_stream_callback_, - buffer_size_, - settings_, - http_header_entries_, - remote_host_filter_, - delay_initialization_, - use_external_buffer_, - skip_not_found_url_, - file_info_, - proxy_config_) {} - - -PooledSessionFactory::PooledSessionFactory( - const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_) - : timeouts(timeouts_) - , per_endpoint_pool_size(per_endpoint_pool_size_) {} - -PooledSessionFactory::SessionType PooledSessionFactory::buildNewSession(const Poco::URI & uri) -{ - return makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); -} - - -PooledReadWriteBufferFromHTTP::PooledReadWriteBufferFromHTTP( - Poco::URI uri_, - const std::string & method_, - OutStreamCallback out_stream_callback_, - const Poco::Net::HTTPBasicCredentials & credentials_, - size_t buffer_size_, - const UInt64 max_redirects, - PooledSessionFactoryPtr session_factory) - : Parent( - std::make_shared(uri_, max_redirects, session_factory), - uri_, - credentials_, - method_, - out_stream_callback_, - buffer_size_) {} - - -template class UpdatableSession; -template class UpdatableSession; -template class detail::ReadWriteBufferFromHTTPBase>>; -template class detail::ReadWriteBufferFromHTTPBase>>; - -} diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 63ca3e0417c..1b7437ea0c6 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include "config.h" #include @@ -30,44 +29,19 @@ namespace DB { -template -class UpdatableSession +class ReadWriteBufferFromHTTP : public SeekableReadBuffer, public WithFileName, public WithFileSize { public: - using SessionPtr = typename TSessionFactory::SessionType; - - explicit UpdatableSession(const Poco::URI & uri, UInt64 max_redirects_, std::shared_ptr session_factory_); - - 
SessionPtr getSession(); - - void updateSession(const Poco::URI & uri); - - /// Thread safe. - SessionPtr createDetachedSession(const Poco::URI & uri); - - std::shared_ptr> clone(const Poco::URI & uri); + /// Information from HTTP response header. + struct HTTPFileInfo + { + // nullopt if the server doesn't report it. + std::optional file_size; + std::optional last_modified; + bool seekable = false; + }; private: - SessionPtr session; - UInt64 redirects{0}; - UInt64 max_redirects; - Poco::URI initial_uri; - std::shared_ptr session_factory; -}; - - -/// Information from HTTP response header. -struct HTTPFileInfo -{ - // nullopt if the server doesn't report it. - std::optional file_size; - std::optional last_modified; - bool seekable = false; -}; - - -namespace detail -{ /// Byte range, including right bound [begin, end]. struct HTTPRange { @@ -75,218 +49,213 @@ namespace detail std::optional end; }; - template - class ReadWriteBufferFromHTTPBase : public SeekableReadBuffer, public WithFileName, public WithFileSize + struct CallResult { - protected: - Poco::URI uri; - std::string method; - std::string content_encoding; + HTTPSessionPtr session; + std::istream * response_stream = nullptr; - UpdatableSessionPtr session; - std::istream * istr; /// owned by session - std::unique_ptr impl; - std::function out_stream_callback; - const Poco::Net::HTTPBasicCredentials & credentials; - std::vector cookies; - HTTPHeaderEntries http_header_entries; - const RemoteHostFilter * remote_host_filter = nullptr; - std::function next_callback; + CallResult(HTTPSessionPtr && session_, std::istream & response_stream_) + : session(session_) + , response_stream(&response_stream_) + {} + CallResult(CallResult &&) = default; + CallResult & operator= (CallResult &&) = default; - size_t buffer_size; - bool use_external_buffer; - - size_t offset_from_begin_pos = 0; - HTTPRange read_range; - std::optional file_info; - - /// Delayed exception in case retries with partial content are not satisfiable. - std::exception_ptr exception; - bool retry_with_range_header = false; - /// In case of redirects, save result uri to use it if we retry the request. - std::optional saved_uri_redirect; - - bool http_skip_not_found_url; - - ReadSettings settings; - LoggerPtr log; - - ProxyConfiguration proxy_config; - - bool withPartialContent(const HTTPRange & range) const; - - size_t getOffset() const; - - void prepareRequest(Poco::Net::HTTPRequest & request, Poco::URI uri_, std::optional range) const; - - std::istream * callImpl(UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_, bool for_object_info = false); - - size_t getFileSize() override; - - bool supportsReadAt() override; - - bool checkIfActuallySeekable() override; - - String getFileName() const override; - - enum class InitializeError - { - RETRYABLE_ERROR, - /// If error is not retriable, `exception` variable must be set. 
- NON_RETRYABLE_ERROR, - /// Allows to skip not found urls for globs - SKIP_NOT_FOUND_URL, - NONE, - }; - - InitializeError initialization_error = InitializeError::NONE; - - private: - void getHeadResponse(Poco::Net::HTTPResponse & response); - - void setupExternalBuffer(); - - public: - using NextCallback = std::function; - using OutStreamCallback = std::function; - - explicit ReadWriteBufferFromHTTPBase( - UpdatableSessionPtr session_, - Poco::URI uri_, - const Poco::Net::HTTPBasicCredentials & credentials_, - const std::string & method_ = {}, - OutStreamCallback out_stream_callback_ = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const ReadSettings & settings_ = {}, - HTTPHeaderEntries http_header_entries_ = {}, - const RemoteHostFilter * remote_host_filter_ = nullptr, - bool delay_initialization = false, - bool use_external_buffer_ = false, - bool http_skip_not_found_url_ = false, - std::optional file_info_ = std::nullopt, - ProxyConfiguration proxy_config_ = {}); - - void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false); - - void call(UpdatableSessionPtr & current_session, Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false, bool for_object_info = false); - - /** - * Throws if error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and - * saves exception into `exception` variable. In case url is not found and skip_not_found_url == true, - * sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws. - */ - void initialize(); - - bool nextImpl() override; - - size_t readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) override; - - off_t getPosition() override; - - off_t seek(off_t offset_, int whence) override; - - void setReadUntilPosition(size_t until) override; - - void setReadUntilEnd() override; - - bool supportsRightBoundedReads() const override; - - // If true, if we destroy impl now, no work was wasted. Just for metrics. - bool atEndOfRequestedRangeGuess(); - - std::string getResponseCookie(const std::string & name, const std::string & def) const; - - /// Set function to call on each nextImpl, useful when you need to track - /// progress. 
- /// NOTE: parameter on each call is not incremental -- it's all bytes count - /// passed through the buffer - void setNextCallback(NextCallback next_callback_); - - const std::string & getCompressionMethod() const; - - std::optional tryGetLastModificationTime(); - - HTTPFileInfo getFileInfo(); - - HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin); + std::unique_ptr transformToReadBuffer(size_t buf_size) &&; }; -} -class SessionFactory -{ -public: - explicit SessionFactory(const ConnectionTimeouts & timeouts_, ProxyConfiguration proxy_config_ = {}); + const HTTPConnectionGroupType connection_group; + const Poco::URI initial_uri; + const std::string method; + const ProxyConfiguration proxy_config; + const ReadSettings read_settings; + const ConnectionTimeouts timeouts; - using SessionType = HTTPSessionPtr; + const Poco::Net::HTTPBasicCredentials & credentials; + const RemoteHostFilter * remote_host_filter; - SessionType buildNewSession(const Poco::URI & uri); -private: - ConnectionTimeouts timeouts; - ProxyConfiguration proxy_config; -}; + const size_t buffer_size; + const size_t max_redirects; -class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>> -{ - using SessionType = UpdatableSession; - using Parent = detail::ReadWriteBufferFromHTTPBase>; + const bool use_external_buffer; + const bool http_skip_not_found_url; + bool has_not_found_url = false; + + std::function out_stream_callback; + + Poco::URI current_uri; + size_t redirects = 0; + + std::string content_encoding; + std::unique_ptr impl; + + std::vector cookies; + HTTPHeaderEntries http_header_entries; + std::function next_callback; + + size_t offset_from_begin_pos = 0; + HTTPRange read_range; + std::optional file_info; + + LoggerPtr log; + + bool withPartialContent() const; + + void prepareRequest(Poco::Net::HTTPRequest & request, std::optional range) const; + + void doWithRetries(std::function && callable, std::function on_retry = nullptr, bool mute_logging = false) const; + + CallResult callImpl( + Poco::Net::HTTPResponse & response, + const Poco::URI & uri_, + const std::string & method_, + const std::optional & range, + bool allow_redirects) const; + + CallResult callWithRedirects( + Poco::Net::HTTPResponse & response, + const String & method_, + const std::optional & range); + + std::unique_ptr initialize(); + + size_t getFileSize() override; + + bool supportsReadAt() override; + + bool checkIfActuallySeekable() override; + + String getFileName() const override; + + void getHeadResponse(Poco::Net::HTTPResponse & response); + + void setupExternalBuffer(); + + size_t getOffset() const; + + // If true, if we destroy impl now, no work was wasted. Just for metrics. 
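// Call sites no longer construct this class directly with a long positional
// argument list; they are expected to go through the BuilderRWBufferFromHTTP
// helper declared near the end of this header (see the setterMember macro and
// create()). A usage sketch follows; the function name and the particular
// options chosen are illustrative, uri/timeouts/credentials are assumed to be
// supplied by the caller, and only names already visible in this header are used.

ReadWriteBufferFromHTTPPtr makeHTTPReadBufferSketch(
    const Poco::URI & uri,
    const ConnectionTimeouts & timeouts,
    const Poco::Net::HTTPBasicCredentials & credentials)
{
    return BuilderRWBufferFromHTTP(uri)
        .withConnectionGroup(HTTPConnectionGroupType::HTTP)
        .withMethod(Poco::Net::HTTPRequest::HTTP_GET)
        .withTimeouts(timeouts)
        .withBufSize(DBMS_DEFAULT_BUFFER_SIZE)
        .withRedirects(10)
        .withSkipNotFound(true)   /// with globbed URLs a 404 sets hasNotFoundURL() instead of throwing
        .create(credentials);
}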
+ bool atEndOfRequestedRangeGuess(); public: + using NextCallback = std::function; + using OutStreamCallback = std::function; + ReadWriteBufferFromHTTP( - Poco::URI uri_, + const HTTPConnectionGroupType & connection_group_, + const Poco::URI & uri_, const std::string & method_, - OutStreamCallback out_stream_callback_, - const ConnectionTimeouts & timeouts, - const Poco::Net::HTTPBasicCredentials & credentials_, - const UInt64 max_redirects = 0, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const ReadSettings & settings_ = {}, - const HTTPHeaderEntries & http_header_entries_ = {}, - const RemoteHostFilter * remote_host_filter_ = nullptr, - bool delay_initialization_ = true, - bool use_external_buffer_ = false, - bool skip_not_found_url_ = false, - std::optional file_info_ = std::nullopt, - ProxyConfiguration proxy_config_ = {}); -}; - -class PooledSessionFactory -{ -public: - explicit PooledSessionFactory( - const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_); - - using SessionType = PooledHTTPSessionPtr; - - /// Thread safe. - SessionType buildNewSession(const Poco::URI & uri); - -private: - ConnectionTimeouts timeouts; - size_t per_endpoint_pool_size; -}; - -using PooledSessionFactoryPtr = std::shared_ptr; - -class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>> -{ - using SessionType = UpdatableSession; - using Parent = detail::ReadWriteBufferFromHTTPBase>; - -public: - explicit PooledReadWriteBufferFromHTTP( - Poco::URI uri_, - const std::string & method_, - OutStreamCallback out_stream_callback_, + ProxyConfiguration proxy_config_, + ReadSettings read_settings_, + ConnectionTimeouts timeouts_, const Poco::Net::HTTPBasicCredentials & credentials_, + const RemoteHostFilter * remote_host_filter_, size_t buffer_size_, - const UInt64 max_redirects, - PooledSessionFactoryPtr session_factory); + size_t max_redirects_, + OutStreamCallback out_stream_callback_, + bool use_external_buffer_, + bool http_skip_not_found_url_, + HTTPHeaderEntries http_header_entries_, + bool delay_initialization, + std::optional file_info_); + + bool nextImpl() override; + + size_t readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) const override; + + off_t seek(off_t offset_, int whence) override; + + void setReadUntilPosition(size_t until) override; + + void setReadUntilEnd() override; + + bool supportsRightBoundedReads() const override; + + off_t getPosition() override; + + std::string getResponseCookie(const std::string & name, const std::string & def) const; + + /// Set function to call on each nextImpl, useful when you need to track + /// progress. 
+ /// NOTE: parameter on each call is not incremental -- it's all bytes count + /// passed through the buffer + void setNextCallback(NextCallback next_callback_); + + const std::string & getCompressionMethod() const; + + std::optional tryGetLastModificationTime(); + + bool hasNotFoundURL() const { return has_not_found_url; } + + HTTPFileInfo getFileInfo(); + static HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin); }; +using ReadWriteBufferFromHTTPPtr = std::unique_ptr; -extern template class UpdatableSession; -extern template class UpdatableSession; -extern template class detail::ReadWriteBufferFromHTTPBase>>; -extern template class detail::ReadWriteBufferFromHTTPBase>>; +class BuilderRWBufferFromHTTP +{ + Poco::URI uri; + std::string method = Poco::Net::HTTPRequest::HTTP_GET; + HTTPConnectionGroupType connection_group = HTTPConnectionGroupType::HTTP; + ProxyConfiguration proxy_config{}; + ReadSettings read_settings{}; + ConnectionTimeouts timeouts{}; + const RemoteHostFilter * remote_host_filter = nullptr; + size_t buffer_size = DBMS_DEFAULT_BUFFER_SIZE; + size_t max_redirects = 0; + ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = nullptr; + bool use_external_buffer = false; + bool http_skip_not_found_url = false; + HTTPHeaderEntries http_header_entries{}; + bool delay_initialization = true; + +public: + BuilderRWBufferFromHTTP(Poco::URI uri_) + : uri(uri_) + {} + +#define setterMember(name, member) \ + BuilderRWBufferFromHTTP & name(decltype(BuilderRWBufferFromHTTP::member) arg_##member) \ + { \ + member = std::move(arg_##member); \ + return *this; \ + } + + setterMember(withConnectionGroup, connection_group) + setterMember(withMethod, method) + setterMember(withProxy, proxy_config) + setterMember(withSettings, read_settings) + setterMember(withTimeouts, timeouts) + setterMember(withHostFilter, remote_host_filter) + setterMember(withBufSize, buffer_size) + setterMember(withRedirects, max_redirects) + setterMember(withOutCallback, out_stream_callback) + setterMember(withHeaders, http_header_entries) + setterMember(withExternalBuf, use_external_buffer) + setterMember(withDelayInit, delay_initialization) + setterMember(withSkipNotFound, http_skip_not_found_url) +#undef setterMember + + ReadWriteBufferFromHTTPPtr create(const Poco::Net::HTTPBasicCredentials & credentials_) + { + return std::make_unique( + connection_group, + uri, + method, + proxy_config, + read_settings, + timeouts, + credentials_, + remote_host_filter, + buffer_size, + max_redirects, + out_stream_callback, + use_external_buffer, + http_skip_not_found_url, + http_header_entries, + delay_initialization, + /*file_info_=*/ std::nullopt); + } +}; } diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 7f0ede72740..1b6b245b89a 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -27,7 +27,6 @@ #include -#include namespace ProfileEvents { @@ -48,7 +47,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int TOO_MANY_REDIRECTS; - extern const int BAD_ARGUMENTS; } namespace S3 @@ -106,19 +104,6 @@ void verifyClientConfiguration(const Aws::Client::ClientConfiguration & client_c assert_cast(*client_config.retryStrategy); } -void validateCredentials(const Aws::Auth::AWSCredentials& auth_credentials) -{ - if (auth_credentials.GetAWSAccessKeyId().empty()) - { - return; - } - /// Follow https://docs.aws.amazon.com/IAM/latest/APIReference/API_AccessKey.html - if (!std::all_of(auth_credentials.GetAWSAccessKeyId().begin(), 
auth_credentials.GetAWSAccessKeyId().end(), isWordCharASCII)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Access key id has an invalid character"); - } -} - void addAdditionalAMZHeadersToCanonicalHeadersList( Aws::AmazonWebServiceRequest & request, const HTTPHeaderEntries & extra_headers @@ -144,7 +129,6 @@ std::unique_ptr Client::create( const ClientSettings & client_settings) { verifyClientConfiguration(client_configuration); - validateCredentials(credentials_provider->GetAWSCredentials()); return std::unique_ptr( new Client(max_redirects_, std::move(sse_kms_config_), credentials_provider, client_configuration, sign_payloads, client_settings)); } @@ -731,7 +715,7 @@ std::string Client::getRegionForBucket(const std::string & bucket, bool force_de if (outcome.IsSuccess()) { const auto & result = outcome.GetResult(); - region = result.GetRegion(); + region = result.GetBucketRegion(); } else { diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index e64f54b99ad..80366510b53 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -22,7 +22,6 @@ namespace ErrorCodes # include # include -# include # include # include @@ -31,9 +30,7 @@ namespace ErrorCodes # include # include -# include -# include # include # include # include @@ -755,7 +752,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( configuration.put_request_throttler, Aws::Http::SchemeMapper::ToString(Aws::Http::Scheme::HTTP)); - /// See MakeDefaultHttpResourceClientConfiguration(). + /// See MakeDefaultHTTPResourceClientConfiguration(). /// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside /// of contrib/aws/aws-cpp-sdk-core/source/internal/AWSHttpResourceClient.cpp aws_client_configuration.maxConnections = 2; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 21acdfd69f2..a29a4b0b8ee 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,5 +1,4 @@ #include -#include "Common/DNSResolver.h" #include "config.h" #if USE_AWS_S3 @@ -146,10 +145,8 @@ ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguratio .withConnectionTimeout(Poco::Timespan(client_configuration.connectTimeoutMs * 1000)) .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) - .withTcpKeepAliveTimeout(Poco::Timespan( - client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)) - .withHttpKeepAliveTimeout(Poco::Timespan( - client_configuration.http_keep_alive_timeout_ms * 1000)); /// flag indicating whether keep-alive is enabled is set to each session upon creation + .withTCPKeepAliveTimeout(Poco::Timespan( + client_configuration.enableTcpKeepAlive ? 
client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)); } PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) @@ -164,8 +161,6 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , get_request_throttler(client_configuration.get_request_throttler) , put_request_throttler(client_configuration.put_request_throttler) , extra_headers(client_configuration.extra_headers) - , http_connection_pool_size(client_configuration.http_connection_pool_size) - , wait_on_pool_size_limit(client_configuration.wait_on_pool_size_limit) { } @@ -308,12 +303,8 @@ void PocoHTTPClient::makeRequestInternal( Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const { - /// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session. const auto request_configuration = per_request_configuration(); - if (http_connection_pool_size) - makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); - else - makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); + makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); } String getMethod(const Aws::Http::HttpRequest & request) @@ -335,7 +326,6 @@ String getMethod(const Aws::Http::HttpRequest & request) } } -template void PocoHTTPClient::makeRequestInternalImpl( Aws::Http::HttpRequest & request, const DB::ProxyConfiguration & proxy_configuration, @@ -343,8 +333,6 @@ void PocoHTTPClient::makeRequestInternalImpl( Aws::Utils::RateLimits::RateLimiterInterface *, Aws::Utils::RateLimits::RateLimiterInterface *) const { - using SessionPtr = std::conditional_t; - LoggerPtr log = getLogger("AWSClient"); auto uri = request.GetUri().GetURIString(); @@ -396,40 +384,17 @@ void PocoHTTPClient::makeRequestInternalImpl( for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt) { Poco::URI target_uri(uri); - SessionPtr session; - if (!proxy_configuration.host.empty()) - { - if (enable_s3_requests_logging) - LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri); - /// Reverse proxy can replace host header with resolved ip address instead of host name. - /// This can lead to request signature difference on S3 side. - if constexpr (pooled) - session = makePooledHTTPSession( - target_uri, - getTimeouts(method, first_attempt, /*first_byte*/ true), - http_connection_pool_size, - wait_on_pool_size_limit, - proxy_configuration); - else - session = makeHTTPSession( - target_uri, - getTimeouts(method, first_attempt, /*first_byte*/ true), - proxy_configuration); - } - else - { - if constexpr (pooled) - session = makePooledHTTPSession( - target_uri, - getTimeouts(method, first_attempt, /*first_byte*/ true), - http_connection_pool_size, - wait_on_pool_size_limit); - else - session = makeHTTPSession( - target_uri, - getTimeouts(method, first_attempt, /*first_byte*/ true)); - } + if (enable_s3_requests_logging && !proxy_configuration.isEmpty()) + LOG_TEST(log, "Due to reverse proxy host name ({}) won't be resolved on ClickHouse side", uri); + + auto group = for_disk_s3 ? 
HTTPConnectionGroupType::DISK : HTTPConnectionGroupType::STORAGE; + + auto session = makeHTTPSession( + group, + target_uri, + getTimeouts(method, first_attempt, /*first_byte*/ true), + proxy_configuration); /// In case of error this address will be written to logs request.SetResolvedRemoteHost(session->getResolvedAddress()); @@ -612,10 +577,6 @@ void PocoHTTPClient::makeRequestInternalImpl( response->SetClientErrorMessage(getCurrentExceptionMessage(false)); addMetric(request, S3MetricType::Errors); - - /// Probably this is socket timeout or something more or less related to DNS - /// Let's just remove this host from DNS cache to be more safe - DNSResolver::instance().removeHostFromCache(Poco::URI(uri).getHost()); } } diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 5178d75e7b6..a93a4dfbaf7 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -49,12 +49,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration ThrottlerPtr put_request_throttler; HTTPHeaderEntries extra_headers; - /// Not a client parameter in terms of HTTP and we won't send it to the server. Used internally to determine when connection have to be re-established. - uint32_t http_keep_alive_timeout_ms = 0; - /// Zero means pooling will not be used. - size_t http_connection_pool_size = 0; /// See PoolBase::BehaviourOnLimit - bool wait_on_pool_size_limit = true; bool s3_use_adaptive_timeouts = true; std::function error_report; @@ -98,12 +93,6 @@ public: ); } - void SetResponseBody(Aws::IStream & incoming_stream, PooledHTTPSessionPtr & session_) /// NOLINT - { - body_stream = Aws::Utils::Stream::ResponseStream( - Aws::New>("http result streambuf", session_, incoming_stream.rdbuf())); - } - void SetResponseBody(std::string & response_body) /// NOLINT { auto stream = Aws::New("http result buf", response_body); // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -163,7 +152,6 @@ private: EnumSize, }; - template void makeRequestInternalImpl( Aws::Http::HttpRequest & request, const DB::ProxyConfiguration & proxy_configuration, @@ -196,9 +184,6 @@ protected: ThrottlerPtr put_request_throttler; const HTTPHeaderEntries extra_headers; - - size_t http_connection_pool_size = 0; - bool wait_on_pool_size_limit = true; }; } diff --git a/src/IO/S3/Requests.cpp b/src/IO/S3/Requests.cpp index 56d2e44a2c4..50ed2e21bfc 100644 --- a/src/IO/S3/Requests.cpp +++ b/src/IO/S3/Requests.cpp @@ -52,6 +52,20 @@ Aws::Http::HeaderValueCollection CopyObjectRequest::GetRequestSpecificHeaders() return headers; } +void CompleteMultipartUploadRequest::SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) +{ + // S3's CompleteMultipartUpload doesn't support metadata headers so we skip adding them + if (!headerName.starts_with("x-amz-meta-")) + Model::CompleteMultipartUploadRequest::SetAdditionalCustomHeaderValue(headerName, headerValue); +} + +void UploadPartRequest::SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) +{ + // S3's UploadPart doesn't support metadata headers so we skip adding them + if (!headerName.starts_with("x-amz-meta-")) + Model::UploadPartRequest::SetAdditionalCustomHeaderValue(headerName, headerValue); +} + Aws::String ComposeObjectRequest::SerializePayload() const { if (component_names.empty()) @@ -70,6 +84,7 @@ Aws::String ComposeObjectRequest::SerializePayload() const return payload_doc.ConvertToString(); } + void 
ComposeObjectRequest::AddQueryStringParameters(Aws::Http::URI & /*uri*/) const { } diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index bfb94a5a67e..7b4c3698f10 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -107,10 +107,20 @@ using ListObjectsV2Request = ExtendedRequest; using ListObjectsRequest = ExtendedRequest; using GetObjectRequest = ExtendedRequest; +class UploadPartRequest : public ExtendedRequest +{ +public: + void SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override; +}; + +class CompleteMultipartUploadRequest : public ExtendedRequest +{ +public: + void SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override; +}; + using CreateMultipartUploadRequest = ExtendedRequest; -using CompleteMultipartUploadRequest = ExtendedRequest; using AbortMultipartUploadRequest = ExtendedRequest; -using UploadPartRequest = ExtendedRequest; using UploadPartCopyRequest = ExtendedRequest; using PutObjectRequest = ExtendedRequest; diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 98024e74f8e..51518df268c 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -746,7 +746,12 @@ namespace break; } - if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest" || outcome.GetError().GetExceptionName() == "InvalidArgument") + if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || + outcome.GetError().GetExceptionName() == "InvalidRequest" || + outcome.GetError().GetExceptionName() == "InvalidArgument" || + (outcome.GetError().GetExceptionName() == "InternalError" && + outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT && + outcome.GetError().GetMessage().contains("use the Rewrite method in the JSON API"))) { if (!supports_multipart_copy) { diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 607be51ed25..093d26ba7bb 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -5,7 +5,7 @@ #if USE_AWS_S3 #include -#include +#include #include #include #include diff --git a/src/IO/S3/tests/TestPocoHTTPServer.h b/src/IO/S3/tests/TestPocoHTTPServer.h index 0f5ce86b388..a26e811d646 100644 --- a/src/IO/S3/tests/TestPocoHTTPServer.h +++ b/src/IO/S3/tests/TestPocoHTTPServer.h @@ -21,7 +21,7 @@ class MockRequestHandler : public Poco::Net::HTTPRequestHandler Poco::Net::MessageHeader & last_request_header; public: - MockRequestHandler(Poco::Net::MessageHeader & last_request_header_) + explicit MockRequestHandler(Poco::Net::MessageHeader & last_request_header_) : Poco::Net::HTTPRequestHandler(), last_request_header(last_request_header_) { } @@ -44,7 +44,7 @@ class HTTPRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory } public: - HTTPRequestHandlerFactory(Poco::Net::MessageHeader & last_request_header_) + explicit HTTPRequestHandlerFactory(Poco::Net::MessageHeader & last_request_header_) : Poco::Net::HTTPRequestHandlerFactory(), last_request_header(last_request_header_) { } diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 33917314bca..556b385e414 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -8,7 +8,7 @@ #include -#include +#include #include @@ -45,7 +45,7 @@ String getSSEAndSignedHeaders(const Poco::Net::MessageHeader & message_header) String content; for (const auto & [header_name, header_value] : message_header) { 
- if (boost::algorithm::starts_with(header_name, "x-amz-server-side-encryption")) + if (header_name.starts_with("x-amz-server-side-encryption")) { content += header_name + ": " + header_value + "\n"; } @@ -55,7 +55,7 @@ String getSSEAndSignedHeaders(const Poco::Net::MessageHeader & message_header) boost::split(parts, header_value, [](char c){ return c == ' '; }); for (const auto & part : parts) { - if (boost::algorithm::starts_with(part, "SignedHeaders=")) + if (part.starts_with("SignedHeaders=")) content += header_name + ": ... " + part + " ...\n"; } } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 5039059f522..56e3e0df21b 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,7 +1,9 @@ #include #include +#include #include + #include "config.h" #if USE_AWS_S3 @@ -124,6 +126,15 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const HTTPHeaderEntries headers = getHTTPHeaders(config_elem, config); ServerSideEncryptionKMSConfig sse_kms_config = getSSEKMSConfig(config_elem, config); + std::unordered_set users; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_elem, keys); + for (const auto & key : keys) + { + if (startsWith(key, "user")) + users.insert(config.getString(config_elem + "." + key)); + } + return AuthSettings { std::move(access_key_id), std::move(secret_access_key), std::move(session_token), @@ -134,10 +145,16 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const use_environment_credentials, use_insecure_imds_request, expiration_window_seconds, - no_sign_request + no_sign_request, + std::move(users) }; } +bool AuthSettings::canBeUsedByUser(const String & user) const +{ + return users.empty() || users.contains(user); +} + bool AuthSettings::hasUpdates(const AuthSettings & other) const { AuthSettings copy = *this; @@ -173,6 +190,8 @@ void AuthSettings::updateFrom(const AuthSettings & from) if (from.no_sign_request.has_value()) no_sign_request = from.no_sign_request; + + users.insert(from.users.begin(), from.users.end()); } } diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 6ee8d96ed09..b3e01bd6132 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -6,6 +6,7 @@ #include #include +#include #include "config.h" @@ -92,9 +93,13 @@ struct AuthSettings std::optional expiration_window_seconds; std::optional no_sign_request; + std::unordered_set users; + bool hasUpdates(const AuthSettings & other) const; void updateFrom(const AuthSettings & from); + bool canBeUsedByUser(const String & user) const; + private: bool operator==(const AuthSettings & other) const = default; }; diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index c002d30e633..798833e1a9b 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -82,7 +82,7 @@ public: /// (e.g. next() or supportsReadAt()). /// * Performance: there's no buffering. Each readBigAt() call typically translates into actual /// IO operation (e.g. HTTP request). Don't use it for small adjacent reads. - virtual size_t readBigAt(char * /*to*/, size_t /*n*/, size_t /*offset*/, const std::function & /*progress_callback*/ = nullptr) + virtual size_t readBigAt(char * /*to*/, size_t /*n*/, size_t /*offset*/, const std::function & /*progress_callback*/ = nullptr) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method readBigAt() not implemented"); } /// Checks if readBigAt() is allowed. May be slow, may throw (e.g. it may do an HTTP request or an fstat). 
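// The hunk above keeps readBigAt()'s documented contract: it is unbuffered, each
// call normally maps to one ranged HTTP request, and the retry code in
// ReadWriteBufferFromHTTP::readBigAt takes care not to re-read bytes already
// reported to progress_callback. A small hypothetical call site follows; the
// callback type is assumed to be std::function<bool(size_t)> (return false to
// cancel), which is an inference since template arguments are not visible in
// this listing.

#include <cstddef>
#include <vector>

size_t readSliceSketch(ReadWriteBufferFromHTTP & buf)        /// class from ReadWriteBufferFromHTTP.h above
{
    std::vector<char> chunk(1 << 20);                        /// read 1 MiB ...
    size_t bytes = buf.readBigAt(
        chunk.data(), chunk.size(), /*offset=*/ 10 << 20,    /// ... starting at the 10 MiB mark
        [](size_t) { return true; });                        /// report progress, never cancel
    /// Per the chassert in readBigAt above, `bytes` equals the requested size unless the callback cancels.
    return bytes;
}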
diff --git a/src/IO/S3/SessionAwareIOStream.h b/src/IO/SessionAwareIOStream.h similarity index 97% rename from src/IO/S3/SessionAwareIOStream.h rename to src/IO/SessionAwareIOStream.h index babe52545d1..2380bd0fd60 100644 --- a/src/IO/S3/SessionAwareIOStream.h +++ b/src/IO/SessionAwareIOStream.h @@ -3,7 +3,7 @@ #include -namespace DB::S3 +namespace DB { /** * Wrapper of IOStream to store response stream and corresponding HTTP session. diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 8d10055a3df..12a86ac55f6 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -92,7 +92,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) if (istr.eof()) [[unlikely]] throwReadAfterEOF(); - UInt64 byte = *istr.position(); + UInt64 byte = static_cast(*istr.position()); ++istr.position(); x |= (byte & 0x7F) << (7 * i); @@ -133,7 +133,7 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) if (istr == end) [[unlikely]] throwReadAfterEOF(); - UInt64 byte = *istr; + UInt64 byte = static_cast(*istr); ++istr; x |= (byte & 0x7F) << (7 * i); diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 67dbb9b2e7a..1ceb938e454 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -172,12 +172,12 @@ public: WriteBufferFromPointer(Position ptr, size_t size) : WriteBuffer(ptr, size) {} private: - virtual void finalizeImpl() override + void finalizeImpl() override { /// no op } - virtual void sync() override + void sync() override { /// no on } diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index 8ddcbc03b84..d54e1685017 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -7,6 +7,7 @@ namespace DB { WriteBufferFromHTTP::WriteBufferFromHTTP( + const HTTPConnectionGroupType & connection_group, const Poco::URI & uri, const std::string & method, const std::string & content_type, @@ -14,9 +15,10 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( const HTTPHeaderEntries & additional_headers, const ConnectionTimeouts & timeouts, size_t buffer_size_, - ProxyConfiguration proxy_configuration) + ProxyConfiguration proxy_configuration +) : WriteBufferFromOStream(buffer_size_) - , session{makeHTTPSession(uri, timeouts, proxy_configuration)} + , session{makeHTTPSession(connection_group, uri, timeouts, proxy_configuration)} , request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1} { request.setHost(uri.getHost()); diff --git a/src/IO/WriteBufferFromHTTP.h b/src/IO/WriteBufferFromHTTP.h index f1e1e2a9e91..09fd55ec290 100644 --- a/src/IO/WriteBufferFromHTTP.h +++ b/src/IO/WriteBufferFromHTTP.h @@ -19,7 +19,8 @@ namespace DB class WriteBufferFromHTTP : public WriteBufferFromOStream { public: - explicit WriteBufferFromHTTP(const Poco::URI & uri, + explicit WriteBufferFromHTTP(const HTTPConnectionGroupType & connection_group, + const Poco::URI & uri, const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only const std::string & content_type = "", const std::string & content_encoding = "", diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 230f39b074e..5dc269990a1 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index 9dddcd4b60f..88c706a590f 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -91,7 +91,8 @@ static inline void writeProbablyQuotedStringImpl(StringRef s, WriteBuffer & buf, 
if (isValidIdentifier(s.toView()) /// This are valid identifiers but are problematic if present unquoted in SQL query. && !(s.size == strlen("distinct") && 0 == strncasecmp(s.data, "distinct", strlen("distinct"))) - && !(s.size == strlen("all") && 0 == strncasecmp(s.data, "all", strlen("all")))) + && !(s.size == strlen("all") && 0 == strncasecmp(s.data, "all", strlen("all"))) + && !(s.size == strlen("table") && 0 == strncasecmp(s.data, "table", strlen("table")))) { writeString(s, buf); } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index f438990fd1c..8b743e6351b 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -39,15 +39,11 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-parameter" #pragma clang diagnostic ignored "-Wsign-compare" -#endif #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif #include diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index fcadf34f021..7d36677b468 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -20,6 +20,7 @@ struct WriteSettings bool enable_filesystem_cache_on_write_operations = false; bool enable_filesystem_cache_log = false; bool throw_on_error_from_cache = false; + size_t filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = 1000; bool s3_allow_parallel_part_upload = true; diff --git a/src/IO/copyData.cpp b/src/IO/copyData.cpp index 07222a930b5..d2c7200c350 100644 --- a/src/IO/copyData.cpp +++ b/src/IO/copyData.cpp @@ -35,7 +35,7 @@ void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t } if (check_bytes && bytes > 0) - throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF."); + throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF, left to copy {} bytes.", bytes); } void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t bytes, std::function cancellation_hook, ThrottlerPtr throttler) diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 9734ba1c84f..caf51d94bb3 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -582,11 +582,18 @@ ReturnType parseDateTimeBestEffortImpl( day_of_month = 1; if (!month) month = 1; + if (!year) { + /// If year is not specified, it will be the current year if the date is unknown or not greater than today, + /// otherwise it will be the previous year. + /// This convoluted logic is needed to parse the syslog format, which looks as follows: "Mar 3 01:33:48". + /// If you have questions, ask Victor Krasnov, https://www.linkedin.com/in/vickr/ + time_t now = time(nullptr); - UInt16 curr_year = local_time_zone.toYear(now); - year = now < local_time_zone.makeDateTime(curr_year, month, day_of_month, hour, minute, second) ? curr_year - 1 : curr_year; + auto today = local_time_zone.toDayNum(now); + UInt16 curr_year = local_time_zone.toYear(today); + year = local_time_zone.makeDayNum(curr_year, month, day_of_month) <= today ? 
curr_year : curr_year - 1; } auto is_leap_year = (year % 400 == 0) || (year % 100 != 0 && year % 4 == 0); diff --git a/src/IO/readFloatText.cpp b/src/IO/readFloatText.cpp index d1143f7c62c..17ccc1b25b7 100644 --- a/src/IO/readFloatText.cpp +++ b/src/IO/readFloatText.cpp @@ -67,4 +67,7 @@ template void readFloatText(Float64 &, ReadBuffer &); template bool tryReadFloatText(Float32 &, ReadBuffer &); template bool tryReadFloatText(Float64 &, ReadBuffer &); +template bool tryReadFloatTextNoExponent(Float32 &, ReadBuffer &); +template bool tryReadFloatTextNoExponent(Float64 &, ReadBuffer &); + } diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 23e904f305a..597f0a06fb9 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -6,14 +6,10 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunneeded-internal-declaration" -#endif #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif /** Methods for reading floating point numbers from text with decimal representation. * There are "precise", "fast" and "simple" implementations. @@ -324,7 +320,7 @@ static inline void readUIntTextUpToNSignificantDigits(T & x, ReadBuffer & buf) } -template +template ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) { static_assert(std::is_same_v || std::is_same_v, "Argument for readFloatTextImpl must be float or double"); @@ -395,30 +391,33 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) after_point_exponent = (read_digits > significant_digits ? -significant_digits : static_cast(-read_digits)) - after_point_num_leading_zeros; } - if (checkChar('e', in) || checkChar('E', in)) + if constexpr (allow_exponent) { - if (in.eof()) + if (checkChar('e', in) || checkChar('E', in)) { - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value: nothing after exponent"); - else - return false; - } + if (in.eof()) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value: nothing after exponent"); + else + return false; + } - bool exponent_negative = false; - if (*in.position() == '-') - { - exponent_negative = true; - ++in.position(); - } - else if (*in.position() == '+') - { - ++in.position(); - } + bool exponent_negative = false; + if (*in.position() == '-') + { + exponent_negative = true; + ++in.position(); + } + else if (*in.position() == '+') + { + ++in.position(); + } - readUIntTextUpToNSignificantDigits<4>(exponent, in); - if (exponent_negative) - exponent = -exponent; + readUIntTextUpToNSignificantDigits<4>(exponent, in); + if (exponent_negative) + exponent = -exponent; + } } if (after_point) @@ -604,4 +603,7 @@ template bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { retu template void readFloatText(T & x, ReadBuffer & in) { readFloatTextFast(x, in); } template bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); } +/// Don't read exponent part of the number. 
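// readFloatTextFastImpl() gains an allow_exponent template flag, and
// tryReadFloatTextNoExponent(), declared immediately below and instantiated in
// readFloatText.cpp above, uses it to stop parsing before an 'e'/'E'. The
// contrast below is an illustration; the driver function is made up and the
// expected values are inferred from the code rather than taken from a test.

#include <IO/ReadBufferFromString.h>
#include <IO/readFloatText.h>

void floatExponentSketch()
{
    {
        DB::ReadBufferFromString in("1.25e3");
        double x = 0;
        DB::tryReadFloatText(x, in);            /// consumes the exponent: x should be 1250
        (void)x;
    }
    {
        DB::ReadBufferFromString in("1.25e3");
        double x = 0;
        DB::tryReadFloatTextNoExponent(x, in);  /// stops before 'e': x should be 1.25 and "e3" stays unread
        (void)x;
    }
}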
+template bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in) { return readFloatTextFastImpl(x, in); } + } diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp index 37fbdff901a..898c7017e7d 100644 --- a/src/IO/tests/gtest_archive_reader_and_writer.cpp +++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp @@ -1,26 +1,29 @@ #include #include "config.h" +#include +#include #include #include #include #include #include +#include #include #include #include +#include #include #include #include -#include #include -#include +#include namespace DB::ErrorCodes { - extern const int CANNOT_UNPACK_ARCHIVE; - extern const int LOGICAL_ERROR; +extern const int CANNOT_UNPACK_ARCHIVE; +extern const int LOGICAL_ERROR; } namespace fs = std::filesystem; @@ -49,7 +52,8 @@ bool createArchiveWithFiles(const std::string & archivename, const std::map @@ -114,11 +117,13 @@ TEST_P(ArchiveReaderAndWriterTest, EmptyArchive) EXPECT_FALSE(reader->fileExists("nofile.txt")); - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive", - [&]{ reader->getFileInfo("nofile.txt"); }); + expectException( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive", [&] { reader->getFileInfo("nofile.txt"); }); - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive", - [&]{ reader->readFile("nofile.txt", /*throw_on_not_found=*/true); }); + expectException( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, + "File 'nofile.txt' was not found in archive", + [&] { reader->readFile("nofile.txt", /*throw_on_not_found=*/true); }); EXPECT_EQ(reader->firstFile(), nullptr); } @@ -182,11 +187,9 @@ TEST_P(ArchiveReaderAndWriterTest, SingleFileInArchive) auto enumerator = reader->firstFile(); ASSERT_NE(enumerator, nullptr); EXPECT_FALSE(enumerator->nextFile()); - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file", - [&]{ enumerator->getFileName(); }); + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file", [&] { enumerator->getFileName(); }); - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file", - [&] { reader->readFile(std::move(enumerator)); }); + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file", [&] { reader->readFile(std::move(enumerator)); }); } } @@ -217,6 +220,10 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive) ASSERT_TRUE(reader->fileExists("a.txt")); ASSERT_TRUE(reader->fileExists("b/c.txt")); + // Get all files + auto files = reader->getAllFiles(); + EXPECT_EQ(files.size(), 2); + EXPECT_EQ(reader->getFileInfo("a.txt").uncompressed_size, a_contents.size()); EXPECT_EQ(reader->getFileInfo("b/c.txt").uncompressed_size, c_contents.size()); @@ -272,6 +279,10 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive) enumerator = reader->nextFile(std::move(in)); EXPECT_EQ(enumerator, nullptr); } + + // Get all files one last time + files = reader->getAllFiles(); + EXPECT_EQ(files.size(), 2); } @@ -301,7 +312,8 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory) ASSERT_FALSE(fs::exists(getPathToArchive())); /// Read the archive. 
- auto read_archive_func = [&]() -> std::unique_ptr { return std::make_unique(archive_in_memory); }; + auto read_archive_func + = [&]() -> std::unique_ptr { return std::make_unique(archive_in_memory); }; auto reader = createArchiveReader(getPathToArchive(), read_archive_func, archive_in_memory.size()); ASSERT_TRUE(reader->fileExists("a.txt")); @@ -334,16 +346,163 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory) } +TEST_P(ArchiveReaderAndWriterTest, ManyFilesInMemory) +{ + String archive_in_memory; + int files = 1000; + size_t times = 1; + /// Make an archive. + { + auto writer = createArchiveWriter(getPathToArchive(), std::make_unique(archive_in_memory)); + { + for (int i = 0; i < files; i++) + { + auto filename = std::format("{}.txt", i); + auto contents = std::format("The contents of {}.txt", i); + auto out = writer->writeFile(filename, times * contents.size()); + for (int j = 0; j < times; j++) + writeString(contents, *out); + out->finalize(); + } + } + writer->finalize(); + } + + /// The created archive is really in memory. + ASSERT_FALSE(fs::exists(getPathToArchive())); + + /// Read the archive. + auto read_archive_func + = [&]() -> std::unique_ptr { return std::make_unique(archive_in_memory); }; + auto reader = createArchiveReader(getPathToArchive(), read_archive_func, archive_in_memory.size()); + + for (int i = 0; i < files; i++) + { + auto filename = std::format("{}.txt", i); + auto contents = std::format("The contents of {}.txt", i); + ASSERT_TRUE(reader->fileExists(filename)); + EXPECT_EQ(reader->getFileInfo(filename).uncompressed_size, times * contents.size()); + + { + auto in = reader->readFile(filename, /*throw_on_not_found=*/true); + for (int j = 0; j < times; j++) + ASSERT_TRUE(checkString(String(contents), *in)); + } + } +} + TEST_P(ArchiveReaderAndWriterTest, Password) { + auto writer = createArchiveWriter(getPathToArchive()); + //don't support passwords for tar archives + if (getPathToArchive().ends_with(".tar") || getPathToArchive().ends_with(".tar.gz") || getPathToArchive().ends_with(".tar.bz2") + || getPathToArchive().ends_with(".tar.lzma") || getPathToArchive().ends_with(".tar.zst") || getPathToArchive().ends_with(".tar.xz")) + { + expectException( + ErrorCodes::NOT_IMPLEMENTED, + "Setting a password is not currently supported for libarchive", + [&] { writer->setPassword("a.txt"); }); + writer->finalize(); + } + else + { + /// Make an archive. + std::string_view contents = "The contents of a.txt"; + { + writer->setPassword("Qwe123"); + { + auto out = writer->writeFile("a.txt"); + writeString(contents, *out); + out->finalize(); + } + writer->finalize(); + } + + /// Read the archive. + auto reader = createArchiveReader(getPathToArchive()); + + /// Try to read without a password. + expectException( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required", [&] { reader->readFile("a.txt", /*throw_on_not_found=*/true); }); + + { + /// Try to read with a wrong password. + reader->setPassword("123Qwe"); + expectException( + ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password", [&] { reader->readFile("a.txt", /*throw_on_not_found=*/true); }); + } + + { + /// Reading with the right password is successful. 
+ reader->setPassword("Qwe123"); + auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents); + } + } +} + + +TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist) +{ + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open", [&] { createArchiveReader(getPathToArchive()); }); +} + + +TEST_P(ArchiveReaderAndWriterTest, ManyFilesOnDisk) +{ + int files = 1000; + size_t times = 1; /// Make an archive. - std::string_view contents = "The contents of a.txt"; { auto writer = createArchiveWriter(getPathToArchive()); - writer->setPassword("Qwe123"); { - auto out = writer->writeFile("a.txt"); - writeString(contents, *out); + for (int i = 0; i < files; i++) + { + auto filename = std::format("{}.txt", i); + auto contents = std::format("The contents of {}.txt", i); + auto out = writer->writeFile(filename, times * contents.size()); + for (int j = 0; j < times; j++) + writeString(contents, *out); + out->finalize(); + } + } + writer->finalize(); + } + + /// The created archive is really in memory. + ASSERT_TRUE(fs::exists(getPathToArchive())); + + /// Read the archive. + auto reader = createArchiveReader(getPathToArchive()); + + for (int i = 0; i < files; i++) + { + auto filename = std::format("{}.txt", i); + auto contents = std::format("The contents of {}.txt", i); + ASSERT_TRUE(reader->fileExists(filename)); + EXPECT_EQ(reader->getFileInfo(filename).uncompressed_size, times * contents.size()); + + { + auto in = reader->readFile(filename, /*throw_on_not_found=*/true); + for (int j = 0; j < times; j++) + ASSERT_TRUE(checkString(String(contents), *in)); + } + } +} + +TEST_P(ArchiveReaderAndWriterTest, LargeFile) +{ + /// Make an archive. + std::string_view contents = "The contents of a.txt\n"; + int times = 10000000; + { + auto writer = createArchiveWriter(getPathToArchive()); + { + auto out = writer->writeFile("a.txt", times * contents.size()); + for (int i = 0; i < times; i++) + writeString(contents, *out); out->finalize(); } writer->finalize(); @@ -352,35 +511,31 @@ TEST_P(ArchiveReaderAndWriterTest, Password) /// Read the archive. auto reader = createArchiveReader(getPathToArchive()); - /// Try to read without a password. - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required", - [&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); }); + ASSERT_TRUE(reader->fileExists("a.txt")); + + auto file_info = reader->getFileInfo("a.txt"); + EXPECT_EQ(file_info.uncompressed_size, contents.size() * times); + EXPECT_GT(file_info.compressed_size, 0); { - /// Try to read with a wrong password. - reader->setPassword("123Qwe"); - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password", - [&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); }); - } - - { - /// Reading with the right password is successful. - reader->setPassword("Qwe123"); auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); - String str; - readStringUntilEOF(str, *in); - EXPECT_EQ(str, contents); + for (int i = 0; i < times; i++) + ASSERT_TRUE(checkString(String(contents), *in)); + } + + { + /// Use an enumerator. 
+ auto enumerator = reader->firstFile(); + ASSERT_NE(enumerator, nullptr); + EXPECT_EQ(enumerator->getFileName(), "a.txt"); + EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, contents.size() * times); + EXPECT_GT(enumerator->getFileInfo().compressed_size, 0); + EXPECT_FALSE(enumerator->nextFile()); } } - -TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist) +TEST(TarArchiveReaderTest, FileExists) { - expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open", - [&]{ createArchiveReader(getPathToArchive()); }); -} - -TEST(TarArchiveReaderTest, FileExists) { String archive_path = "archive.tar"; String filename = "file.txt"; String contents = "test"; @@ -391,7 +546,8 @@ TEST(TarArchiveReaderTest, FileExists) { fs::remove(archive_path); } -TEST(TarArchiveReaderTest, ReadFile) { +TEST(TarArchiveReaderTest, ReadFile) +{ String archive_path = "archive.tar"; String filename = "file.txt"; String contents = "test"; @@ -405,7 +561,8 @@ TEST(TarArchiveReaderTest, ReadFile) { fs::remove(archive_path); } -TEST(TarArchiveReaderTest, ReadTwoFiles) { +TEST(TarArchiveReaderTest, ReadTwoFiles) +{ String archive_path = "archive.tar"; String file1 = "file1.txt"; String contents1 = "test1"; @@ -421,14 +578,15 @@ TEST(TarArchiveReaderTest, ReadTwoFiles) { readStringUntilEOF(str, *in); EXPECT_EQ(str, contents1); in = reader->readFile(file2, /*throw_on_not_found=*/true); - + readStringUntilEOF(str, *in); EXPECT_EQ(str, contents2); fs::remove(archive_path); } -TEST(TarArchiveReaderTest, CheckFileInfo) { +TEST(TarArchiveReaderTest, CheckFileInfo) +{ String archive_path = "archive.tar"; String filename = "file.txt"; String contents = "test"; @@ -441,7 +599,8 @@ TEST(TarArchiveReaderTest, CheckFileInfo) { fs::remove(archive_path); } -TEST(SevenZipArchiveReaderTest, FileExists) { +TEST(SevenZipArchiveReaderTest, FileExists) +{ String archive_path = "archive.7z"; String filename = "file.txt"; String contents = "test"; @@ -452,7 +611,8 @@ TEST(SevenZipArchiveReaderTest, FileExists) { fs::remove(archive_path); } -TEST(SevenZipArchiveReaderTest, ReadFile) { +TEST(SevenZipArchiveReaderTest, ReadFile) +{ String archive_path = "archive.7z"; String filename = "file.txt"; String contents = "test"; @@ -466,7 +626,8 @@ TEST(SevenZipArchiveReaderTest, ReadFile) { fs::remove(archive_path); } -TEST(SevenZipArchiveReaderTest, CheckFileInfo) { +TEST(SevenZipArchiveReaderTest, CheckFileInfo) +{ String archive_path = "archive.7z"; String filename = "file.txt"; String contents = "test"; @@ -479,7 +640,8 @@ TEST(SevenZipArchiveReaderTest, CheckFileInfo) { fs::remove(archive_path); } -TEST(SevenZipArchiveReaderTest, ReadTwoFiles) { +TEST(SevenZipArchiveReaderTest, ReadTwoFiles) +{ String archive_path = "archive.7z"; String file1 = "file1.txt"; String contents1 = "test1"; @@ -495,23 +657,28 @@ TEST(SevenZipArchiveReaderTest, ReadTwoFiles) { readStringUntilEOF(str, *in); EXPECT_EQ(str, contents1); in = reader->readFile(file2, /*throw_on_not_found=*/true); - + readStringUntilEOF(str, *in); EXPECT_EQ(str, contents2); fs::remove(archive_path); } -#if USE_MINIZIP - namespace { - const char * supported_archive_file_exts[] = - { - ".zip" - }; +const char * supported_archive_file_exts[] = { +#if USE_MINIZIP + ".zip", +#endif +#if USE_LIBARCHIVE + ".tar", + ".tar.gz", + ".tar.bz2", + ".tar.lzma", + ".tar.zst", + ".tar.xz", +#endif +}; } INSTANTIATE_TEST_SUITE_P(All, ArchiveReaderAndWriterTest, ::testing::ValuesIn(supported_archive_file_exts)); - -#endif diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 
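The template argument lists in the archive tests above were stripped during extraction. A minimal sketch of the in-memory round-trip they exercise, assuming the buffer types are WriteBufferFromString / ReadBufferFromString and that the reader factory returns a SeekableReadBuffer (those type names and the literal path are assumptions, not confirmed by this diff):

// Sketch of the pattern used by InMemory/ManyFilesInMemory above; it relies on
// ClickHouse's createArchiveWriter/createArchiveReader helpers and IO buffer classes.
void roundTripInMemory()
{
    String archive_in_memory;

    auto writer = createArchiveWriter("archive.zip", std::make_unique<WriteBufferFromString>(archive_in_memory));
    {
        auto out = writer->writeFile("a.txt");
        writeString(std::string_view{"The contents of a.txt"}, *out);
        out->finalize();
    }
    writer->finalize();

    /// Nothing touches the filesystem: the reader re-opens the same string through a factory.
    auto read_archive_func = [&]() -> std::unique_ptr<SeekableReadBuffer>
    {
        return std::make_unique<ReadBufferFromString>(archive_in_memory);
    };
    auto reader = createArchiveReader("archive.zip", read_archive_func, archive_in_memory.size());
    auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
}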
7240679abb7..37ef217cb6d 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -64,6 +64,37 @@ std::pair getFunctionArguments(const ActionsDAG::N return { std::move(arguments), all_const }; } +bool isConstantFromScalarSubquery(const ActionsDAG::Node * node) +{ + std::stack stack; + stack.push(node); + while (!stack.empty()) + { + const auto * arg = stack.top(); + stack.pop(); + + if (arg->column && isColumnConst(*arg->column)) + continue; + + while (arg->type == ActionsDAG::ActionType::ALIAS) + arg = arg->children.at(0); + + if (arg->type != ActionsDAG::ActionType::FUNCTION) + return false; + + if (arg->function_base->getName() == "__scalarSubqueryResult") + continue; + + if (arg->children.empty() || !arg->function_base->isSuitableForConstantFolding()) + return false; + + for (const auto * child : arg->children) + stack.push(child); + } + + return true; +} + } void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const @@ -196,6 +227,19 @@ const ActionsDAG::Node & ActionsDAG::addFunction( { auto [arguments, all_const] = getFunctionArguments(children); + auto constant_args = function->getArgumentsThatAreAlwaysConstant(); + for (size_t pos : constant_args) + { + if (pos >= children.size()) + continue; + + if (arguments[pos].column && isColumnConst(*arguments[pos].column)) + continue; + + if (isConstantFromScalarSubquery(children[pos])) + arguments[pos].column = arguments[pos].type->createColumnConstWithDefaultValue(0); + } + auto function_base = function->build(arguments); return addFunctionImpl( function_base, @@ -1318,7 +1362,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( size_t num_result_columns = result.size(); if (mode == MatchColumnsMode::Position && num_input_columns != num_result_columns) - throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match"); + throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match (source: {} and result: {})", num_input_columns, num_result_columns); if (add_casted_columns && mode != MatchColumnsMode::Name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name"); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 78e125146d4..f9a58c8c3ca 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1322,7 +1322,12 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, Data & data) { - DataTypePtr type = applyVisitor(FieldToDataType(), literal.value); + DataTypePtr type; + if (data.getContext()->getSettingsRef().allow_experimental_variant_type && data.getContext()->getSettingsRef().use_variant_as_common_type) + type = applyVisitor(FieldToDataType(), literal.value); + else + type = applyVisitor(FieldToDataType(), literal.value); + const auto value = convertFieldToType(literal.value, *type); // FIXME why do we have a second pass with a clean sample block over the same diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index b977a73d461..e36f22e8ba1 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -275,13 +275,7 @@ private: if (only_replace_current_database_function) return; - for (ASTRenameQuery::Element & elem : node.elements) - { - if (!elem.from.database) - elem.from.database = 
std::make_shared(database_name); - if (!elem.to.database) - elem.to.database = std::make_shared(database_name); - } + node.setDatabaseIfNotExists(database_name); } void visitDDL(ASTAlterQuery & node, ASTPtr &) const diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index 722675f62ed..ced29c2f862 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -89,7 +89,7 @@ void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S /// Note: here we violate strict aliasing. /// It should be ok as log as we do not reffer to any value from `out` before filling. - const char * source = static_cast(column)->getRawDataBegin(); + const char * source = static_cast(column)->getRawDataBegin(); size_t offset_to = offset; if constexpr (std::endian::native == std::endian::big) offset_to = sizeof(Key) - sizeof(T) - offset; @@ -151,33 +151,33 @@ static inline T ALWAYS_INLINE packFixed( { case 1: { - memcpy(bytes + offset, static_cast(column)->getRawDataBegin<1>() + index, 1); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<1>() + index, 1); offset += 1; } break; case 2: if constexpr (sizeof(T) >= 2) /// To avoid warning about memcpy exceeding object size. { - memcpy(bytes + offset, static_cast(column)->getRawDataBegin<2>() + index * 2, 2); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<2>() + index * 2, 2); offset += 2; } break; case 4: if constexpr (sizeof(T) >= 4) { - memcpy(bytes + offset, static_cast(column)->getRawDataBegin<4>() + index * 4, 4); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<4>() + index * 4, 4); offset += 4; } break; case 8: if constexpr (sizeof(T) >= 8) { - memcpy(bytes + offset, static_cast(column)->getRawDataBegin<8>() + index * 8, 8); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<8>() + index * 8, 8); offset += 8; } break; default: - memcpy(bytes + offset, static_cast(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]); offset += key_sizes[j]; } } @@ -227,23 +227,23 @@ static inline T ALWAYS_INLINE packFixed( switch (key_sizes[j]) { case 1: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<1>() + i, 1); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<1>() + i, 1); offset += 1; break; case 2: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<2>() + i * 2, 2); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<2>() + i * 2, 2); offset += 2; break; case 4: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<4>() + i * 4, 4); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<4>() + i * 4, 4); offset += 4; break; case 8: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<8>() + i * 8, 8); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<8>() + i * 8, 8); offset += 8; break; default: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]); offset += key_sizes[j]; } } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 331cd991ea1..a9578b5540f 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -624,7 +624,7 @@ 
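The packFixed() branches above copy each key's raw bytes into one POD key via memcpy, flipping the destination offset on big-endian machines. A self-contained illustration of that packing trick (standalone C++20, not the ClickHouse code itself):

#include <bit>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Pack a small key of sizeof(T) bytes into `packed` at logical offset `offset`.
// On big-endian targets the sub-key lands at the "other end" of the word,
// mirroring the `offset_to = sizeof(Key) - sizeof(T) - offset` adjustment above.
template <typename T>
void packOne(uint64_t & packed, size_t offset, T value)
{
    size_t offset_to = offset;
    if constexpr (std::endian::native == std::endian::big)
        offset_to = sizeof(uint64_t) - sizeof(T) - offset;
    std::memcpy(reinterpret_cast<char *>(&packed) + offset_to, &value, sizeof(T));
}

int main()
{
    uint64_t key = 0;
    packOne<uint32_t>(key, 0, 0xAABBCCDDu);  // first key column, 4 bytes
    packOne<uint16_t>(key, 4, 0x1122u);      // second key column, 2 bytes
    std::printf("%016llx\n", static_cast<unsigned long long>(key));
}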
Aggregator::Aggregator(const Block & header_, const Params & params_) { size_t alignment_of_next_state = params.aggregates[i + 1].function->alignOfData(); if ((alignment_of_next_state & (alignment_of_next_state - 1)) != 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: alignOfData is not 2^N"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "`alignOfData` is not 2^N"); /// Extend total_size to next alignment requirement /// Add padding by rounding up 'total_size_of_aggregate_states' to be a multiplier of alignment_of_next_state. @@ -774,6 +774,17 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() } } + bool all_keys_are_numbers_or_strings = true; + for (size_t j = 0; j < params.keys_size; ++j) + { + if (!types_removed_nullable[j]->isValueRepresentedByNumber() && !isString(types_removed_nullable[j]) + && !isFixedString(types_removed_nullable[j])) + { + all_keys_are_numbers_or_strings = false; + break; + } + } + if (has_nullable_key) { /// Optimization for one key @@ -832,8 +843,11 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::low_cardinality_key_fixed_string; } + if (params.keys_size > 1 && all_keys_are_numbers_or_strings) + return AggregatedDataVariants::Type::nullable_prealloc_serialized; + /// Fallback case. - return AggregatedDataVariants::Type::serialized; + return AggregatedDataVariants::Type::nullable_serialized; } /// No key has been found to be nullable. @@ -857,7 +871,7 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::low_cardinality_keys128; if (size_of_field == 32) return AggregatedDataVariants::Type::low_cardinality_keys256; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: low cardinality numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LowCardinality numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } if (size_of_field == 1) @@ -872,7 +886,7 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::keys128; if (size_of_field == 32) return AggregatedDataVariants::Type::keys256; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } if (params.keys_size == 1 && isFixedString(types_removed_nullable[0])) @@ -915,6 +929,9 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::key_string; } + if (params.keys_size > 1 && all_keys_are_numbers_or_strings) + return AggregatedDataVariants::Type::prealloc_serialized; + return AggregatedDataVariants::Type::serialized; } @@ -1094,6 +1111,7 @@ void NO_INLINE Aggregator::executeImpl( bool all_keys_are_const, AggregateDataPtr overflow_row) const { + bool use_compiled_functions = false; if (!no_more_keys) { /// Prefetching doesn't make sense for small hash tables, because they fit in caches entirely. 
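chooseAggregationMethod() now checks whether every key is a plain number or (Fixed)String and, for multi-key aggregation, prefers the new prealloc_serialized variants. A toy decision function with the same branching, using simplified type flags in place of DataTypePtr (the names are illustrative only):

#include <cstdio>
#include <string>
#include <vector>

struct KeyType { bool is_number_or_string; bool is_nullable; };

// Mirrors the fallback branch above: when every key is a simple number/(Fixed)String,
// multi-key aggregation can serialize keys into a preallocated buffer, and any
// nullable key selects the nullable_* variants.
std::string chooseSerializedVariant(const std::vector<KeyType> & keys)
{
    bool all_simple = true;
    bool has_nullable = false;
    for (const auto & key : keys)
    {
        all_simple &= key.is_number_or_string;
        has_nullable |= key.is_nullable;
    }

    if (keys.size() > 1 && all_simple)
        return has_nullable ? "nullable_prealloc_serialized" : "prealloc_serialized";
    return has_nullable ? "nullable_serialized" : "serialized";
}

int main()
{
    std::vector<KeyType> keys{{true, false}, {true, true}};
    std::printf("%s\n", chooseSerializedVariant(keys).c_str());
}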
@@ -1101,33 +1119,47 @@ void NO_INLINE Aggregator::executeImpl( && (method.data.getBufferSizeInBytes() > min_bytes_for_prefetch); #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions)) - { - if (prefetch) - executeImplBatch( - method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); - else - executeImplBatch( - method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); - } - else + use_compiled_functions = compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions); #endif - { - if (prefetch) - executeImplBatch( - method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); - else - executeImplBatch( - method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); - } + if (prefetch) + executeImplBatch( + method, + state, + aggregates_pool, + row_begin, + row_end, + aggregate_instructions, + all_keys_are_const, + use_compiled_functions, + overflow_row); + else + executeImplBatch( + method, + state, + aggregates_pool, + row_begin, + row_end, + aggregate_instructions, + all_keys_are_const, + use_compiled_functions, + overflow_row); } else { - executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); + executeImplBatch( + method, + state, + aggregates_pool, + row_begin, + row_end, + aggregate_instructions, + all_keys_are_const, + use_compiled_functions, + overflow_row); } } -template +template void NO_INLINE Aggregator::executeImplBatch( Method & method, State & state, @@ -1136,6 +1168,7 @@ void NO_INLINE Aggregator::executeImplBatch( size_t row_end, AggregateFunctionInstruction * aggregate_instructions, bool all_keys_are_const, + bool use_compiled_functions [[maybe_unused]], AggregateDataPtr overflow_row) const { using KeyHolder = decltype(state.getKeyHolder(0, std::declval())); @@ -1267,7 +1300,7 @@ void NO_INLINE Aggregator::executeImplBatch( aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { const auto & compiled_aggregate_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; compiled_aggregate_functions.create_aggregate_states_function(aggregate_data); @@ -1276,20 +1309,6 @@ void NO_INLINE Aggregator::executeImplBatch( static constexpr bool skip_compiled_aggregate_functions = true; createAggregateStates(aggregate_data); } - -#if defined(MEMORY_SANITIZER) - - /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. 
- for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) - { - if (!is_aggregate_function_compiled[aggregate_function_index]) - continue; - - auto aggregate_data_with_offset = aggregate_data + offsets_of_aggregate_states[aggregate_function_index]; - auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); - __msan_unpoison(aggregate_data_with_offset, data_size); - } -#endif } else #endif @@ -1322,7 +1341,7 @@ void NO_INLINE Aggregator::executeImplBatch( } #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { std::vector columns_data; @@ -1355,9 +1374,8 @@ void NO_INLINE Aggregator::executeImplBatch( for (size_t i = 0; i < aggregate_functions.size(); ++i) { #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - if (is_aggregate_function_compiled[i]) - continue; + if (use_compiled_functions && is_aggregate_function_compiled[i]) + continue; #endif AggregateFunctionInstruction * inst = aggregate_instructions + i; @@ -1370,18 +1388,19 @@ void NO_INLINE Aggregator::executeImplBatch( } -template void NO_INLINE Aggregator::executeWithoutKeyImpl( AggregatedDataWithoutKey & res, - size_t row_begin, size_t row_end, + size_t row_begin, + size_t row_end, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) const + Arena * arena, + bool use_compiled_functions [[maybe_unused]]) const { if (row_begin == row_end) return; #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { std::vector columns_data; @@ -1401,20 +1420,6 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( auto add_into_aggregate_states_function_single_place = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function_single_place; add_into_aggregate_states_function_single_place(row_begin, row_end, columns_data.data(), res); - -#if defined(MEMORY_SANITIZER) - - /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. 
- for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) - { - if (!is_aggregate_function_compiled[aggregate_function_index]) - continue; - - auto aggregate_data_with_offset = res + offsets_of_aggregate_states[aggregate_function_index]; - auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); - __msan_unpoison(aggregate_data_with_offset, data_size); - } -#endif } #endif @@ -1422,13 +1427,10 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( for (size_t i = 0; i < aggregate_functions.size(); ++i) { AggregateFunctionInstruction * inst = aggregate_instructions + i; - #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - if (is_aggregate_function_compiled[i]) - continue; + if (use_compiled_functions && is_aggregate_function_compiled[i]) + continue; #endif - addBatchSinglePlace(row_begin, row_end, inst, res + inst->state_offset, arena); } } @@ -1687,16 +1689,14 @@ bool Aggregator::executeOnBlock(Columns columns, if (result.type == AggregatedDataVariants::Type::without_key) { /// TODO: Enable compilation after investigation -// #if USE_EMBEDDED_COMPILER -// if (compiled_aggregate_functions_holder) -// { -// executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); -// } -// else -// #endif - { - executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); - } + bool use_compiled_functions = false; + executeWithoutKeyImpl( + result.without_key, + row_begin, + row_end, + aggregate_functions_instructions.data(), + result.aggregates_pool, + use_compiled_functions); } else { @@ -1948,19 +1948,13 @@ Aggregator::convertToBlockImpl(Method & method, Table & data, Arena * arena, Are ConvertToBlockRes res; + bool use_compiled_functions = false; if (final) { #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - static constexpr bool use_compiled_functions = !Method::low_cardinality_optimization; - res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows); - } - else + use_compiled_functions = compiled_aggregate_functions_holder != nullptr && !Method::low_cardinality_optimization; #endif - { - res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows); - } + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, use_compiled_functions, rows); } else { @@ -2042,8 +2036,12 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu } -template -Block Aggregator::insertResultsIntoColumns(PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data [[maybe_unused]]) const +Block Aggregator::insertResultsIntoColumns( + PaddedPODArray & places, + OutputBlockColumns && out_cols, + Arena * arena, + bool has_null_key_data [[maybe_unused]], + bool use_compiled_functions [[maybe_unused]]) const { std::exception_ptr exception; size_t aggregate_functions_destroy_index = 0; @@ -2051,7 +2049,7 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl try { #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { /** For JIT compiled functions we need to resize columns before pass them into compiled code. * insert_aggregates_into_columns_function function does not throw exception. 
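The executeImplBatch/executeWithoutKeyImpl changes above swap the compile-time template<bool use_compiled_functions> parameter and its if constexpr branches for an ordinary runtime bool guarded by #if USE_EMBEDDED_COMPILER, so fewer copies of these large functions need to be instantiated. A minimal standalone sketch of that refactoring pattern (generic names, not the Aggregator API):

#include <cstdio>

// Before: two instantiations per remaining template parameter set,
// selected at the call site.
template <bool use_fast_path>
void processTemplated(int x)
{
    if constexpr (use_fast_path)
        std::printf("fast %d\n", x);
    else
        std::printf("slow %d\n", x);
}

// After: a single function; the flag is just data, and the branch it guards is
// cheap next to the work in each arm, so code size shrinks at little runtime cost.
void process(int x, bool use_fast_path)
{
    if (use_fast_path)
        std::printf("fast %d\n", x);
    else
        std::printf("slow %d\n", x);
}

int main()
{
    processTemplated<true>(1);
    process(2, /*use_fast_path=*/false);
}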
@@ -2081,14 +2079,13 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl for (; aggregate_functions_destroy_index < params.aggregates_size;) { - if constexpr (use_compiled_functions) +#if USE_EMBEDDED_COMPILER + if (use_compiled_functions && is_aggregate_function_compiled[aggregate_functions_destroy_index]) { - if (is_aggregate_function_compiled[aggregate_functions_destroy_index]) - { - ++aggregate_functions_destroy_index; - continue; - } + ++aggregate_functions_destroy_index; + continue; } +#endif auto & final_aggregate_column = out_cols.final_aggregate_columns[aggregate_functions_destroy_index]; size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index]; @@ -2110,14 +2107,13 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl for (; aggregate_functions_destroy_index < params.aggregates_size; ++aggregate_functions_destroy_index) { - if constexpr (use_compiled_functions) +#if USE_EMBEDDED_COMPILER + if (use_compiled_functions && is_aggregate_function_compiled[aggregate_functions_destroy_index]) { - if (is_aggregate_function_compiled[aggregate_functions_destroy_index]) - { - ++aggregate_functions_destroy_index; - continue; - } + ++aggregate_functions_destroy_index; + continue; } +#endif size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index]; aggregate_functions[aggregate_functions_destroy_index]->destroyBatch(0, places.size(), places.data(), offset); @@ -2129,9 +2125,9 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl return finalizeBlock(params, getHeader(/* final */ true), std::move(out_cols), /* final */ true, places.size()); } -template -Aggregator::ConvertToBlockRes NO_INLINE -Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const +template +Aggregator::ConvertToBlockRes NO_INLINE Aggregator::convertToBlockImplFinal( + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool use_compiled_functions [[maybe_unused]], size_t) const { /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated const size_t max_block_size = (return_single_block ? 
data.size() : std::min(params.max_block_size, data.size())) + 1; @@ -2187,7 +2183,8 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena { if (places.size() >= max_block_size) { - res.emplace_back(insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data)); + res.emplace_back( + insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions)); places.clear(); out_cols.reset(); has_null_key_data = false; @@ -2197,12 +2194,13 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena if constexpr (return_single_block) { - return insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data); + return insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions); } else { if (out_cols.has_value()) - res.emplace_back(insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data)); + res.emplace_back( + insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions)); return res; } } @@ -2592,8 +2590,9 @@ void NO_INLINE Aggregator::mergeDataNullKey( } } -template -void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const +template +void NO_INLINE +Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]]) const { if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization) mergeDataNullKey(table_dst, table_src, arena); @@ -2620,7 +2619,7 @@ void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, A table_src.clearAndShrink(); #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; compiled_functions.merge_aggregate_states_function(dst_places.data(), src_places.data(), dst_places.size()); @@ -2770,26 +2769,16 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( if (!no_more_keys) { + bool use_compiled_functions = false; #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - if (prefetch) - mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool); - else - mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool); - } - else + use_compiled_functions = compiled_aggregate_functions_holder != nullptr; #endif - { - if (prefetch) - mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool); - else - mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool); - } + if (prefetch) + mergeDataImpl( + getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, use_compiled_functions); + else + mergeDataImpl( + getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, use_compiled_functions); } else if (res->without_key) { @@ -2834,26 +2823,22 @@ void NO_INLINE Aggregator::mergeBucketImpl( return; AggregatedDataVariants & current = *data[result_num]; + bool use_compiled_functions = false; #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - if (prefetch) - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena); - else - mergeDataImpl( - 
getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena); - } - else + use_compiled_functions = compiled_aggregate_functions_holder != nullptr; #endif - { - if (prefetch) - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena); - else - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena); - } + if (prefetch) + mergeDataImpl( + getDataVariant(*res).data.impls[bucket], + getDataVariant(current).data.impls[bucket], + arena, + use_compiled_functions); + else + mergeDataImpl( + getDataVariant(*res).data.impls[bucket], + getDataVariant(current).data.impls[bucket], + arena, + use_compiled_functions); } } @@ -2921,11 +2906,12 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData return non_empty_data; } -template +template void NO_INLINE Aggregator::mergeStreamsImplCase( Arena * aggregates_pool, State & state, Table & data, + bool no_more_keys, AggregateDataPtr overflow_row, size_t row_begin, size_t row_end, @@ -2937,36 +2923,34 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( if (!arena_for_keys) arena_for_keys = aggregates_pool; - for (size_t i = row_begin; i < row_end; ++i) + if (no_more_keys) { - AggregateDataPtr aggregate_data = nullptr; - - if constexpr (!no_more_keys) + for (size_t i = row_begin; i < row_end; i++) { - auto emplace_result = state.emplaceKey(data, i, *arena_for_keys); // NOLINT - if (emplace_result.isInserted()) + auto find_result = state.findKey(data, i, *arena_for_keys); + /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. + AggregateDataPtr value = find_result.isFound() ? find_result.getMapped() : overflow_row; + places[i] = value; + } + } + else + { + for (size_t i = row_begin; i < row_end; i++) + { + auto emplace_result = state.emplaceKey(data, i, *arena_for_keys); + if (!emplace_result.isInserted()) + places[i] = emplace_result.getMapped(); + else { emplace_result.setMapped(nullptr); - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + AggregateDataPtr aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); createAggregateStates(aggregate_data); emplace_result.setMapped(aggregate_data); + places[i] = aggregate_data; } - else - aggregate_data = emplace_result.getMapped(); } - else - { - auto find_result = state.findKey(data, i, *arena_for_keys); - if (find_result.isFound()) - aggregate_data = find_result.getMapped(); - } - - /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. - - AggregateDataPtr value = aggregate_data ? 
aggregate_data : overflow_row; - places[i] = value; } for (size_t j = 0; j < params.aggregates_size; ++j) @@ -3020,22 +3004,16 @@ void NO_INLINE Aggregator::mergeStreamsImpl( if (use_cache) { typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - - if (!no_more_keys) - mergeStreamsImplCase(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); - else - mergeStreamsImplCase(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); + mergeStreamsImplCase( + aggregates_pool, state, data, no_more_keys, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); consecutive_keys_cache_stats.update(row_end - row_begin, state.getCacheMissesSinceLastReset()); } else { typename Method::StateNoCache state(key_columns, key_sizes, aggregation_state_cache); - - if (!no_more_keys) - mergeStreamsImplCase(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); - else - mergeStreamsImplCase(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); + mergeStreamsImplCase( + aggregates_pool, state, data, no_more_keys, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); } } @@ -3308,12 +3286,15 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) auto merge_method = method_chosen; #define APPLY_FOR_VARIANTS_THAT_MAY_USE_BETTER_HASH_FUNCTION(M) \ - M(key64) \ - M(key_string) \ - M(key_fixed_string) \ - M(keys128) \ - M(keys256) \ - M(serialized) \ + M(key64) \ + M(key_string) \ + M(key_fixed_string) \ + M(keys128) \ + M(keys256) \ + M(serialized) \ + M(nullable_serialized) \ + M(prealloc_serialized) \ + M(nullable_prealloc_serialized) \ #define M(NAME) \ if (merge_method == AggregatedDataVariants::Type::NAME) \ diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 109bd0dd939..6c357623003 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -227,17 +227,17 @@ struct AggregationMethodOneNumber // Insert the key from the hash table into columns. static void insertKeyIntoColumns(const Key & key, std::vector & key_columns, const Sizes & /*key_sizes*/) { - ColumnVectorHelper * column; + ColumnFixedSizeHelper * column; if constexpr (nullable) { ColumnNullable & nullable_col = assert_cast(*key_columns[0]); ColumnUInt8 * null_map = assert_cast(&nullable_col.getNullMapColumn()); null_map->insertDefault(); - column = static_cast(&nullable_col.getNestedColumn()); + column = static_cast(&nullable_col.getNestedColumn()); } else { - column = static_cast(key_columns[0]); + column = static_cast(key_columns[0]); } static_assert(sizeof(FieldType) <= sizeof(Key)); const auto * key_holder = reinterpret_cast(&key); @@ -561,7 +561,7 @@ struct AggregationMethodKeysFixed * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes. * Therefore, when aggregating by several strings, there is no ambiguity. 
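mergeStreamsImplCase() above now receives no_more_keys at run time and splits the row loop in two: a lookup-only pass that routes unknown keys to overflow_row, and an emplace pass that creates fresh aggregate state. A self-contained analogue with std::unordered_map standing in for the hash-table State (illustrative only):

#include <string>
#include <unordered_map>
#include <vector>

using AggState = int;  // stand-in for AggregateDataPtr

// Returns one state pointer per row, creating states only while new keys are allowed.
std::vector<AggState *> placeRows(
    std::unordered_map<std::string, AggState> & table,
    const std::vector<std::string> & keys,
    bool no_more_keys,
    AggState & overflow_row)
{
    std::vector<AggState *> places(keys.size());
    if (no_more_keys)
    {
        // Lookup-only pass: keys that did not fit into the table aggregate into the overflow row.
        for (size_t i = 0; i < keys.size(); ++i)
        {
            auto it = table.find(keys[i]);
            places[i] = (it != table.end()) ? &it->second : &overflow_row;
        }
    }
    else
    {
        // Emplace pass: create a zero-initialised state for every new key.
        for (size_t i = 0; i < keys.size(); ++i)
        {
            auto [it, inserted] = table.try_emplace(keys[i], 0);
            places[i] = &it->second;
        }
    }
    return places;
}

int main()
{
    std::unordered_map<std::string, AggState> table;
    AggState overflow = 0;
    auto places = placeRows(table, {"a", "b", "a"}, /*no_more_keys=*/false, overflow);
    *places[0] += 1;
}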
*/ -template +template struct AggregationMethodSerialized { using Data = TData; @@ -580,7 +580,7 @@ struct AggregationMethodSerialized } template - using StateImpl = ColumnsHashing::HashMethodSerialized; + using StateImpl = ColumnsHashing::HashMethodSerialized; using State = StateImpl; using StateNoCache = StateImpl; @@ -598,6 +598,14 @@ struct AggregationMethodSerialized } }; +template +using AggregationMethodNullableSerialized = AggregationMethodSerialized; + +template +using AggregationMethodPreallocSerialized = AggregationMethodSerialized; + +template +using AggregationMethodNullablePreallocSerialized = AggregationMethodSerialized; class Aggregator; @@ -655,7 +663,10 @@ struct AggregatedDataVariants : private boost::noncopyable std::unique_ptr> keys64; std::unique_ptr> keys128; std::unique_ptr> keys256; - std::unique_ptr> serialized; + std::unique_ptr> serialized; + std::unique_ptr> nullable_serialized; + std::unique_ptr> prealloc_serialized; + std::unique_ptr> nullable_prealloc_serialized; std::unique_ptr> key32_two_level; std::unique_ptr> key64_two_level; @@ -665,14 +676,20 @@ struct AggregatedDataVariants : private boost::noncopyable std::unique_ptr> keys64_two_level; std::unique_ptr> keys128_two_level; std::unique_ptr> keys256_two_level; - std::unique_ptr> serialized_two_level; + std::unique_ptr> serialized_two_level; + std::unique_ptr> nullable_serialized_two_level; + std::unique_ptr> prealloc_serialized_two_level; + std::unique_ptr> nullable_prealloc_serialized_two_level; std::unique_ptr> key64_hash64; std::unique_ptr> key_string_hash64; std::unique_ptr> key_fixed_string_hash64; std::unique_ptr> keys128_hash64; std::unique_ptr> keys256_hash64; - std::unique_ptr> serialized_hash64; + std::unique_ptr> serialized_hash64; + std::unique_ptr> nullable_serialized_hash64; + std::unique_ptr> prealloc_serialized_hash64; + std::unique_ptr> nullable_prealloc_serialized_hash64; /// Support for nullable keys. 
std::unique_ptr> nullable_key8; @@ -723,7 +740,10 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys64, false) \ M(keys128, false) \ M(keys256, false) \ - M(serialized, false) \ + M(serialized, false) \ + M(nullable_serialized, false) \ + M(prealloc_serialized, false) \ + M(nullable_prealloc_serialized, false) \ M(key32_two_level, true) \ M(key64_two_level, true) \ M(key_string_two_level, true) \ @@ -732,13 +752,19 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys64_two_level, true) \ M(keys128_two_level, true) \ M(keys256_two_level, true) \ - M(serialized_two_level, true) \ + M(serialized_two_level, true) \ + M(nullable_serialized_two_level, true) \ + M(prealloc_serialized_two_level, true) \ + M(nullable_prealloc_serialized_two_level, true) \ M(key64_hash64, false) \ M(key_string_hash64, false) \ M(key_fixed_string_hash64, false) \ M(keys128_hash64, false) \ M(keys256_hash64, false) \ - M(serialized_hash64, false) \ + M(serialized_hash64, false) \ + M(nullable_serialized_hash64, false) \ + M(prealloc_serialized_hash64, false) \ + M(nullable_prealloc_serialized_hash64, false) \ M(nullable_key8, false) \ M(nullable_key16, false) \ M(nullable_key32, false) \ @@ -863,6 +889,9 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys128) \ M(keys256) \ M(serialized) \ + M(nullable_serialized) \ + M(prealloc_serialized) \ + M(nullable_prealloc_serialized) \ M(nullable_key32) \ M(nullable_key64) \ M(nullable_key_string) \ @@ -889,6 +918,9 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys128_hash64) \ M(keys256_hash64) \ M(serialized_hash64) \ + M(nullable_serialized_hash64) \ + M(prealloc_serialized_hash64) \ + M(nullable_prealloc_serialized_hash64) \ M(low_cardinality_key8) \ M(low_cardinality_key16) \ @@ -925,6 +957,9 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys128_two_level) \ M(keys256_two_level) \ M(serialized_two_level) \ + M(nullable_serialized_two_level) \ + M(prealloc_serialized_two_level) \ + M(nullable_prealloc_serialized_two_level) \ M(nullable_key32_two_level) \ M(nullable_key64_two_level) \ M(nullable_key_string_two_level) \ @@ -1360,7 +1395,7 @@ private: AggregateDataPtr overflow_row) const; /// Specialization for a particular value no_more_keys. - template + template void executeImplBatch( Method & method, State & state, @@ -1369,16 +1404,17 @@ private: size_t row_end, AggregateFunctionInstruction * aggregate_instructions, bool all_keys_are_const, + bool use_compiled_functions, AggregateDataPtr overflow_row) const; /// For case when there are no keys (all aggregate into one row). - template void executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) const; + Arena * arena, + bool use_compiled_functions) const; template void writeToTemporaryFileImpl( @@ -1394,8 +1430,8 @@ private: Arena * arena) const; /// Merge data from hash table `src` into `dst`. - template - void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const; + template + void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions) const; /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`. 
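Extraction also dropped the template arguments of AggregationMethodSerialized and of the three aliases introduced above. Presumably they differ only in two boolean flags (nullable, prealloc) forwarded to HashMethodSerialized; the following is a hedged reconstruction of the likely shape, not a verbatim copy of the header:

// Assumed shape only; the real definition lives in src/Interpreters/Aggregator.h.
template <typename TData, bool nullable = false, bool prealloc = false>
struct AggregationMethodSerialized;

template <typename TData>
using AggregationMethodNullableSerialized = AggregationMethodSerialized<TData, true, false>;

template <typename TData>
using AggregationMethodPreallocSerialized = AggregationMethodSerialized<TData, false, true>;

template <typename TData>
using AggregationMethodNullablePreallocSerialized = AggregationMethodSerialized<TData, true, true>;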
template @@ -1432,12 +1468,16 @@ private: MutableColumns & final_aggregate_columns, Arena * arena) const; - template - Block insertResultsIntoColumns(PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data) const; + Block insertResultsIntoColumns( + PaddedPODArray & places, + OutputBlockColumns && out_cols, + Arena * arena, + bool has_null_key_data, + bool use_compiled_functions) const; - template - ConvertToBlockRes - convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows) const; + template + ConvertToBlockRes convertToBlockImplFinal( + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool use_compiled_functions, size_t rows) const; template ConvertToBlockRes @@ -1473,11 +1513,12 @@ private: bool final, ThreadPool * thread_pool) const; - template + template void mergeStreamsImplCase( Arena * aggregates_pool, State & state, Table & data, + bool no_more_keys, AggregateDataPtr overflow_row, size_t row_begin, size_t row_end, diff --git a/src/Interpreters/ArrayJoinedColumnsVisitor.h b/src/Interpreters/ArrayJoinedColumnsVisitor.h index 3bbd6982213..f16751c4561 100644 --- a/src/Interpreters/ArrayJoinedColumnsVisitor.h +++ b/src/Interpreters/ArrayJoinedColumnsVisitor.h @@ -62,7 +62,7 @@ private: { auto [array_join_expression_list, _] = node.arrayJoinExpressionList(); if (!array_join_expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no ARRAY JOIN"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No ARRAY JOIN"); std::vector out; out.reserve(array_join_expression_list->children.size()); diff --git a/src/Interpreters/AsynchronousInsertLog.cpp b/src/Interpreters/AsynchronousInsertLog.cpp index 5d851f6b47d..0fc39c77fb4 100644 --- a/src/Interpreters/AsynchronousInsertLog.cpp +++ b/src/Interpreters/AsynchronousInsertLog.cpp @@ -33,26 +33,26 @@ ColumnsDescription AsynchronousInsertLogElement::getColumnsDescription() }); return ColumnsDescription{ - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the async insert happened."}, + {"event_time", std::make_shared(), "The date and time when the async insert finished execution."}, + {"event_time_microseconds", std::make_shared(6), "The date and time when the async insert finished execution with microseconds precision."}, - {"query", std::make_shared()}, - {"database", std::make_shared(std::make_shared())}, - {"table", std::make_shared(std::make_shared())}, - {"format", std::make_shared(std::make_shared())}, - {"query_id", std::make_shared()}, - {"bytes", std::make_shared()}, - {"rows", std::make_shared()}, - {"exception", std::make_shared()}, - {"status", type_status}, - {"data_kind", type_data_kind}, + {"query", std::make_shared(), "Query string."}, + {"database", std::make_shared(std::make_shared()), "The name of the database the table is in."}, + {"table", std::make_shared(std::make_shared()), "Table name."}, + {"format", std::make_shared(std::make_shared()), "Format name."}, + {"query_id", std::make_shared(), "ID of the initial query."}, + {"bytes", std::make_shared(), "Number of inserted bytes."}, + {"rows", std::make_shared(), "Number of inserted rows."}, + {"exception", std::make_shared(), "Exception message."}, + {"status", 
type_status, "Status of the view. Values: 'Ok' = 1 — Successful insert, 'ParsingError' = 2 — Exception when parsing the data, 'FlushError' = 3 — Exception when flushing the data"}, + {"data_kind", type_data_kind, "The status of the data. Value: 'Parsed' and 'Preprocessed'."}, - {"flush_time", std::make_shared()}, - {"flush_time_microseconds", std::make_shared(6)}, - {"flush_query_id", std::make_shared()}, - {"timeout_milliseconds", std::make_shared()}, + {"flush_time", std::make_shared(), "The date and time when the flush happened."}, + {"flush_time_microseconds", std::make_shared(6), "The date and time when the flush happened with microseconds precision."}, + {"flush_query_id", std::make_shared(), "ID of the flush query."}, + {"timeout_milliseconds", std::make_shared(), "The adaptive timeout calculated for this entry."}, }; } diff --git a/src/Interpreters/AsynchronousInsertLog.h b/src/Interpreters/AsynchronousInsertLog.h index 70b56a273ad..b42c28ed41a 100644 --- a/src/Interpreters/AsynchronousInsertLog.h +++ b/src/Interpreters/AsynchronousInsertLog.h @@ -12,7 +12,7 @@ namespace DB struct AsynchronousInsertLogElement { - enum Status : Int8 + enum Status : int8_t { Ok = 0, ParsingError = 1, diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 44cc58cec84..7d56dbabe3c 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -214,44 +214,57 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo AsynchronousInsertQueue::~AsynchronousInsertQueue() { - LOG_TRACE(log, "Shutting down the asynchronous insertion queue"); - shutdown = true; - - for (size_t i = 0; i < pool_size; ++i) + try { - auto & shard = queue_shards[i]; + LOG_TRACE(log, "Shutting down the asynchronous insertion queue"); + shutdown = true; - shard.are_tasks_available.notify_one(); - assert(dump_by_first_update_threads[i].joinable()); - dump_by_first_update_threads[i].join(); - - if (flush_on_shutdown) - { - for (auto & [_, elem] : shard.queue) - scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext(), i); - } - else + for (size_t i = 0; i < pool_size; ++i) { + auto & shard = queue_shards[i]; - for (auto & [_, elem] : shard.queue) - for (const auto & entry : elem.data->entries) - entry->finish(std::make_exception_ptr(Exception( - ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded)"))); + shard.are_tasks_available.notify_one(); + assert(dump_by_first_update_threads[i].joinable()); + dump_by_first_update_threads[i].join(); + + if (flush_on_shutdown) + { + for (auto & [_, elem] : shard.queue) + scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext(), i); + } + else + { + for (auto & [_, elem] : shard.queue) + for (const auto & entry : elem.data->entries) + entry->finish( + std::make_exception_ptr(Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded)"))); + } } + + pool.wait(); + LOG_TRACE(log, "Asynchronous insertion queue finished"); + } + catch (...) + { + tryLogCurrentException(log); + pool.wait(); } - - pool.wait(); - LOG_TRACE(log, "Asynchronous insertion queue finished"); } void AsynchronousInsertQueue::scheduleDataProcessingJob( const InsertQuery & key, InsertDataPtr data, ContextPtr global_context, size_t shard_num) { + /// Intuitively it seems reasonable to process first inserted blocks first. 
+ /// We add new chunks in the end of entries list, so they are automatically ordered by creation time + chassert(!data->entries.empty()); + const auto priority = Priority{data->entries.front()->create_time.time_since_epoch().count()}; + /// Wrap 'unique_ptr' with 'shared_ptr' to make this /// lambda copyable and allow to save it to the thread pool. pool.scheduleOrThrowOnError( [this, key, global_context, shard_num, my_data = std::make_shared(std::move(data))]() mutable - { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }); + { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }, + priority); } void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context) @@ -261,7 +274,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); - auto sample_block = interpreter.getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr()); + auto sample_block = interpreter.getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); if (!FormatFactory::instance().isInputFormat(insert_query.format)) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); @@ -375,6 +388,7 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr assert(data); auto size_in_bytes = data->size_in_bytes; data->size_in_bytes += entry_data_size; + /// We rely on the fact that entries are being added to the list in order of creation time in `scheduleDataProcessingJob()` data->entries.emplace_back(entry); insert_future = entry->getFuture(); diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 17140030766..f60b3d343fb 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -37,15 +37,15 @@ public: Status status; /// Future that allows to wait until the query is flushed. - std::future future; + std::future future{}; /// Read buffer that contains extracted /// from query data in case of too much data. - std::unique_ptr insert_data_buffer; + std::unique_ptr insert_data_buffer{}; /// Block that contains received by Native /// protocol data in case of too much data. - Block insert_block; + Block insert_block{}; }; enum class DataKind diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 0be8dbc2ec8..739b2aa5b56 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -29,18 +29,6 @@ struct AsynchronousMetricLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - - /// Returns the list of columns as in CREATE TABLE statement or nullptr. - /// If it's not nullptr, this list of columns will be used to create the table. - /// Otherwise the list will be constructed from LogElement::getNamesAndTypes and LogElement::getNamesAndAliases. 
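scheduleDataProcessingJob() above derives the job priority from the create_time of the oldest buffered entry, so data inserted first is flushed first. A self-contained sketch of the same idea with a min-priority queue (illustrative types, not ClickHouse's ThreadPool/Priority):

#include <chrono>
#include <cstdint>
#include <functional>
#include <queue>
#include <vector>

using Clock = std::chrono::system_clock;

struct Job
{
    int64_t priority;  // smaller value == runs earlier
    std::function<void()> run;
};

struct ByPriority
{
    bool operator()(const Job & a, const Job & b) const { return a.priority > b.priority; }
};

int main()
{
    std::priority_queue<Job, std::vector<Job>, ByPriority> queue;

    // Priority is simply the creation timestamp of the first buffered entry,
    // so older buffers win against newly arrived data.
    auto enqueue = [&](Clock::time_point first_entry_create_time, std::function<void()> fn)
    {
        queue.push(Job{static_cast<int64_t>(first_entry_create_time.time_since_epoch().count()), std::move(fn)});
    };

    enqueue(Clock::now(), []{});
    enqueue(Clock::now() - std::chrono::seconds(5), []{});  // flushed first

    while (!queue.empty())
    {
        queue.top().run();
        queue.pop();
    }
}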
- static const char * getCustomColumnList() - { - return "hostname LowCardinality(String) CODEC(ZSTD(1)), " - "event_date Date CODEC(Delta(2), ZSTD(1)), " - "event_time DateTime CODEC(Delta(4), ZSTD(1)), " - "metric LowCardinality(String) CODEC(ZSTD(1)), " - "value Float64 CODEC(ZSTD(3))"; - } }; class AsynchronousMetricLog : public SystemLog diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index d34e982ffc3..af6c7cf6234 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -22,23 +22,24 @@ ColumnsDescription BackupLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"id", std::make_shared()}, - {"name", std::make_shared()}, - {"base_backup_name", std::make_shared()}, - {"status", std::make_shared(getBackupStatusEnumValues())}, - {"error", std::make_shared()}, - {"start_time", std::make_shared()}, - {"end_time", std::make_shared()}, - {"num_files", std::make_shared()}, - {"total_size", std::make_shared()}, - {"num_entries", std::make_shared()}, - {"uncompressed_size", std::make_shared()}, - {"compressed_size", std::make_shared()}, - {"files_read", std::make_shared()}, - {"bytes_read", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time_microseconds", std::make_shared(6), "Time of the entry with microseconds precision."}, + {"id", std::make_shared(), "Identifier of the backup or restore operation."}, + {"name", std::make_shared(), "Name of the backup storage (the contents of the FROM or TO clause)."}, + {"base_backup_name", std::make_shared(), "The name of base backup in case incremental one."}, + {"query_id", std::make_shared(), "The ID of a query associated with a backup operation."}, + {"status", std::make_shared(getBackupStatusEnumValues()), "Operation status."}, + {"error", std::make_shared(), "Error message of the failed operation (empty string for successful operations)."}, + {"start_time", std::make_shared(), "Start time of the operation."}, + {"end_time", std::make_shared(), "End time of the operation."}, + {"num_files", std::make_shared(), "Number of files stored in the backup."}, + {"total_size", std::make_shared(), "Total size of files stored in the backup."}, + {"num_entries", std::make_shared(), "Number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder, or the number of files inside the archive if the backup is stored as an archive. It is not the same as num_files if it's an incremental backup or if it contains empty files or duplicates. The following is always true: num_entries <= num_files."}, + {"uncompressed_size", std::make_shared(), "Uncompressed size of the backup."}, + {"compressed_size", std::make_shared(), "Compressed size of the backup. 
If the backup is not stored as an archive it equals to uncompressed_size."}, + {"files_read", std::make_shared(), "Number of files read during the restore operation."}, + {"bytes_read", std::make_shared(), "Total size of files read during the restore operation."}, }; } @@ -51,6 +52,7 @@ void BackupLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(info.id); columns[i++]->insert(info.name); columns[i++]->insert(info.base_backup_name); + columns[i++]->insert(info.query_id); columns[i++]->insert(static_cast(info.status)); columns[i++]->insert(info.error_message); columns[i++]->insert(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); diff --git a/src/Interpreters/BackupLog.h b/src/Interpreters/BackupLog.h index 626bd55726d..ee6a4c214ee 100644 --- a/src/Interpreters/BackupLog.h +++ b/src/Interpreters/BackupLog.h @@ -15,7 +15,7 @@ namespace DB struct BackupLogElement { BackupLogElement() = default; - BackupLogElement(BackupOperationInfo info_); + explicit BackupLogElement(BackupOperationInfo info_); BackupLogElement(const BackupLogElement &) = default; BackupLogElement & operator=(const BackupLogElement &) = default; BackupLogElement(BackupLogElement &&) = default; diff --git a/src/Interpreters/BlobStorageLog.cpp b/src/Interpreters/BlobStorageLog.cpp index 520405374ca..f9d5b0d6790 100644 --- a/src/Interpreters/BlobStorageLog.cpp +++ b/src/Interpreters/BlobStorageLog.cpp @@ -26,23 +26,23 @@ ColumnsDescription BlobStorageLogElement::getColumnsDescription() return ColumnsDescription { - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"event_date", std::make_shared(), "Date of the event."}, + {"event_time", std::make_shared(), "Time of the event."}, + {"event_time_microseconds", std::make_shared(6), "Time of the event with microseconds precision."}, - {"event_type", event_enum_type}, + {"event_type", event_enum_type, "Type of the event. 
Possible values: 'Upload', 'Delete', 'MultiPartUploadCreate', 'MultiPartUploadWrite', 'MultiPartUploadComplete', 'MultiPartUploadAbort'"}, - {"query_id", std::make_shared()}, - {"thread_id", std::make_shared()}, - {"thread_name", std::make_shared()}, + {"query_id", std::make_shared(), "Identifier of the query associated with the event, if any."}, + {"thread_id", std::make_shared(), "Identifier of the thread performing the operation."}, + {"thread_name", std::make_shared(), "Name of the thread performing the operation."}, - {"disk_name", std::make_shared(std::make_shared())}, - {"bucket", std::make_shared()}, - {"remote_path", std::make_shared()}, - {"local_path", std::make_shared()}, - {"data_size", std::make_shared()}, + {"disk_name", std::make_shared(std::make_shared()), "Name of the associated disk."}, + {"bucket", std::make_shared(), "Name of the bucket."}, + {"remote_path", std::make_shared(), "Path to the remote resource."}, + {"local_path", std::make_shared(), "Path to the metadata file on the local system, which references the remote resource."}, + {"data_size", std::make_shared(), "Size of the data involved in the upload event."}, - {"error", std::make_shared()}, + {"error", std::make_shared(), "Error message associated with the event, if any."}, }; } diff --git a/src/Interpreters/BlobStorageLog.h b/src/Interpreters/BlobStorageLog.h index aa57ee44c0f..15e15be4f87 100644 --- a/src/Interpreters/BlobStorageLog.h +++ b/src/Interpreters/BlobStorageLog.h @@ -12,7 +12,7 @@ namespace DB struct BlobStorageLogElement { - enum class EventType : Int8 + enum class EventType : int8_t { Upload = 1, Delete = 2, diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index d242544f787..ea40ffcfa3c 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -27,6 +27,7 @@ namespace ProfileEvents extern const Event FilesystemCacheReserveMicroseconds; extern const Event FilesystemCacheGetOrSetMicroseconds; extern const Event FilesystemCacheGetMicroseconds; + extern const Event FilesystemCacheFailToReserveSpaceBecauseOfLockContention; } namespace DB @@ -188,6 +189,11 @@ CacheGuard::Lock FileCache::lockCache() const return cache_guard.lock(); } +CacheGuard::Lock FileCache::tryLockCache(std::optional acquire_timeout) const +{ + return acquire_timeout.has_value() ? 
cache_guard.tryLockFor(acquire_timeout.value()) : cache_guard.tryLock(); +} + FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment::Range & range, size_t file_segments_limit) const { /// Given range = [left, right] and non-overlapping ordered set of file segments, @@ -771,12 +777,18 @@ bool FileCache::tryReserve( FileSegment & file_segment, const size_t size, FileCacheReserveStat & reserve_stat, - const UserInfo & user) + const UserInfo & user, + size_t lock_wait_timeout_milliseconds) { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheReserveMicroseconds); assertInitialized(); - auto cache_lock = lockCache(); + auto cache_lock = tryLockCache(std::chrono::milliseconds(lock_wait_timeout_milliseconds)); + if (!cache_lock) + { + ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); + return false; + } LOG_TEST( log, "Trying to reserve space ({} bytes) for {}:{}, current usage {}/{}", diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 2de2f347999..007c4fd9483 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -161,7 +161,8 @@ public: FileSegment & file_segment, size_t size, FileCacheReserveStat & stat, - const UserInfo & user); + const UserInfo & user, + size_t lock_wait_timeout_milliseconds); std::vector getFileSegmentInfos(const UserID & user_id); @@ -173,6 +174,7 @@ public: void deactivateBackgroundOperations(); CacheGuard::Lock lockCache() const; + CacheGuard::Lock tryLockCache(std::optional acquire_timeout = std::nullopt) const; std::vector sync(); diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 8bd89465917..9ec2b090dc7 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -23,6 +24,7 @@ namespace ProfileEvents extern const Event FileSegmentWriteMicroseconds; extern const Event FileSegmentUseMicroseconds; extern const Event FileSegmentHolderCompleteMicroseconds; + extern const Event FileSegmentFailToIncreasePriority; extern const Event FilesystemCacheHoldFileSegments; extern const Event FilesystemCacheUnusedHoldFileSegments; } @@ -194,7 +196,7 @@ bool FileSegment::isDownloaded() const String FileSegment::getCallerId() { if (!CurrentThread::isInitialized() || CurrentThread::getQueryId().empty()) - return "None:" + toString(getThreadId()); + return fmt::format("None:{}:{}", getThreadName(), toString(getThreadId())); return std::string(CurrentThread::getQueryId()) + ":" + toString(getThreadId()); } @@ -495,7 +497,7 @@ LockedKeyPtr FileSegment::lockKeyMetadata(bool assert_exists) const return metadata->tryLock(); } -bool FileSegment::reserve(size_t size_to_reserve, FileCacheReserveStat * reserve_stat) +bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat) { if (!size_to_reserve) throw Exception(ErrorCodes::LOGICAL_ERROR, "Zero space reservation is not allowed"); @@ -547,7 +549,7 @@ bool FileSegment::reserve(size_t size_to_reserve, FileCacheReserveStat * reserve if (!reserve_stat) reserve_stat = &dummy_stat; - bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user); + bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds); if (!reserved) setDownloadFailedUnlocked(lockFileSegment()); @@ 
-965,8 +967,10 @@ void FileSegment::increasePriority() auto it = getQueueIterator(); if (it) { - auto cache_lock = cache->lockCache(); - hits_count = it->increasePriority(cache_lock); + if (auto cache_lock = cache->tryLockCache()) + hits_count = it->increasePriority(cache_lock); + else + ProfileEvents::increment(ProfileEvents::FileSegmentFailToIncreasePriority); } } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index ea97a6b0157..c34ee064345 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -199,7 +199,7 @@ public: /// Try to reserve exactly `size` bytes (in addition to the getDownloadedSize() bytes already downloaded). /// Returns true if reservation was successful, false otherwise. - bool reserve(size_t size_to_reserve, FileCacheReserveStat * reserve_stat = nullptr); + bool reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat = nullptr); /// Write data into reserved space. void write(const char * from, size_t size, size_t offset); diff --git a/src/Interpreters/Cache/Guards.h b/src/Interpreters/Cache/Guards.h index 09586b55c61..0ac7cb80483 100644 --- a/src/Interpreters/Cache/Guards.h +++ b/src/Interpreters/Cache/Guards.h @@ -61,15 +61,26 @@ namespace DB */ struct CacheGuard : private boost::noncopyable { + using Mutex = std::timed_mutex; /// struct is used (not keyword `using`) to make CacheGuard::Lock non-interchangable with other guards locks /// so, we wouldn't be able to pass CacheGuard::Lock to a function which accepts KeyGuard::Lock, for example - struct Lock : public std::unique_lock + struct Lock : public std::unique_lock { - explicit Lock(std::mutex & mutex_) : std::unique_lock(mutex_) {} + using Base = std::unique_lock; + using Base::Base; }; Lock lock() { return Lock(mutex); } - std::mutex mutex; + + Lock tryLock() { return Lock(mutex, std::try_to_lock); } + + Lock tryLockFor(const std::chrono::milliseconds & acquire_timeout) + { + return Lock(mutex, std::chrono::duration(acquire_timeout)); + } + +private: + Mutex mutex; }; /** diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 727f2762cca..b79605622b6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -693,6 +694,9 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalset(memory->data(), memory->size()); } + const auto reserve_space_lock_wait_timeout_milliseconds = + Context::getGlobalContextInstance()->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + size_t offset = file_segment.getCurrentWriteOffset(); if (offset != static_cast(reader->getPosition())) reader->seek(offset, SEEK_SET); @@ -701,7 +705,7 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalavailable(); - if (!file_segment.reserve(size)) + if (!file_segment.reserve(size, reserve_space_lock_wait_timeout_milliseconds)) { LOG_TEST( log, "Failed to reserve space during background download " diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 7cd4e2d6e8d..51914c0a14e 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -18,9 +19,22 @@ namespace ErrorCodes extern const int 
NOT_ENOUGH_SPACE; } +namespace +{ + size_t getCacheLockWaitTimeout() + { + auto query_context = CurrentThread::getQueryContext(); + if (query_context) + return query_context->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + else + return Context::getGlobalContextInstance()->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + } +} + WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_) : WriteBufferFromFileDecorator(std::make_unique(file_segment_->getPath())) , file_segment(file_segment_) + , reserve_space_lock_wait_timeout_milliseconds(getCacheLockWaitTimeout()) { } @@ -31,6 +45,7 @@ WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolderPtr segment : throw Exception(ErrorCodes::LOGICAL_ERROR, "WriteBufferToFileSegment can be created only from single segment")) , file_segment(&segment_holder_->front()) , segment_holder(std::move(segment_holder_)) + , reserve_space_lock_wait_timeout_milliseconds(getCacheLockWaitTimeout()) { } @@ -49,7 +64,7 @@ void WriteBufferToFileSegment::nextImpl() FileCacheReserveStat reserve_stat; /// In case of an error, we don't need to finalize the file segment /// because it will be deleted soon and completed in the holder's destructor. - bool ok = file_segment->reserve(bytes_to_write, &reserve_stat); + bool ok = file_segment->reserve(bytes_to_write, reserve_space_lock_wait_timeout_milliseconds, &reserve_stat); if (!ok) { diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h index feb33472513..822488ceb48 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.h +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -28,6 +28,8 @@ private: /// Empty if file_segment is not owned by this WriteBufferToFileSegment FileSegmentsHolderPtr segment_holder; + + const size_t reserve_space_lock_wait_timeout_milliseconds; }; diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 347ec115aba..e4778edeb9c 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method ClientInfo::write is called for unsupported server revision"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Method ClientInfo::write is called for unsupported server revision"); writeBinary(static_cast(query_kind), out); if (empty()) @@ -103,7 +103,7 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method ClientInfo::read is called for unsupported client revision"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Method ClientInfo::read is called for unsupported client revision"); UInt8 read_query_kind = 0; readBinary(read_query_kind, in); diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 33b86854ba9..6cdff939af1 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -148,7 +148,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, } } if (disable_parallel_replicas) - 
new_settings.allow_experimental_parallel_reading_from_replicas = false; + new_settings.allow_experimental_parallel_reading_from_replicas = 0; } if (settings.max_execution_time_leaf.value > 0) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 77998d9dd2f..6a0657a842c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -15,9 +15,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,7 @@ #include #include #include +#include #include #include #include @@ -111,6 +113,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -292,6 +295,7 @@ struct ContextSharedPart : boost::noncopyable mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices. mutable MMappedFileCachePtr mmap_cache TSA_GUARDED_BY(mutex); /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. AsynchronousMetrics * asynchronous_metrics TSA_GUARDED_BY(mutex) = nullptr; /// Points to asynchronous metrics + mutable PageCachePtr page_cache TSA_GUARDED_BY(mutex); /// Userspace page cache. ProcessList process_list; /// Executing queries at the moment. SessionTracker session_tracker; GlobalOvercommitTracker global_overcommit_tracker; @@ -352,6 +356,7 @@ struct ContextSharedPart : boost::noncopyable std::optional merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of MergeTree* engines. std::optional replicated_merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of ReplicatedMergeTree* engines. + std::optional distributed_settings TSA_GUARDED_BY(mutex); std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default) std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default) /// No lock required for format_schema_path modified only during initialization @@ -375,11 +380,9 @@ struct ContextSharedPart : boost::noncopyable OrdinaryBackgroundExecutorPtr moves_executor TSA_GUARDED_BY(background_executors_mutex); OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex); OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex); - /// The global pool of HTTP sessions for background fetches. - PooledSessionFactoryPtr fetches_session_factory TSA_GUARDED_BY(background_executors_mutex); - RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex); /// Allowed URL from config.xml - HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex); /// Forbidden HTTP headers from config.xml + RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml + HTTPHeaderFilter http_header_filter; /// Forbidden HTTP headers from config.xml /// No lock required for trace_collector modified only during initialization std::optional trace_collector; /// Thread collecting traces from threads executing queries @@ -721,7 +724,7 @@ struct ContextSharedPart : boost::noncopyable void addWarningMessage(const String & message) TSA_REQUIRES(mutex) { /// A warning goes both: into server's log; stored to be placed in `system.warnings` table. 
- log->warning(message); + LOG_WARNING(log, "{}", message); warnings.push_back(message); } @@ -793,6 +796,7 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) { auto res = std::shared_ptr(new Context); res->shared = shared_part; + res->query_access_info = std::make_shared(); return res; } @@ -812,7 +816,9 @@ SharedContextHolder Context::createShared() ContextMutablePtr Context::createCopy(const ContextPtr & other) { SharedLockGuard lock(other->mutex); - return std::shared_ptr(new Context(*other)); + auto new_context = std::shared_ptr(new Context(*other)); + new_context->query_access_info = std::make_shared(*other->query_access_info); + return new_context; } ContextMutablePtr Context::createCopy(const ContextWeakPtr & other) @@ -1171,6 +1177,29 @@ void Context::addWarningMessage(const String & msg) const shared->addWarningMessage(msg); } +void Context::addWarningMessageAboutDatabaseOrdinary(const String & database_name) const +{ + std::lock_guard lock(shared->mutex); + + /// We would like to report only about the first database with engine Ordinary + static std::atomic_bool is_called = false; + if (is_called.exchange(true)) + return; + + auto suppress_re = shared->getConfigRefWithLock(lock).getString("warning_supress_regexp", ""); + /// We don't use getFlagsPath method, because it takes a shared lock. + auto convert_databases_flag = fs::path(shared->flags_path) / "convert_ordinary_to_atomic"; + auto message = fmt::format("Server has databases (for example `{}`) with Ordinary engine, which was deprecated. " + "To convert this database to a new Atomic engine, please create a forcing flag {} and make sure that ClickHouse has write permission for it. " + "Example: sudo touch '{}' && sudo chmod 666 '{}'", + database_name, + convert_databases_flag.string(), convert_databases_flag.string(), convert_databases_flag.string()); + + bool is_supressed = !suppress_re.empty() && re2::RE2::PartialMatch(message, suppress_re); + if (!is_supressed) + shared->addWarningMessage(message); +} + void Context::setConfig(const ConfigurationPtr & config) { shared->setConfig(config); @@ -1222,7 +1251,7 @@ void Context::setUser(const UUID & user_id_, const std::optional() and other AccessControl's functions may require some IO work, - /// so Context::getLock() must be unlocked while we're doing this. + /// so Context::getLocalLock() and Context::getGlobalLock() must be unlocked while we're doing this. auto & access_control = getAccessControl(); auto user = access_control.read(user_id_); @@ -1313,6 +1342,23 @@ std::shared_ptr Context::getRolesInfo() const return getAccess()->getRolesInfo(); } +namespace +{ +ALWAYS_INLINE inline void +contextSanityCheckWithLock(const Context & context, const Settings & settings, const std::lock_guard &) +{ + const auto type = context.getApplicationType(); + if (type == Context::ApplicationType::LOCAL || type == Context::ApplicationType::SERVER) + doSettingsSanityCheck(settings); +} + +ALWAYS_INLINE inline void contextSanityCheck(const Context & context, const Settings & settings) +{ + const auto type = context.getApplicationType(); + if (type == Context::ApplicationType::LOCAL || type == Context::ApplicationType::SERVER) + doSettingsSanityCheck(settings); +} +} template void Context::checkAccessImpl(const Args &... 
args) const @@ -1335,7 +1381,7 @@ void Context::checkAccess(const AccessRightsElements & elements) const { return std::shared_ptr Context::getAccess() const { - /// A helper function to collect parameters for calculating access rights, called with Context::getLock() acquired. + /// A helper function to collect parameters for calculating access rights, called with Context::getLocalSharedLock() acquired. auto get_params = [this]() { /// If setUserID() was never called then this must be the global context with the full access. @@ -1362,7 +1408,8 @@ std::shared_ptr Context::getAccess() const } /// Calculate new access rights according to the collected parameters. - /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. + /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLocalLock() + /// and Context::getGlobalLock() must be unlocked while we're doing this. auto res = getAccessControl().getContextAccess(*params); { @@ -1422,6 +1469,7 @@ void Context::setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_i checkSettingsConstraintsWithLock(profiles_info.settings, SettingSource::PROFILE); applySettingsChangesWithLock(profiles_info.settings, lock); settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); + contextSanityCheckWithLock(*this, settings, lock); } void Context::setCurrentProfile(const String & profile_name, bool check_constraints) @@ -1533,7 +1581,7 @@ void Context::addExternalTable(const String & table_name, TemporaryTableHolder & std::lock_guard lock(mutex); if (external_tables_mapping.end() != external_tables_mapping.find(table_name)) - throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} already exists.", backQuoteIfNeed(table_name)); + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} already exists", backQuoteIfNeed(table_name)); external_tables_mapping.emplace(table_name, std::make_shared(std::move(temporary_table))); } @@ -1606,12 +1654,12 @@ void Context::addQueryAccessInfo( if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); - std::lock_guard lock(query_access_info.mutex); - query_access_info.databases.emplace(quoted_database_name); - query_access_info.tables.emplace(full_quoted_table_name); + std::lock_guard lock(query_access_info->mutex); + query_access_info->databases.emplace(quoted_database_name); + query_access_info->tables.emplace(full_quoted_table_name); for (const auto & column_name : column_names) - query_access_info.columns.emplace(full_quoted_table_name + "." + backQuoteIfNeed(column_name)); + query_access_info->columns.emplace(full_quoted_table_name + "." 
+ backQuoteIfNeed(column_name)); } void Context::addQueryAccessInfo(const Names & partition_names) @@ -1619,9 +1667,9 @@ void Context::addQueryAccessInfo(const Names & partition_names) if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); - std::lock_guard lock(query_access_info.mutex); + std::lock_guard lock(query_access_info->mutex); for (const auto & partition_name : partition_names) - query_access_info.partitions.emplace(partition_name); + query_access_info->partitions.emplace(partition_name); } void Context::addViewAccessInfo(const String & view_name) @@ -1629,8 +1677,8 @@ void Context::addViewAccessInfo(const String & view_name) if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); - std::lock_guard lock(query_access_info.mutex); - query_access_info.views.emplace(view_name); + std::lock_guard lock(query_access_info->mutex); + query_access_info->views.emplace(view_name); } void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name) @@ -1641,8 +1689,8 @@ void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_proje if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); - std::lock_guard lock(query_access_info.mutex); - query_access_info.projections.emplace(fmt::format( + std::lock_guard lock(query_access_info->mutex); + query_access_info->projections.emplace(fmt::format( "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name))); } @@ -1766,7 +1814,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions; - if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable()) + if (select_query_hint && use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable()) { const auto & insert_columns = DatabaseCatalog::instance() .getTable(getInsertionTable(), shared_from_this()) @@ -1931,6 +1979,35 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } +StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name) +{ + if (table_name.empty()) + return nullptr; + + StoragePtr original_view = DatabaseCatalog::instance().tryGetTable({database_name, table_name}, getQueryContext()); + if (!original_view || !original_view->isView()) + return nullptr; + auto * storage_view = original_view->as(); + if (!storage_view || !storage_view->isParameterizedView()) + return nullptr; + + auto query = original_view->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); + StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); + + ASTCreateQuery create; + create.select = query->as(); + auto sample_block = InterpreterSelectQueryAnalyzer::getSampleBlock(query, shared_from_this()); + auto res = std::make_shared(StorageID(database_name, table_name), + create, + ColumnsDescription(sample_block.getNamesAndTypesList()), + /* comment */ "", + /* is_parameterized_view */ true); + res->startup(); + return res; +} + 
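// NOTE (illustrative sketch, not part of the patch): the setSettingWithLock / contextSanityCheckWithLock
// overloads touched in this file follow the "lock token" idiom: a function that must run under the mutex
// takes a const std::lock_guard & parameter, so callers cannot forget to lock first. A minimal,
// self-contained version of that idiom with hypothetical names (SettingsStore, setWithLock):

#include <map>
#include <mutex>
#include <string>

class SettingsStore
{
public:
    void set(const std::string & name, const std::string & value)
    {
        std::lock_guard<std::mutex> lock(mutex);
        setWithLock(name, value, lock);   /// the guard object itself is passed as proof of locking
    }

private:
    /// May only be called while `mutex` is held: the caller has to hand over the guard.
    void setWithLock(const std::string & name, const std::string & value, const std::lock_guard<std::mutex> &)
    {
        values[name] = value;             /// safe: the mutex is known to be locked here
    }

    std::mutex mutex;
    std::map<std::string, std::string> values;
};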
+ void Context::addViewSource(const StoragePtr & storage) { if (view_source) @@ -1961,6 +2038,7 @@ void Context::setSettings(const Settings & settings_) std::lock_guard lock(mutex); settings = settings_; need_recalculate_access = true; + contextSanityCheck(*this, settings); } void Context::setSettingWithLock(std::string_view name, const String & value, const std::lock_guard & lock) @@ -1973,6 +2051,7 @@ void Context::setSettingWithLock(std::string_view name, const String & value, co settings.set(name, value); if (ContextAccessParams::dependsOnSettingName(name)) need_recalculate_access = true; + contextSanityCheckWithLock(*this, settings, lock); } void Context::setSettingWithLock(std::string_view name, const Field & value, const std::lock_guard & lock) @@ -1992,6 +2071,7 @@ void Context::applySettingChangeWithLock(const SettingChange & change, const std try { setSettingWithLock(change.name, change.value, lock); + contextSanityCheckWithLock(*this, settings, lock); } catch (Exception & e) { @@ -2019,6 +2099,7 @@ void Context::setSetting(std::string_view name, const Field & value) { std::lock_guard lock(mutex); setSettingWithLock(name, value, lock); + contextSanityCheckWithLock(*this, settings, lock); } void Context::applySettingChange(const SettingChange & change) @@ -2046,26 +2127,36 @@ void Context::applySettingsChanges(const SettingsChanges & changes) void Context::checkSettingsConstraintsWithLock(const SettingsProfileElements & profile_elements, SettingSource source) const { getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, profile_elements, source); + if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) + doSettingsSanityCheck(settings); } void Context::checkSettingsConstraintsWithLock(const SettingChange & change, SettingSource source) const { getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, change, source); + if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) + doSettingsSanityCheck(settings); } void Context::checkSettingsConstraintsWithLock(const SettingsChanges & changes, SettingSource source) const { getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); + if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) + doSettingsSanityCheck(settings); } void Context::checkSettingsConstraintsWithLock(SettingsChanges & changes, SettingSource source) const { getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); + if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) + doSettingsSanityCheck(settings); } void Context::clampToSettingsConstraintsWithLock(SettingsChanges & changes, SettingSource source) const { getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.clamp(settings, changes, source); + if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) + doSettingsSanityCheck(settings); } void Context::checkMergeTreeSettingsConstraintsWithLock(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const @@ -2089,6 +2180,7 @@ void Context::checkSettingsConstraints(const SettingsChanges & changes, SettingS { SharedLockGuard lock(mutex); getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); + doSettingsSanityCheck(settings); } void 
Context::checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const @@ -2264,7 +2356,8 @@ void Context::setMacros(std::unique_ptr && macros) ContextMutablePtr Context::getQueryContext() const { auto ptr = query_context.lock(); - if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_QUERY, "There is no query or query context has expired"); + if (!ptr) + throw Exception(ErrorCodes::THERE_IS_NO_QUERY, "There is no query or query context has expired"); return ptr; } @@ -2556,15 +2649,11 @@ BackupsWorker & Context::getBackupsWorker() const { callOnce(shared->backups_worker_initialized, [&] { const auto & config = getConfigRef(); - const bool allow_concurrent_backups = config.getBool("backups.allow_concurrent_backups", true); - const bool allow_concurrent_restores = config.getBool("backups.allow_concurrent_restores", true); - const bool test_inject_sleep = config.getBool("backups.test_inject_sleep", false); - const auto & settings_ref = getSettingsRef(); UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads); UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads); - shared->backups_worker.emplace(getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores, test_inject_sleep); + shared->backups_worker.emplace(getGlobalContext(), backup_threads, restore_threads); }); return *shared->backups_worker; @@ -2649,6 +2738,33 @@ void Context::clearUncompressedCache() const shared->uncompressed_cache->clear(); } +void Context::setPageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free, bool use_huge_pages) +{ + std::lock_guard lock(shared->mutex); + + if (shared->page_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Page cache has been already created."); + + shared->page_cache = std::make_shared(bytes_per_chunk, bytes_per_mmap, bytes_total, use_madv_free, use_huge_pages); +} + +PageCachePtr Context::getPageCache() const +{ + SharedLockGuard lock(shared->mutex); + return shared->page_cache; +} + +void Context::dropPageCache() const +{ + PageCachePtr cache; + { + SharedLockGuard lock(shared->mutex); + cache = shared->page_cache; + } + if (cache) + cache->dropCache(); +} + void Context::setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio) { std::lock_guard lock(shared->mutex); @@ -3122,7 +3238,7 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const const auto & config = shared->zookeeper_config ? *shared->zookeeper_config : getConfigRef(); if (!shared->zookeeper) - shared->zookeeper = std::make_shared(config, zkutil::getZooKeeperConfigName(config), getZooKeeperLog()); + shared->zookeeper = zkutil::ZooKeeper::create(config, zkutil::getZooKeeperConfigName(config), getZooKeeperLog()); else if (shared->zookeeper->hasReachedDeadline()) shared->zookeeper->finalize("ZooKeeper session has reached its deadline"); @@ -3152,7 +3268,7 @@ bool checkZooKeeperConfigIsLocal(const Poco::Util::AbstractConfiguration & confi if (startsWith(key, "node")) { String host = config.getString(config_name + "." 
+ key + ".host"); - if (isLocalAddress(DNSResolver::instance().resolveHost(host))) + if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(host).front())) return true; } } @@ -3274,7 +3390,7 @@ void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) cons } #if USE_NURAFT -std::shared_ptr & Context::getKeeperDispatcher() const +std::shared_ptr Context::getKeeperDispatcher() const { std::lock_guard lock(shared->keeper_dispatcher_mutex); if (!shared->keeper_dispatcher) @@ -3283,7 +3399,7 @@ std::shared_ptr & Context::getKeeperDispatcher() const return shared->keeper_dispatcher; } -std::shared_ptr & Context::tryGetKeeperDispatcher() const +std::shared_ptr Context::tryGetKeeperDispatcher() const { std::lock_guard lock(shared->keeper_dispatcher_mutex); return shared->keeper_dispatcher; @@ -3334,7 +3450,7 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const name); zookeeper = shared->auxiliary_zookeepers.emplace(name, - std::make_shared(config, "auxiliary_zookeepers." + name, getZooKeeperLog())).first; + zkutil::ZooKeeper::create(config, "auxiliary_zookeepers." + name, getZooKeeperLog())).first; } else if (zookeeper->second->expired()) zookeeper->second = zookeeper->second->startNewSession(); @@ -3367,7 +3483,7 @@ static void reloadZooKeeperIfChangedImpl( if (zk) zk->finalize("Config changed"); - zk = std::make_shared(*config, config_name, std::move(zk_log)); + zk = zkutil::ZooKeeper::create(*config, config_name, std::move(zk_log)); if (server_started) zk->setServerCompletelyStarted(); } @@ -3460,25 +3576,21 @@ String Context::getInterserverScheme() const void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) { - std::lock_guard lock(shared->mutex); shared->remote_host_filter.setValuesFromConfig(config); } const RemoteHostFilter & Context::getRemoteHostFilter() const { - SharedLockGuard lock(shared->mutex); return shared->remote_host_filter; } void Context::setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config) { - std::lock_guard lock(shared->mutex); shared->http_header_filter.setValuesFromConfig(config); } const HTTPHeaderFilter & Context::getHTTPHeaderFilter() const { - SharedLockGuard lock(shared->mutex); return shared->http_header_filter; } @@ -4092,6 +4204,21 @@ const MergeTreeSettings & Context::getReplicatedMergeTreeSettings() const return *shared->replicated_merge_tree_settings; } +const DistributedSettings & Context::getDistributedSettings() const +{ + std::lock_guard lock(shared->mutex); + + if (!shared->distributed_settings) + { + const auto & config = shared->getConfigRefWithLock(lock); + DistributedSettings distributed_settings; + distributed_settings.loadFromConfig("distributed", config); + shared->distributed_settings.emplace(distributed_settings); + } + + return *shared->distributed_settings; +} + const StorageS3Settings & Context::getStorageS3Settings() const { std::lock_guard lock(shared->mutex); @@ -4192,7 +4319,7 @@ void Context::checkPartitionCanBeDropped(const String & database, const String & checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop); } -InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional & format_settings, const std::optional max_parsing_threads) const +InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional & format_settings, std::optional 
max_parsing_threads) const { return FormatFactory::instance().getInput(name, buf, sample, shared_from_this(), max_block_size, format_settings, max_parsing_threads); } @@ -4293,6 +4420,7 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi setCurrentProfile(shared->system_profile_name); applySettingsQuirks(settings, getLogger("SettingsQuirks")); + doSettingsSanityCheck(settings); shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name); buffer_context = Context::createCopy(shared_from_this()); @@ -4483,7 +4611,7 @@ void Context::setClientConnectionId(uint32_t connection_id_) client_info.connection_id = connection_id_; } -void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer) +void Context::setHTTPClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer) { client_info.http_method = http_method; client_info.http_user_agent = http_user_agent; @@ -4909,11 +5037,6 @@ void Context::initializeBackgroundExecutorsIfNeeded() ); LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size); - auto timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getServerSettings(), getSettingsRef()); - /// The number of background fetches is limited by the number of threads in the background thread pool. - /// It doesn't make any sense to limit the number of connections per host any further. - shared->fetches_session_factory = std::make_shared(timeouts, background_fetches_pool_size); - shared->fetch_executor = std::make_shared ( "Fetch", @@ -4967,12 +5090,6 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const return shared->common_executor; } -PooledSessionFactoryPtr Context::getCommonFetchesSessionFactory() const -{ - SharedLockGuard lock(shared->background_executors_mutex); - return shared->fetches_session_factory; -} - IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const { callOnce(shared->readers_initialized, [&] { @@ -5049,10 +5166,16 @@ ReadSettings Context::getReadSettings() const res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.filesystem_cache_segments_batch_size = settings.filesystem_cache_segments_batch_size; + res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; + res.page_cache = getPageCache(); + res.use_page_cache_for_disks_without_file_cache = settings.use_page_cache_for_disks_without_file_cache; + res.read_from_page_cache_if_exists_otherwise_bypass_cache = settings.read_from_page_cache_if_exists_otherwise_bypass_cache; + res.page_cache_inject_eviction = settings.page_cache_inject_eviction; + res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; /// Zero read buffer will not make progress. 
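// NOTE (illustrative sketch, not part of the patch): the filesystem_cache_reserve_space_wait_lock_timeout_milliseconds
// setting propagated into ReadSettings above feeds the CacheGuard::tryLock()/tryLockFor() path introduced
// earlier in this diff (FileCache::tryReserve gives up instead of blocking when the lock is contended).
// A minimal stand-alone version of that try-lock-with-timeout pattern, with hypothetical names
// (Guard, tryReserveSpace):

#include <chrono>
#include <mutex>
#include <optional>

struct Guard
{
    using Lock = std::unique_lock<std::timed_mutex>;

    Lock lock() { return Lock(mutex); }                        /// blocking acquire
    Lock tryLock() { return Lock(mutex, std::try_to_lock); }   /// non-blocking attempt

    Lock tryLockFor(std::chrono::milliseconds timeout)
    {
        return Lock(mutex, timeout);                           /// waits up to `timeout`, then gives up
    }

private:
    std::timed_mutex mutex;
};

/// Mirrors the tryReserve flow: bail out (and let the caller fall back or count the failure)
/// if the lock cannot be taken within the configured wait time, instead of blocking indefinitely.
bool tryReserveSpace(Guard & guard, std::optional<std::chrono::milliseconds> timeout)
{
    auto cache_lock = timeout ? guard.tryLockFor(*timeout) : guard.tryLock();
    if (!cache_lock)    /// unique_lock converts to false when the mutex was not acquired
        return false;   /// e.g. increment a "failed because of lock contention" counter here

    /// ... perform the reservation while holding the lock ...
    return true;
}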
@@ -5092,6 +5215,7 @@ WriteSettings Context::getWriteSettings() const res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.throw_on_error_from_cache = settings.throw_on_error_from_cache_on_write_operations; + res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 8d40ccb301b..c8aa3604a6f 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -79,6 +79,7 @@ class RefreshSet; class Cluster; class Compiler; class MarkCache; +class PageCache; class MMappedFileCache; class UncompressedCache; class ProcessList; @@ -113,6 +114,7 @@ class BlobStorageLog; class IAsynchronousReader; class IOUringReader; struct MergeTreeSettings; +struct DistributedSettings; struct InitialAllRangesAnnouncement; struct ParallelReadRequest; struct ParallelReadResponse; @@ -204,9 +206,6 @@ using TemporaryDataOnDiskScopePtr = std::shared_ptr; class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; -class PooledSessionFactory; -using PooledSessionFactoryPtr = std::shared_ptr; - class SessionTracker; struct ServerSettings; @@ -350,8 +349,11 @@ protected: std::set projections{}; std::set views{}; }; + using QueryAccessInfoPtr = std::shared_ptr; - QueryAccessInfo query_access_info; + /// In some situations, we want to be able to transfer the access info from children back to parents (e.g. definers context). + /// Therefore, query_access_info must be a pointer. + QueryAccessInfoPtr query_access_info; /// Record names of created objects of factories (for testing, etc) struct QueryFactoriesInfo @@ -532,6 +534,7 @@ public: void setUserScriptsPath(const String & path); void addWarningMessage(const String & msg) const; + void addWarningMessageAboutDatabaseOrdinary(const String & database_name) const; void setTemporaryStorageInCache(const String & cache_disk_name, size_t max_size); void setTemporaryStoragePolicy(const String & policy_name, size_t max_size); @@ -630,7 +633,7 @@ public: void setClientInterface(ClientInfo::Interface interface); void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); void setClientConnectionId(uint32_t connection_id); - void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer); + void setHTTPClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer); void setForwardedFor(const String & forwarded_for); void setQueryKind(ClientInfo::QueryKind query_kind); void setQueryKindInitial(); @@ -676,7 +679,9 @@ public: const Block * tryGetSpecialScalar(const String & name) const; void addSpecialScalar(const String & name, const Block & block); - const QueryAccessInfo & getQueryAccessInfo() const { return query_access_info; } + const QueryAccessInfo & getQueryAccessInfo() const { return *getQueryAccessInfoPtr(); } + const QueryAccessInfoPtr getQueryAccessInfoPtr() const { return query_access_info; } + void setQueryAccessInfo(QueryAccessInfoPtr other) { query_access_info = other; } void addQueryAccessInfo( const String & quoted_database_name, @@ -718,6 +723,8 @@ public: /// Overload for the new analyzer. 
Structure inference is performed in QueryAnalysisPass. StoragePtr executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr); + StoragePtr buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name); + void addViewSource(const StoragePtr & storage); StoragePtr getViewSource() const; @@ -805,7 +812,7 @@ public: /// I/O formats. InputFormatPtr getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, - const std::optional & format_settings = std::nullopt, const std::optional max_parsing_threads = std::nullopt) const; + const std::optional & format_settings = std::nullopt, std::optional max_parsing_threads = std::nullopt) const; OutputFormatPtr getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const; OutputFormatPtr getOutputFormatParallelIfPossible(const String & name, WriteBuffer & buf, const Block & sample) const; @@ -931,8 +938,8 @@ public: void setClientProtocolVersion(UInt64 version); #if USE_NURAFT - std::shared_ptr & getKeeperDispatcher() const; - std::shared_ptr & tryGetKeeperDispatcher() const; + std::shared_ptr getKeeperDispatcher() const; + std::shared_ptr tryGetKeeperDispatcher() const; #endif void initializeKeeperDispatcher(bool start_async) const; void shutdownKeeperDispatcher() const; @@ -960,6 +967,10 @@ public: std::shared_ptr getUncompressedCache() const; void clearUncompressedCache() const; + void setPageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free, bool use_huge_pages); + std::shared_ptr getPageCache() const; + void dropPageCache() const; + void setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio); void updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getMarkCache() const; @@ -1073,6 +1084,7 @@ public: const MergeTreeSettings & getMergeTreeSettings() const; const MergeTreeSettings & getReplicatedMergeTreeSettings() const; + const DistributedSettings & getDistributedSettings() const; const StorageS3Settings & getStorageS3Settings() const; /// Prevents DROP TABLE if its size is greater than max_size (50GB by default, max_size=0 turn off this check) @@ -1211,7 +1223,6 @@ public: OrdinaryBackgroundExecutorPtr getMovesExecutor() const; OrdinaryBackgroundExecutorPtr getFetchesExecutor() const; OrdinaryBackgroundExecutorPtr getCommonExecutor() const; - PooledSessionFactoryPtr getCommonFetchesSessionFactory() const; IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; #if USE_LIBURING diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 4fb81e4bcf7..410ea922429 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -23,18 +23,18 @@ ColumnsDescription CrashLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"timestamp_ns", std::make_shared()}, - {"signal", std::make_shared()}, - {"thread_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"trace", std::make_shared(std::make_shared())}, - {"trace_full", std::make_shared(std::make_shared())}, - {"version", std::make_shared()}, - {"revision", std::make_shared()}, - {"build_id", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "The hostname where the crash occurred."}, + 
{"event_date", std::make_shared(), "The date of the crash."}, + {"event_time", std::make_shared(), "The time of the crash."}, + {"timestamp_ns", std::make_shared(), "Timestamp of the event with nanoseconds."}, + {"signal", std::make_shared(), "Signal number."}, + {"thread_id", std::make_shared(), "Thread ID."}, + {"query_id", std::make_shared(), "Query ID."}, + {"trace", std::make_shared(std::make_shared()), "Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process."}, + {"trace_full", std::make_shared(std::make_shared()), "Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process."}, + {"version", std::make_shared(), "ClickHouse server version."}, + {"revision", std::make_shared(), "ClickHouse server revision."}, + {"build_id", std::make_shared(), "BuildID that is generated by compiler."}, }; } diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index 42af164f4ad..e3e8b80e437 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -173,7 +173,7 @@ std::vector getTables(const ASTSelectQuery & select) { const auto * table_element = child->as(); if (!table_element) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: TablesInSelectQueryElement expected"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TablesInSelectQueryElement expected"); JoinedElement & t = joined_tables.emplace_back(*table_element); t.rewriteCommaToCross(); @@ -224,7 +224,7 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da { if (joined_tables.size() != data.tables_with_columns.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Logical error: inconsistent number of tables: {} != {}", + "Inconsistent number of tables: {} != {}", joined_tables.size(), data.tables_with_columns.size()); for (size_t i = 0; i < joined_tables.size(); ++i) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 90eec421abf..fe2baea6b4e 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -44,6 +44,11 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const { return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port); } + catch (const DB::NetException &) + { + /// Avoid "Host not found" exceptions + return false; + } catch (const Poco::Net::NetException &) { /// Avoid "Host not found" exceptions @@ -148,9 +153,8 @@ void DDLLogEntry::parse(const String & data) String settings_str; rb >> "settings: " >> settings_str >> "\n"; ParserSetQuery parser{true}; - constexpr UInt64 max_size = 4096; constexpr UInt64 max_depth = 16; - ASTPtr settings_ast = parseQuery(parser, settings_str, max_size, max_depth); + ASTPtr settings_ast = parseQuery(parser, settings_str, Context::getGlobalContextInstance()->getSettingsRef().max_query_size, max_depth); settings.emplace(std::move(settings_ast->as()->changes)); } } @@ -556,7 +560,7 @@ void ZooKeeperMetadataTransaction::commit() if (state != CREATED) throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state ({}), it's a bug", state); state = FAILED; - current_zookeeper->multi(ops); + current_zookeeper->multi(ops, /* check_session_valid */ true); state = COMMITTED; } diff --git a/src/Interpreters/DatabaseAndTableWithAlias.cpp b/src/Interpreters/DatabaseAndTableWithAlias.cpp index db020cb9166..329391b45d7 100644 --- a/src/Interpreters/DatabaseAndTableWithAlias.cpp +++ 
b/src/Interpreters/DatabaseAndTableWithAlias.cpp @@ -71,7 +71,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & alias = table_expression.subquery->tryGetAlias(); } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no known elements in ASTTableExpression"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No known elements in ASTTableExpression"); } bool DatabaseAndTableWithAlias::satisfies(const DatabaseAndTableWithAlias & db_table, bool table_may_be_an_alias) const diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 87985d1d12b..a9fd5c852ba 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1094,7 +1094,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr create->setTable(table_id.table_name); try { - table = createTableFromAST(*create, table_id.getDatabaseName(), data_path, getContext(), /* force_restore */ true).second; + table = createTableFromAST(*create, table_id.getDatabaseName(), data_path, getContext(), LoadingStrictnessLevel::FORCE_RESTORE).second; table->is_dropped = true; } catch (...) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index beb73e3ef96..6f05a3cea0f 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -1,15 +1,14 @@ #pragma once #include +#include +#include #include #include -#include #include #include -#include "Common/NamePrompter.h" +#include #include -#include "Storages/IStorage.h" -#include "Databases/IDatabase.h" #include #include @@ -444,7 +443,7 @@ class TemporaryLockForUUIDDirectory : private boost::noncopyable UUID uuid = UUIDHelpers::Nil; public: TemporaryLockForUUIDDirectory() = default; - TemporaryLockForUUIDDirectory(UUID uuid_); + explicit TemporaryLockForUUIDDirectory(UUID uuid_); ~TemporaryLockForUUIDDirectory(); TemporaryLockForUUIDDirectory(TemporaryLockForUUIDDirectory && rhs) noexcept; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 77a022e066b..0cf138c14f6 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -281,7 +281,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr if (data.only_analyze) { ast->as()->alias.clear(); - auto func = makeASTFunction("identity", std::move(ast)); + auto func = makeASTFunction("__scalarSubqueryResult", std::move(ast)); func->alias = subquery_alias; func->prefer_alias_to_column_name = prefer_alias_to_column_name; ast = std::move(func); diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 1bd1e2c318f..1d193b3499c 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -180,14 +180,17 @@ static void setLazyExecutionInfo( indexes.insert(i); } - if (!short_circuit_nodes.at(parent).enable_lazy_execution_for_first_argument && node == parent->children[0]) + for (auto idx : short_circuit_nodes.at(parent).arguments_with_disabled_lazy_execution) { - /// We shouldn't add 0 index in node info in this case. - indexes.erase(0); - /// Disable lazy execution for current node only if it's disabled for short-circuit node, - /// because we can have nested short-circuit nodes. 
- if (!lazy_execution_infos[parent].can_be_lazy_executed) - lazy_execution_info.can_be_lazy_executed = false; + if (idx < parent->children.size() && node == parent->children[idx]) + { + /// We shouldn't add this index in node info in this case. + indexes.erase(idx); + /// Disable lazy execution for current node only if it's disabled for short-circuit node, + /// because we can have nested short-circuit nodes. + if (!lazy_execution_infos[parent].can_be_lazy_executed) + lazy_execution_info.can_be_lazy_executed = false; + } } lazy_execution_info.short_circuit_ancestors_info[parent].insert(indexes.begin(), indexes.end()); @@ -563,7 +566,7 @@ namespace }; } -static void executeAction(const ExpressionActions::Action & action, ExecutionContext & execution_context, bool dry_run) +static void executeAction(const ExpressionActions::Action & action, ExecutionContext & execution_context, bool dry_run, bool allow_duplicates_in_input) { auto & inputs = execution_context.inputs; auto & columns = execution_context.columns; @@ -694,14 +697,19 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon action.node->result_name); } else - columns[action.result_position] = std::move(inputs[pos]); + { + if (allow_duplicates_in_input) + columns[action.result_position] = inputs[pos]; + else + columns[action.result_position] = std::move(inputs[pos]); + } break; } } } -void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) const +void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run, bool allow_duplicates_in_input) const { ExecutionContext execution_context { @@ -722,7 +730,8 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) if (execution_context.inputs_pos[input_pos] < 0) { execution_context.inputs_pos[input_pos] = pos; - break; + if (!allow_duplicates_in_input) + break; } } } @@ -734,12 +743,8 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) { try { - executeAction(action, execution_context, dry_run); + executeAction(action, execution_context, dry_run, allow_duplicates_in_input); checkLimits(execution_context.columns); - - //std::cerr << "Action: " << action.toString() << std::endl; - //for (const auto & col : execution_context.columns) - // std::cerr << col.dumpStructure() << std::endl; } catch (Exception & e) { @@ -752,6 +757,12 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) { block.clear(); } + else if (allow_duplicates_in_input) + { + /// This case is the same as when the input is projected + /// since we do not need any input columns. 
+ block.clear(); + } else { ::sort(execution_context.inputs_pos.rbegin(), execution_context.inputs_pos.rend()); @@ -774,11 +785,11 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) num_rows = execution_context.num_rows; } -void ExpressionActions::execute(Block & block, bool dry_run) const +void ExpressionActions::execute(Block & block, bool dry_run, bool allow_duplicates_in_input) const { size_t num_rows = block.rows(); - execute(block, num_rows, dry_run); + execute(block, num_rows, dry_run, allow_duplicates_in_input); if (!block) block.insert({DataTypeUInt8().createColumnConst(num_rows, 0), std::make_shared(), "_dummy"}); diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index db6670c50b9..cb467004d29 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -98,9 +98,15 @@ public: const NamesAndTypesList & getRequiredColumnsWithTypes() const { return required_columns; } /// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns. - void execute(Block & block, size_t & num_rows, bool dry_run = false) const; + /// + /// @param allow_duplicates_in_input - actions are allowed to have + /// duplicated input (that will refer into the block). This is needed for + /// preliminary query filtering (filterBlockWithDAG()), because they just + /// pass available virtual columns, which cannot be moved in case they are + /// used multiple times. + void execute(Block & block, size_t & num_rows, bool dry_run = false, bool allow_duplicates_in_input = false) const; /// The same, but without `num_rows`. If result block is empty, adds `_dummy` column to keep block size. - void execute(Block & block, bool dry_run = false) const; + void execute(Block & block, bool dry_run = false, bool allow_duplicates_in_input = false) const; bool hasArrayJoin() const; void assertDeterministic() const; diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index ccfee49a66f..80fe1c3a8ef 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -38,20 +38,20 @@ ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"query_id", std::make_shared()}, - {"source_file_path", std::make_shared()}, - {"file_segment_range", std::make_shared(types)}, - {"total_requested_range", std::make_shared(types)}, - {"key", std::make_shared()}, - {"offset", std::make_shared()}, - {"size", std::make_shared()}, - {"read_type", std::make_shared()}, - {"read_from_cache_attempted", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"read_buffer_id", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname"}, + {"event_date", std::make_shared(), "Event date"}, + {"event_time", std::make_shared(), "Event time"}, + {"query_id", std::make_shared(), "Id of the query"}, + {"source_file_path", std::make_shared(), "File segment path on filesystem"}, + {"file_segment_range", std::make_shared(types), "File segment range"}, + {"total_requested_range", std::make_shared(types), "Full read range"}, + {"key", std::make_shared(), "File segment key"}, + {"offset", std::make_shared(), "File segment offset"}, + {"size", std::make_shared(), "Read size"}, + {"read_type", 
std::make_shared(), "Read type: READ_FROM_CACHE, READ_FROM_FS_AND_DOWNLOADED_TO_CACHE, READ_FROM_FS_BYPASSING_CACHE"}, + {"read_from_cache_attempted", std::make_shared(), "Whether reading from cache was attempted"}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while reading this file segment"}, + {"read_buffer_id", std::make_shared(), "Internal implementation read buffer id"}, }; } diff --git a/src/Interpreters/FilesystemReadPrefetchesLog.h b/src/Interpreters/FilesystemReadPrefetchesLog.h index ee6da4966e9..088613cbeac 100644 --- a/src/Interpreters/FilesystemReadPrefetchesLog.h +++ b/src/Interpreters/FilesystemReadPrefetchesLog.h @@ -38,7 +38,6 @@ struct FilesystemReadPrefetchesLogElement static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class FilesystemReadPrefetchesLog : public SystemLog diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5f029395df9..64b6eb5dce9 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -216,7 +216,7 @@ private: if (enable_parallel_processing_of_joins) { /// We don't enable parallel replicas for IN (subquery) - if (ast->as()) + if (!settings.parallel_replicas_allow_in_with_subquery && ast->as()) { if (settings.allow_experimental_parallel_reading_from_replicas == 1) { @@ -224,7 +224,7 @@ private: data.getContext()->getQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); return; } - else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + else if (settings.allow_experimental_parallel_reading_from_replicas >= 2) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); } } @@ -282,7 +282,7 @@ private: data.getContext()->getQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); return; } - else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + else if (settings.allow_experimental_parallel_reading_from_replicas >= 2) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JOIN with parallel replicas is only supported with subqueries"); } } diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 5fb92a68a29..53d1f48c291 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -1,21 +1,18 @@ +#include +#include +#include +#include #include #include #include - -#include #include - -#include +#include #include #include #include -#include -#include - -#include - #include +#include namespace CurrentMetrics diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 33dc178ca00..ddd65f95627 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -368,7 +368,7 @@ HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_c return Type::keys128; if (size_of_field == 32) return Type::keys256; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys @@ -1019,6 +1019,7 @@ struct JoinOnKeyColumns bool isRowFiltered(size_t i) const { return 
join_mask_column.isRowFiltered(i); } }; +template class AddedColumns { public: @@ -1034,6 +1035,12 @@ public: } }; + struct LazyOutput + { + PaddedPODArray blocks; + PaddedPODArray row_nums; + }; + AddedColumns( const Block & left_block, const Block & block_with_columns_to_add, @@ -1050,6 +1057,13 @@ public: if (is_asof_join) ++num_columns_to_add; + if constexpr (lazy) + { + has_columns_to_add = num_columns_to_add > 0; + lazy_output.blocks.reserve(rows_to_add); + lazy_output.row_nums.reserve(rows_to_add); + } + columns.reserve(num_columns_to_add); type_name.reserve(num_columns_to_add); right_indexes.reserve(num_columns_to_add); @@ -1089,81 +1103,18 @@ public: size_t size() const { return columns.size(); } + void buildOutput(); + ColumnWithTypeAndName moveColumn(size_t i) { return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); } + void appendFromBlock(const Block & block, size_t row_num, bool has_default); - template - void appendFromBlock(const Block & block, size_t row_num) - { - if constexpr (has_defaults) - applyLazyDefaults(); + void appendDefaultRow(); -#ifndef NDEBUG - for (size_t j = 0; j < right_indexes.size(); ++j) - { - const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); - const auto * dest_column = columns[j].get(); - if (auto * nullable_col = nullable_column_ptrs[j]) - { - if (!is_join_get) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Columns {} and {} can have different nullability only in joinGetOrNull", - dest_column->getName(), column_from_block->getName()); - dest_column = nullable_col->getNestedColumnPtr().get(); - } - /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, - * because dictionaries can be different, while calling insertFrom on them is safe, for example: - * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) - * and - * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) - */ - if (typeid(*dest_column) != typeid(*column_from_block)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", - dest_column->getName(), column_from_block->getName(), - demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); - } -#endif - - if (is_join_get) - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - if (auto * nullable_col = nullable_column_ptrs[j]) - nullable_col->insertFromNotNullable(*column_from_block.column, row_num); - else - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } - else - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } - } - - void appendDefaultRow() - { - ++lazy_defaults_count; - } - - void applyLazyDefaults() - { - if (lazy_defaults_count) - { - for (size_t j = 0, size = right_indexes.size(); j < size; ++j) - JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); - lazy_defaults_count = 0; - } - } + void applyLazyDefaults(); const IColumn & leftAsofKey() const { return *left_asof_key; } @@ -1192,16 +1143,50 @@ public: } private: - std::vector type_name; - MutableColumns columns; - std::vector 
nullable_column_ptrs; + void checkBlock(const Block & block) + { + for (size_t j = 0; j < right_indexes.size(); ++j) + { + const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); + const auto * dest_column = columns[j].get(); + if (auto * nullable_col = nullable_column_ptrs[j]) + { + if (!is_join_get) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Columns {} and {} can have different nullability only in joinGetOrNull", + dest_column->getName(), column_from_block->getName()); + dest_column = nullable_col->getNestedColumnPtr().get(); + } + /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, + * because dictionaries can be different, while calling insertFrom on them is safe, for example: + * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) + * and + * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) + */ + if (typeid(*dest_column) != typeid(*column_from_block)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", + dest_column->getName(), column_from_block->getName(), + demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); + } + } + + MutableColumns columns; + bool is_join_get; std::vector right_indexes; + std::vector type_name; + std::vector nullable_column_ptrs; size_t lazy_defaults_count = 0; + + /// for lazy + // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, + // default_count cannot represent the position of the row + LazyOutput lazy_output; + bool has_columns_to_add; + /// for ASOF const IColumn * left_asof_key = nullptr; - bool is_join_get; void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) { @@ -1210,6 +1195,126 @@ private: type_name.emplace_back(src_column.type, src_column.name, qualified_name); } }; +template<> void AddedColumns::buildOutput() +{ +} + +template<> +void AddedColumns::buildOutput() +{ + for (size_t i = 0; i < this->size(); ++i) + { + auto& col = columns[i]; + size_t default_count = 0; + auto apply_default = [&]() + { + if (default_count > 0) + { + JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); + default_count = 0; + } + }; + + for (size_t j = 0; j < lazy_output.blocks.size(); ++j) + { + if (!lazy_output.blocks[j]) + { + default_count ++; + continue; + } + apply_default(); + const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); + /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. 
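
The lazy AddedColumns specialization above defers materialization: while probing it only records a (block pointer, row number) pair per output row, with a null block standing for a default row, and buildOutput() gathers the actual values in one pass at the end. A simplified, self-contained sketch of that gather pattern, using hypothetical types rather than the real HashJoin classes:

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-ins for a stored right-side block and the lazy output bookkeeping.
struct Block { std::vector<int64_t> column; };

struct LazyOutput
{
    std::vector<const Block *> blocks;   // nullptr means "emit a default value"
    std::vector<uint32_t> row_nums;
};

int main()
{
    Block right{{10, 20, 30}};

    // Probe phase: only remember where each value lives, do not copy it yet.
    LazyOutput lazy;
    lazy.blocks = {&right, nullptr, &right};   // the second output row had no match -> default
    lazy.row_nums = {2, 0, 0};

    // buildOutput(): materialize the result column in a single pass.
    std::vector<int64_t> result;
    result.reserve(lazy.blocks.size());
    for (size_t i = 0; i < lazy.blocks.size(); ++i)
        result.push_back(lazy.blocks[i] ? lazy.blocks[i]->column[lazy.row_nums[i]] : 0 /* default */);

    for (auto v : result)
        std::cout << v << '\n';   // prints 30, 0, 10
    return 0;
}
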
+ if (is_join_get) + { + if (auto * nullable_col = typeid_cast(col.get()); + nullable_col && !column_from_block.column->isNullable()) + { + nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]); + continue; + } + } + col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); + } + apply_default(); + } +} + +template<> +void AddedColumns::applyLazyDefaults() +{ + if (lazy_defaults_count) + { + for (size_t j = 0, size = right_indexes.size(); j < size; ++j) + JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); + lazy_defaults_count = 0; + } +} + +template<> +void AddedColumns::applyLazyDefaults() +{ +} + +template <> +void AddedColumns::appendFromBlock(const Block & block, size_t row_num,const bool has_defaults) +{ + if (has_defaults) + applyLazyDefaults(); + +#ifndef NDEBUG + checkBlock(block); +#endif + if (is_join_get) + { + size_t right_indexes_size = right_indexes.size(); + for (size_t j = 0; j < right_indexes_size; ++j) + { + const auto & column_from_block = block.getByPosition(right_indexes[j]); + if (auto * nullable_col = nullable_column_ptrs[j]) + nullable_col->insertFromNotNullable(*column_from_block.column, row_num); + else + columns[j]->insertFrom(*column_from_block.column, row_num); + } + } + else + { + size_t right_indexes_size = right_indexes.size(); + for (size_t j = 0; j < right_indexes_size; ++j) + { + const auto & column_from_block = block.getByPosition(right_indexes[j]); + columns[j]->insertFrom(*column_from_block.column, row_num); + } + } +} + +template <> +void AddedColumns::appendFromBlock(const Block & block, size_t row_num, bool) +{ +#ifndef NDEBUG + checkBlock(block); +#endif + if (has_columns_to_add) + { + lazy_output.blocks.emplace_back(reinterpret_cast(&block)); + lazy_output.row_nums.emplace_back(static_cast(row_num)); + } +} +template<> +void AddedColumns::appendDefaultRow() +{ + ++lazy_defaults_count; +} + +template<> +void AddedColumns::appendDefaultRow() +{ + if (has_columns_to_add) + { + lazy_output.blocks.emplace_back(0); + lazy_output.row_nums.emplace_back(0); + } +} template struct JoinFeatures @@ -1308,7 +1413,7 @@ public: } }; -template +template void addFoundRowAll( const typename Map::mapped_type & mapped, AddedColumns & added, @@ -1327,7 +1432,7 @@ void addFoundRowAll( { if (!known_rows.isKnown(std::make_pair(it->block, it->row_num))) { - added.appendFromBlock(*it->block, it->row_num); + added.appendFromBlock(*it->block, it->row_num, false); ++current_offset; if (!new_known_rows_ptr) { @@ -1351,13 +1456,13 @@ void addFoundRowAll( { for (auto it = mapped.begin(); it.ok(); ++it) { - added.appendFromBlock(*it->block, it->row_num); + added.appendFromBlock(*it->block, it->row_num, false); ++current_offset; } } } -template +template void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) { if constexpr (add_missing) @@ -1377,7 +1482,7 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
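
As the comment above notes, for ALL joins joinRightColumns() also produces cumulative offsets that are later used to replicate left-side rows once per match. A small standalone illustration of how such offsets drive the replication (simplified, conceptually what IColumn::replicate does, not the real interface):

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // Left column and, per left row, the cumulative number of joined output rows.
    std::vector<int64_t> left = {1, 2, 3};
    std::vector<uint64_t> offsets = {2, 2, 5}; // row 0 matched twice, row 1 not at all, row 2 three times

    // Replicate each left value according to the offsets.
    std::vector<int64_t> replicated;
    uint64_t prev = 0;
    for (size_t i = 0; i < left.size(); ++i)
    {
        for (uint64_t j = prev; j < offsets[i]; ++j)
            replicated.push_back(left[i]);
        prev = offsets[i];
    }

    for (auto v : replicated)
        std::cout << v << ' ';    // 1 1 3 3 3
    std::cout << '\n';
    return 0;
}
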
-template +template NO_INLINE size_t joinRightColumns( std::vector && key_getter_vector, const std::vector & mapv, @@ -1440,7 +1545,7 @@ NO_INLINE size_t joinRightColumns( else used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*row_ref.block, row_ref.row_num); + added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); } else addNotFoundRow(added_columns, current_offset); @@ -1471,7 +1576,7 @@ NO_INLINE size_t joinRightColumns( if (used_once) { setUsed(added_columns.filter, i); - added_columns.appendFromBlock(*mapped.block, mapped.row_num); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); } break; @@ -1489,7 +1594,7 @@ NO_INLINE size_t joinRightColumns( { setUsed(added_columns.filter, i); used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*mapped.block, mapped.row_num); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); if (join_features.is_any_or_semi_join) { @@ -1516,7 +1621,7 @@ NO_INLINE size_t joinRightColumns( return i; } -template +template size_t joinRightColumnsSwitchMultipleDisjuncts( std::vector && key_getter_vector, const std::vector & mapv, @@ -1528,7 +1633,7 @@ size_t joinRightColumnsSwitchMultipleDisjuncts( : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } -template +template size_t joinRightColumnsSwitchNullability( std::vector && key_getter_vector, const std::vector & mapv, @@ -1541,11 +1646,11 @@ size_t joinRightColumnsSwitchNullability( } else { - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } } -template +template size_t switchJoinRightColumns( const std::vector & mapv, AddedColumns & added_columns, @@ -1680,14 +1785,9 @@ Block HashJoin::joinBlockImpl( * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. * For ASOF, the last column is used as the ASOF column */ - AddedColumns added_columns( - block, - block_with_columns_to_add, - savedBlockSample(), - *this, - std::move(join_on_keys), - join_features.is_asof_join, - is_join_get); + AddedColumns added_columns( + block, block_with_columns_to_add, savedBlockSample(), *this, std::move(join_on_keys), join_features.is_asof_join, is_join_get); + bool has_required_right_keys = (required_right_keys.columns() != 0); added_columns.need_filter = join_features.need_filter || has_required_right_keys; @@ -1702,6 +1802,7 @@ Block HashJoin::joinBlockImpl( added_columns.join_on_keys.clear(); Block remaining_block = sliceBlock(block, num_joined); + added_columns.buildOutput(); for (size_t i = 0; i < added_columns.size(); ++i) block.insert(added_columns.moveColumn(i)); diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 3858830a43b..ec4241a2740 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -103,12 +103,12 @@ private: /// Already processed. 
} else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected function name {}", concrete->name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function name {}", concrete->name); } else if (table_join) table_join->locality = JoinLocality::Global; else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected AST node"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected AST node"); } else if (distributed_product_mode == DistributedProductMode::DENY) { diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index bfcb0d6dd39..b768593da98 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -60,8 +61,7 @@ BlockIO InterpreterAlterQuery::execute() { return executeToDatabase(alter); } - else if (alter.alter_object == ASTAlterQuery::AlterObjectType::TABLE - || alter.alter_object == ASTAlterQuery::AlterObjectType::LIVE_VIEW) + else if (alter.alter_object == ASTAlterQuery::AlterObjectType::TABLE) { return executeToTable(alter); } @@ -71,6 +71,13 @@ BlockIO InterpreterAlterQuery::execute() BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) { + for (auto & child : alter.command_list->children) + { + auto * command_ast = child->as(); + if (command_ast->sql_security) + InterpreterCreateQuery::processSQLSecurityOption(getContext(), command_ast->sql_security->as()); + } + BlockIO res; if (!UserDefinedSQLFunctionFactory::instance().empty()) @@ -412,6 +419,7 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS case ASTAlterCommand::APPLY_DELETED_MASK: case ASTAlterCommand::DROP_PARTITION: case ASTAlterCommand::DROP_DETACHED_PARTITION: + case ASTAlterCommand::FORGET_PARTITION: { required_access.emplace_back(AccessType::ALTER_DELETE, database, table); break; @@ -466,11 +474,6 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS required_access.emplace_back(AccessType::ALTER_VIEW_MODIFY_REFRESH, database, table); break; } - case ASTAlterCommand::LIVE_VIEW_REFRESH: - { - required_access.emplace_back(AccessType::ALTER_VIEW_REFRESH, database, table); - break; - } case ASTAlterCommand::RENAME_COLUMN: { required_access.emplace_back(AccessType::ALTER_RENAME_COLUMN, database, table, column_name()); @@ -487,6 +490,11 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS required_access.emplace_back(AccessType::ALTER_MODIFY_COMMENT, database, table); break; } + case ASTAlterCommand::MODIFY_SQL_SECURITY: + { + required_access.emplace_back(AccessType::ALTER_VIEW_MODIFY_SQL_SECURITY, database, table); + break; + } } return required_access; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index c491ee30321..edd7452c130 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -2,6 +2,9 @@ #include +#include +#include + #include "Common/Exception.h" #include #include @@ -35,7 +38,6 @@ #include #include #include -#include #include #include @@ -50,6 +52,7 @@ #include #include #include +#include #include @@ -82,7 +85,8 @@ #include #include - +#include +#include namespace DB { @@ -98,7 +102,6 @@ namespace ErrorCodes extern const int DATABASE_ALREADY_EXISTS; extern const int BAD_ARGUMENTS; extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE; - extern const int 
SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_INDEX; @@ -275,7 +278,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) bool need_write_metadata = !create.attach || !fs::exists(metadata_file_path); bool need_lock_uuid = internal || need_write_metadata; - auto mode = getLoadingStrictnessLevel(create.attach, force_attach, has_force_restore_data_flag); + auto mode = getLoadingStrictnessLevel(create.attach, force_attach, has_force_restore_data_flag, /*secondary*/ false); /// Lock uuid, so we will known it's already in use. /// We do it when attaching databases on server startup (internal) and on CREATE query (!create.attach); @@ -692,6 +695,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (!attach && !is_restore_from_backup && context_->getSettingsRef().flatten_nested) res.flattenNested(); + if (res.getAllPhysical().empty()) throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Cannot CREATE table without physical columns"); @@ -796,6 +800,9 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { + if (create.isParameterizedView()) + return properties; + Block as_select_sample; if (getContext()->getSettingsRef().allow_experimental_analyzer) @@ -804,28 +811,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else { - /** To get valid sample block we need to prepare query without only_analyze, because we need to execute scalar - * subqueries. Otherwise functions that expect only constant arguments will throw error during query analysis, - * because the result of scalar subquery is not a constant. - * - * Example: - * CREATE MATERIALIZED VIEW test_mv ENGINE=MergeTree ORDER BY arr - * AS - * WITH (SELECT '\d[a-z]') AS constant_value - * SELECT extractAll(concat(toString(number), 'a'), assumeNotNull(constant_value)) AS arr - * FROM test_table; - * - * For new analyzer this issue does not exists because we always execute scalar subqueries. - * We can improve this in new analyzer, and execute scalar subqueries only in contexts when we expect constant - * for example: LIMIT, OFFSET, functions parameters, functions constant only arguments. - */ - - SelectQueryOptions options; - if (create.isParameterizedView()) - options = options.createParameterizedView(); - - InterpreterSelectWithUnionQuery interpreter(create.select->clone(), getContext(), options); - as_select_sample = interpreter.getSampleBlock(); + as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); } properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); @@ -890,92 +876,35 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} already exists", backQuoteIfNeed(column.name)); } - /// Check if _row_exists for lightweight delete column in column_lists for merge tree family. 
- if (create.storage && create.storage->engine && endsWith(create.storage->engine->name, "MergeTree")) - { - auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name); - if (search != all_columns.end()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' for *MergeTree engines because it " - "is reserved for lightweight delete feature", - LightweightDeleteDescription::FILTER_COLUMN.name); - - auto search_block_number = all_columns.find(BlockNumberColumn::name); - if (search_block_number != all_columns.end()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' for *MergeTree engines because it " - "is reserved for storing block number", - BlockNumberColumn::name); - } - const auto & settings = getContext()->getSettingsRef(); - /// Check low cardinality types in creating table if it was not allowed in setting - if (!create.attach && !settings.allow_suspicious_low_cardinality_types && !create.is_materialized_view) + /// If it's not attach and not materialized view to existing table, + /// we need to validate data types (check for experimental or suspicious types). + if (!create.attach && !create.is_materialized_view) { + DataTypeValidationSettings validation_settings(settings); for (const auto & name_and_type_pair : properties.columns.getAllPhysical()) - { - if (const auto * current_type_ptr = typeid_cast(name_and_type_pair.type.get())) - { - if (!isStringOrFixedString(*removeNullable(current_type_ptr->getDictionaryType()))) - throw Exception(ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, - "Creating columns of type {} is prohibited by default " - "due to expected negative impact on performance. " - "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", - current_type_ptr->getName()); - } - } + validateDataType(name_and_type_pair.type, validation_settings); } +} - if (!create.attach && !settings.allow_experimental_object_type) +void validateVirtualColumns(const IStorage & storage) +{ + auto virtual_columns = storage.getVirtualsPtr(); + for (const auto & storage_column : storage.getInMemoryMetadataPtr()->getColumns()) { - for (const auto & [name, type] : properties.columns.getAllPhysical()) + if (virtual_columns->tryGet(storage_column.name, VirtualsKind::Persistent)) { - if (type->hasDynamicSubcolumns()) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' which type is '{}' " - "because experimental Object type is not allowed. " - "Set setting allow_experimental_object_type = 1 in order to allow it", - name, type->getName()); - } - } - } - if (!create.attach && !settings.allow_suspicious_fixed_string_types) - { - for (const auto & [name, type] : properties.columns.getAllPhysical()) - { - auto basic_type = removeLowCardinalityAndNullable(type); - if (const auto * fixed_string = typeid_cast(basic_type.get())) - { - if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' which type is '{}' " - "because fixed string with size > {} is suspicious. 
" - "Set setting allow_suspicious_fixed_string_types = 1 in order to allow it", - name, type->getName(), MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS); - } - } - } - if (!create.attach && !settings.allow_experimental_variant_type) - { - for (const auto & [name, type] : properties.columns.getAllPhysical()) - { - if (isVariant(type)) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' which type is '{}' " - "because experimental Variant type is not allowed. " - "Set setting allow_experimental_variant_type = 1 in order to allow it", - name, type->getName()); - } + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column '{}' for {} engines because it is reserved for persistent virtual column", + storage_column.name, storage.getName()); } } } namespace { - void checkTemporaryTableEngineName(const String& name) + void checkTemporaryTableEngineName(const String & name) { if (name.starts_with("Replicated") || name.starts_with("Shared") || name == "KeeperMap") throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines"); @@ -1147,6 +1076,12 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database ? create.getDatabase() : current_database; + if (!create.sql_security && !getContext()->getServerSettings().ignore_empty_sql_security_in_create_view_query) + create.sql_security = std::make_shared(); + + if (create.sql_security) + processSQLSecurityOption(getContext(), create.sql_security->as(), create.attach, create.is_materialized_view); + DDLGuardPtr ddl_guard; // If this is a stub ATTACH query, read the query definition from the database @@ -1289,7 +1224,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { input_block = InterpreterSelectWithUnionQuery(create.select->clone(), getContext(), - {}).getSampleBlock(); + SelectQueryOptions().analyze()).getSampleBlock(); } Block output_block = to_table->getInMemoryMetadataPtr()->getSampleBlock(); @@ -1379,6 +1314,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, const InterpreterCreateQuery::TableProperties & properties, DDLGuardPtr & ddl_guard) { + bool is_secondary_query = getContext()->getZooKeeperMetadataTransaction() && !getContext()->getZooKeeperMetadataTransaction()->isInitialQuery(); + auto mode = getLoadingStrictnessLevel(create.attach, /*force_attach*/ false, /*has_force_restore_data_flag*/ false, is_secondary_query); + if (create.temporary) { if (create.if_not_exists && getContext()->tryResolveStorageID({"", create.getTable()}, Context::ResolveExternal)) @@ -1395,7 +1333,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, getContext()->getGlobalContext(), properties.columns, properties.constraints, - false); + mode); }; auto temporary_table = TemporaryTableHolder(getContext(), creator, query_ptr); @@ -1543,12 +1481,22 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, getContext()->getGlobalContext(), properties.columns, properties.constraints, - false); + mode); /// If schema wes inferred while storage creation, add columns description to create query. 
addColumnsDescriptionToCreateQueryIfNecessary(query_ptr->as(), res); } + validateVirtualColumns(*res); + + if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column of type Object, " + "because storage {} doesn't support dynamic subcolumns", + res->getName()); + } + if (!create.attach && getContext()->getSettingsRef().database_replicated_allow_only_replicated_engine) { bool is_replicated_storage = typeid_cast(res.get()) != nullptr; @@ -1598,14 +1546,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// we can safely destroy the object without a call to "shutdown", because there is guarantee /// that no background threads/similar resources remain after exception from "startup". - if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column of type Object, " - "because storage {} doesn't support dynamic subcolumns", - res->getName()); - } - res->startup(); return true; } @@ -1672,7 +1612,6 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, executeTrivialBlockIO(fill_io, getContext()); /// Replace target table with created one - auto ast_rename = std::make_shared(); ASTRenameQuery::Element elem { ASTRenameQuery::Table @@ -1687,7 +1626,7 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, } }; - ast_rename->elements.push_back(std::move(elem)); + auto ast_rename = std::make_shared(ASTRenameQuery::Elements{std::move(elem)}); ast_rename->dictionary = create.is_dictionary; if (create.create_or_replace) { @@ -1932,6 +1871,61 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr } } +void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_attach, bool is_materialized_view) +{ + /// If no SQL security is specified, apply default from default_*_view_sql_security setting. + if (!sql_security.type.has_value()) + { + SQLSecurityType default_security; + + if (is_materialized_view) + default_security = context_->getSettingsRef().default_materialized_view_sql_security; + else + default_security = context_->getSettingsRef().default_normal_view_sql_security; + + if (default_security == SQLSecurityType::DEFINER) + { + String default_definer = context_->getSettingsRef().default_view_definer; + if (default_definer == "CURRENT_USER") + sql_security.is_definer_current_user = true; + else + sql_security.definer = std::make_shared(default_definer); + } + + sql_security.type = default_security; + } + + /// Resolves `DEFINER = CURRENT_USER`. Can change the SQL security type if we try to resolve the user during the attachment. + const auto current_user_name = context_->getUserName(); + if (sql_security.is_definer_current_user) + { + if (current_user_name.empty()) + /// This can happen only when attaching a view for the first time after migration and with `CURRENT_USER` default. + if (is_materialized_view) + sql_security.type = SQLSecurityType::NONE; + else + sql_security.type = SQLSecurityType::INVOKER; + else if (sql_security.definer) + sql_security.definer->replace(current_user_name); + else + sql_security.definer = std::make_shared(current_user_name); + } + + /// Checks the permissions for the specified definer user. 
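
processSQLSecurityOption() above fills in the default SQL security mode from settings and resolves CURRENT_USER before the access checks that follow. A compressed sketch of that decision flow with hypothetical standalone types; the real code also distinguishes materialized from normal views and handles the attach path separately:

#include <optional>
#include <string>

enum class SQLSecurityType { DEFINER, INVOKER, NONE };

struct SQLSecurity
{
    std::optional<SQLSecurityType> type;
    std::optional<std::string> definer;
    bool is_definer_current_user = false;
};

// Mirrors the overall shape: apply the default from settings, then resolve CURRENT_USER.
void processSQLSecurity(SQLSecurity & sec,
                        SQLSecurityType default_type,
                        const std::string & default_definer,
                        const std::string & current_user)
{
    if (!sec.type)
    {
        if (default_type == SQLSecurityType::DEFINER)
        {
            if (default_definer == "CURRENT_USER")
                sec.is_definer_current_user = true;
            else
                sec.definer = default_definer;
        }
        sec.type = default_type;
    }

    if (sec.is_definer_current_user)
    {
        if (current_user.empty())
            sec.type = SQLSecurityType::INVOKER;  // cannot resolve a user, fall back
        else
            sec.definer = current_user;
    }

    // The real implementation then verifies that the definer exists and checks
    // the SET_DEFINER / ALLOW_SQL_SECURITY_NONE access rights.
}

int main()
{
    SQLSecurity sec;
    processSQLSecurity(sec, SQLSecurityType::DEFINER, "CURRENT_USER", "alice");
    // sec.type == DEFINER, sec.definer == "alice"
    return 0;
}
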
+ if (sql_security.definer && !sql_security.is_definer_current_user && !is_attach) + { + const auto definer_name = sql_security.definer->toString(); + + /// Validate that the user exists. + context_->getAccessControl().getID(definer_name); + if (definer_name != current_user_name) + context_->checkAccess(AccessType::SET_DEFINER, definer_name); + } + + if (sql_security.type == SQLSecurityType::NONE && !is_attach) + context_->checkAccess(AccessType::ALLOW_SQL_SECURITY_NONE); +} + void registerInterpreterCreateQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 0843a7ad15a..865f2736784 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -80,6 +80,9 @@ public: void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override; + /// Check access right, validate definer statement and replace `CURRENT USER` with actual name. + static void processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_attach = false, bool is_materialized_view = false); + private: struct TableProperties { diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 5c13a1145d1..97ae9649ae8 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -15,7 +15,6 @@ #include #include #include -#include namespace DB diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 1aab72afcc1..04d44e34fff 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -123,28 +123,29 @@ BlockIO InterpreterDescribeQuery::execute() void InterpreterDescribeQuery::fillColumnsFromSubquery(const ASTTableExpression & table_expression) { - NamesAndTypesList names_and_types; + Block sample_block; auto select_query = table_expression.subquery->children.at(0); auto current_context = getContext(); if (settings.allow_experimental_analyzer) { SelectQueryOptions select_query_options; - names_and_types = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock().getNamesAndTypesList(); + sample_block = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock(); } else { - names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList(); + sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context); } - for (auto && [name, type] : names_and_types) - columns.emplace_back(std::move(name), std::move(type)); + for (auto && column : sample_block) + columns.emplace_back(std::move(column.name), std::move(column.type)); } void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpression & table_expression) { auto current_context = getContext(); TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, current_context); + auto column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true); for (const auto & column : column_descriptions) columns.emplace_back(column); @@ -154,14 +155,16 @@ void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpres auto table = 
table_function_ptr->execute(table_expression.table_function, getContext(), table_function_ptr->getName()); if (table) { - for (const auto & column : table->getVirtuals()) + auto virtuals = table->getVirtualsPtr(); + for (const auto & column : *virtuals) { if (!column_descriptions.has(column.name)) - virtual_columns.emplace_back(column.name, column.type); + virtual_columns.push_back(column); } } } } + void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & table_expression) { auto table_id = getContext()->resolveStorageID(table_expression.database_and_table_name); @@ -176,10 +179,11 @@ void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & t if (settings.describe_include_virtual_columns) { - for (const auto & column : table->getVirtuals()) + auto virtuals = table->getVirtualsPtr(); + for (const auto & column : *virtuals) { if (!column_descriptions.has(column.name)) - virtual_columns.emplace_back(column.name, column.type); + virtual_columns.push_back(column); } } diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 1b6e6be2ea2..0b87ec3cc0e 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -106,7 +106,8 @@ InterpreterFactory::InterpreterPtr InterpreterFactory::get(ASTPtr & query, Conte ProfileEvents::increment(ProfileEvents::QueriesWithSubqueries); } - Arguments arguments { + Arguments arguments + { .query = query, .context = context, .options = options diff --git a/src/Interpreters/InterpreterFactory.h b/src/Interpreters/InterpreterFactory.h index 3cf3b02d826..9c29e5ff7b4 100644 --- a/src/Interpreters/InterpreterFactory.h +++ b/src/Interpreters/InterpreterFactory.h @@ -21,7 +21,7 @@ public: ASTPtr & query; ContextMutablePtr context; const SelectQueryOptions & options; - bool allow_materialized; + bool allow_materialized = false; }; using InterpreterPtr = std::unique_ptr; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 724cfca6a80..3e8bb268fe7 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -125,7 +125,10 @@ StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query) Block InterpreterInsertQuery::getSampleBlock( const ASTInsertQuery & query, const StoragePtr & table, - const StorageMetadataPtr & metadata_snapshot) const + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context_, + bool no_destination, + bool allow_materialized) { /// If the query does not include information about columns if (!query.columns) @@ -133,13 +136,13 @@ Block InterpreterInsertQuery::getSampleBlock( if (auto * window_view = dynamic_cast(table.get())) return window_view->getInputHeader(); else if (no_destination) - return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); + return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtualsList()); else return metadata_snapshot->getSampleBlockNonMaterialized(); } /// Form the block based on the column names from the query - const auto columns_ast = processColumnTransformers(getContext()->getCurrentDatabase(), table, metadata_snapshot, query.columns); + const auto columns_ast = processColumnTransformers(context_->getCurrentDatabase(), table, metadata_snapshot, query.columns); Names names; names.reserve(columns_ast->children.size()); for (const auto & identifier : columns_ast->children) @@ -148,7 +151,7 @@ Block InterpreterInsertQuery::getSampleBlock( 
names.emplace_back(std::move(current_name)); } - return getSampleBlock(names, table, metadata_snapshot); + return getSampleBlockImpl(names, table, metadata_snapshot, no_destination, allow_materialized); } std::optional InterpreterInsertQuery::getInsertColumnNames() const @@ -170,12 +173,18 @@ std::optional InterpreterInsertQuery::getInsertColumnNames() const return names; } -Block InterpreterInsertQuery::getSampleBlock( +Block InterpreterInsertQuery::getSampleBlockImpl( const Names & names, const StoragePtr & table, - const StorageMetadataPtr & metadata_snapshot) const + const StorageMetadataPtr & metadata_snapshot, + bool no_destination, + bool allow_materialized) { Block table_sample_physical = metadata_snapshot->getSampleBlock(); + Block table_sample_virtuals; + if (no_destination) + table_sample_virtuals = table->getVirtualsHeader(); + Block table_sample_insertable = metadata_snapshot->getSampleBlockInsertable(); Block res; for (const auto & current_name : names) @@ -190,13 +199,19 @@ Block InterpreterInsertQuery::getSampleBlock( if (table_sample_physical.has(current_name)) { if (!allow_materialized) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", - current_name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column", current_name); res.insert(ColumnWithTypeAndName(table_sample_physical.getByName(current_name).type, current_name)); } - else /// The table does not have a column with that name + else if (table_sample_virtuals.has(current_name)) + { + res.insert(ColumnWithTypeAndName(table_sample_virtuals.getByName(current_name).type, current_name)); + } + else + { + /// The table does not have a column with that name throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "No such column {} in table {}", current_name, table->getStorageID().getNameForLogs()); + } } else res.insert(ColumnWithTypeAndName(table_sample_insertable.getByName(current_name).type, current_name)); @@ -260,7 +275,8 @@ Chain InterpreterInsertQuery::buildChain( const StorageMetadataPtr & metadata_snapshot, const Names & columns, ThreadStatusesHolderPtr thread_status_holder, - std::atomic_uint64_t * elapsed_counter_ms) + std::atomic_uint64_t * elapsed_counter_ms, + bool check_access) { ProfileEvents::increment(ProfileEvents::InsertQueriesWithSubqueries); ProfileEvents::increment(ProfileEvents::QueriesWithSubqueries); @@ -271,7 +287,9 @@ Chain InterpreterInsertQuery::buildChain( if (!running_group) running_group = std::make_shared(getContext()); - auto sample = getSampleBlock(columns, table, metadata_snapshot); + auto sample = getSampleBlockImpl(columns, table, metadata_snapshot, no_destination, allow_materialized); + if (check_access) + getContext()->checkAccess(AccessType::INSERT, table->getStorageID(), sample.getNames()); Chain sink = buildSink(table, metadata_snapshot, thread_status_holder, running_group, elapsed_counter_ms); Chain chain = buildPreSinkChain(sink.getInputHeader(), table, metadata_snapshot, sample); @@ -397,7 +415,7 @@ BlockIO InterpreterInsertQuery::execute() auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table functions we check access while executing /// getTable() -> 
ITableFunction::execute(). @@ -499,7 +517,23 @@ BlockIO InterpreterInsertQuery::execute() if (settings.max_insert_threads > 1) { - pre_streams_size = std::min(static_cast(settings.max_insert_threads), pipeline.getNumStreams()); + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. + const bool resize_to_max_insert_threads = !table->isView() && views.empty(); + pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads + : std::min(settings.max_insert_threads, pipeline.getNumStreams()); + + /// Deduplication when passing insert_deduplication_token breaks if using more than one thread + if (!settings.insert_deduplication_token.toString().empty()) + { + LOG_DEBUG( + getLogger("InsertQuery"), + "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); + pre_streams_size = 1; + } + if (table->supportsParallelInsert()) sink_streams_size = pre_streams_size; } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 74baf4bc4f6..bf73fb2a319 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -46,14 +46,21 @@ public: const StorageMetadataPtr & metadata_snapshot, const Names & columns, ThreadStatusesHolderPtr thread_status_holder = {}, - std::atomic_uint64_t * elapsed_counter_ms = nullptr); + std::atomic_uint64_t * elapsed_counter_ms = nullptr, + bool check_access = false); static void extendQueryLogElemImpl(QueryLogElement & elem, ContextPtr context_); void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context_) const override; StoragePtr getTable(ASTInsertQuery & query); - Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; + static Block getSampleBlock( + const ASTInsertQuery & query, + const StoragePtr & table, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context_, + bool no_destination = false, + bool allow_materialized = false); bool supportsTransactions() const override { return true; } @@ -62,7 +69,7 @@ public: bool shouldAddSquashingFroStorage(const StoragePtr & table) const; private: - Block getSampleBlock(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; + static Block getSampleBlockImpl(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, bool no_destination, bool allow_materialized); ASTPtr query_ptr; const bool allow_materialized; diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 3431cd5e568..86196270ed1 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -277,9 +277,11 @@ BlockIO InterpreterKillQueryQuery::execute() code = CancellationCode::NotFound; else { - ParserAlterCommand parser; + const auto alter_command = command_col.getDataAt(i).toString(); + const auto with_round_bracket = alter_command.front() == '('; + ParserAlterCommand parser{with_round_bracket}; auto command_ast - = parseQuery(parser, command_col.getDataAt(i).toString(), 0, getContext()->getSettingsRef().max_parser_depth); + = parseQuery(parser, alter_command, 0, getContext()->getSettingsRef().max_parser_depth); 
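
The InterpreterInsertQuery::execute() changes above size the insert pipeline: max_insert_threads is taken as-is only when the target is not a view and has no dependent views, and a non-empty insert_deduplication_token forces a single stream so block boundaries stay deterministic. A small standalone sketch of that sizing rule, with made-up parameter names:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

// Decide how many parallel sink streams an INSERT SELECT may use.
size_t chooseInsertStreams(size_t max_insert_threads,
                           size_t pipeline_streams,
                           bool target_is_view,
                           bool target_has_dependent_views,
                           const std::string & insert_deduplication_token)
{
    if (max_insert_threads <= 1)
        return 1;

    // Views keep their own parallel_view_processing setting, so do not resize for them.
    const bool resize_to_max = !target_is_view && !target_has_dependent_views;
    size_t streams = resize_to_max ? max_insert_threads
                                   : std::min(max_insert_threads, pipeline_streams);

    // Token-based deduplication depends on stable block splitting: force one stream.
    if (!insert_deduplication_token.empty())
        streams = 1;

    return streams;
}

int main()
{
    assert(chooseInsertStreams(8, 4, false, false, "") == 8);
    assert(chooseInsertStreams(8, 4, false, true, "") == 4);
    assert(chooseInsertStreams(8, 4, false, false, "token") == 1);
    return 0;
}
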
required_access_rights = InterpreterAlterQuery::getRequiredAccessForCommand( command_ast->as(), table_id.database_name, table_id.table_name); if (!access->isGranted(required_access_rights)) diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 52001fdcaf4..06b6ebc9cbb 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -47,12 +47,12 @@ BlockIO InterpreterRenameQuery::execute() */ RenameDescriptions descriptions; - descriptions.reserve(rename.elements.size()); + descriptions.reserve(rename.getElements().size()); /// Don't allow to drop tables (that we are renaming); don't allow to create tables in places where tables will be renamed. TableGuards table_guards; - for (const auto & elem : rename.elements) + for (const auto & elem : rename.getElements()) { descriptions.emplace_back(elem, current_database); const auto & description = descriptions.back(); @@ -186,7 +186,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename { AccessRightsElements required_access; const auto & rename = query_ptr->as(); - for (const auto & elem : rename.elements) + for (const auto & elem : rename.getElements()) { if (type == RenameType::RenameTable) { @@ -214,7 +214,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename void InterpreterRenameQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const { const auto & rename = ast->as(); - for (const auto & element : rename.elements) + for (const auto & element : rename.getElements()) { { String database = backQuoteIfNeed(!element.from.database ? getContext()->getCurrentDatabase() : element.from.getDatabase()); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d0cf9f1160c..a314492c5b0 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -76,6 +76,8 @@ #include #include #include +#include +#include #include #include @@ -224,8 +226,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( const StoragePtr & storage_, const StorageMetadataPtr & metadata_snapshot_, const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) -{} + : InterpreterSelectQuery( + query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) +{ +} InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, @@ -486,7 +490,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( LOG_DEBUG(log, "FINAL modifier is not supported with parallel replicas. Query will be executed without using them."); context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); } - else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + else if (settings.allow_experimental_parallel_reading_from_replicas >= 2) { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "FINAL modifier is not supported with parallel replicas"); } @@ -501,7 +505,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( LOG_DEBUG(log, "To use parallel replicas with plain MergeTree tables please enable setting `parallel_replicas_for_non_replicated_merge_tree`. 
For now query will be executed without using them."); context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); } - else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + else if (settings.allow_experimental_parallel_reading_from_replicas >= 2) { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "To use parallel replicas with plain MergeTree tables please enable setting `parallel_replicas_for_non_replicated_merge_tree`"); } @@ -618,7 +622,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( required_result_column_names, table_join); - query_info.syntax_analyzer_result = syntax_analyzer_result; context->setDistributed(syntax_analyzer_result->is_remote_storage); @@ -777,12 +780,30 @@ InterpreterSelectQuery::InterpreterSelectQuery( result_header = getSampleBlockImpl(); }; - analyze(shouldMoveToPrewhere()); + + /// This is a hack to make sure we reanalyze if GlobalSubqueriesVisitor changed allow_experimental_parallel_reading_from_replicas + /// inside the query context (because it doesn't have write access to the main context) + UInt64 parallel_replicas_before_analysis + = context->hasQueryContext() ? context->getQueryContext()->getSettingsRef().allow_experimental_parallel_reading_from_replicas : 0; + + /// Conditionally support AST-based PREWHERE optimization. + analyze(shouldMoveToPrewhere() && (!settings.query_plan_optimize_prewhere || !settings.query_plan_enable_optimizations)); + bool need_analyze_again = false; bool can_analyze_again = false; + if (context->hasQueryContext()) { + /// As this query can't be executed with parallel replicas, we must reanalyze it + if (context->getQueryContext()->getSettingsRef().allow_experimental_parallel_reading_from_replicas + != parallel_replicas_before_analysis) + { + context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + context->setSetting("max_parallel_replicas", UInt64{0}); + need_analyze_again = true; + } + /// Check number of calls of 'analyze' function. /// If it is too big, we will not analyze the query again not to have exponential blowup. std::atomic & current_query_analyze_count = context->getQueryContext()->kitchen_sink.analyze_counter; @@ -830,7 +851,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (query.prewhere() && !query.where()) analysis_result.prewhere_info->need_filter = true; - if (table_id && got_storage_from_query && !joined_tables.isLeftTableFunction()) + if (table_id && got_storage_from_query && !joined_tables.isLeftTableFunction() && !options.ignore_access_check) { /// The current user should have the SELECT privilege. 
If this table_id is for a table /// function we don't check access rights here because in this case they have been already @@ -871,7 +892,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() { /// The query could use trivial count if it didn't use parallel replicas, so let's disable it and reanalyze context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(log, "Disabling parallel replicas to be able to use a trivial count optimization"); return true; } @@ -901,7 +922,24 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() } ActionDAGNodes added_filter_nodes = MergeTreeData::getFiltersForPrimaryKeyAnalysis(*this); - UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead(context, storage_snapshot, query_info_copy, added_filter_nodes); + if (query_info_copy.prewhere_info) + { + { + const auto & node + = query_info_copy.prewhere_info->prewhere_actions->findInOutputs(query_info_copy.prewhere_info->prewhere_column_name); + added_filter_nodes.nodes.push_back(&node); + } + + if (query_info_copy.prewhere_info->row_level_filter) + { + const auto & node + = query_info_copy.prewhere_info->row_level_filter->findInOutputs(query_info_copy.prewhere_info->row_level_column_name); + added_filter_nodes.nodes.push_back(&node); + } + } + + query_info_copy.filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead(context, storage_snapshot, query_info_copy); /// Note that we treat an estimation of 0 rows as a real estimation size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; LOG_TRACE(log, "Estimated {} rows to read. It is enough work for {} parallel replicas", rows_to_read, number_of_replicas_to_use); @@ -2073,8 +2111,7 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis if (does_storage_support_prewhere && shouldMoveToPrewhere()) { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. - analysis.prewhere_info - = std::make_shared(std::move(analysis.filter_info->actions), std::move(analysis.filter_info->column_name)); + analysis.prewhere_info = std::make_shared(analysis.filter_info->actions, analysis.filter_info->column_name); analysis.prewhere_info->prewhere_actions->projectInput(false); analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column; analysis.prewhere_info->need_filter = true; @@ -2084,8 +2121,8 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis else { /// Add row level security actions to prewhere. - analysis.prewhere_info->row_level_filter = std::move(analysis.filter_info->actions); - analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name); + analysis.prewhere_info->row_level_filter = analysis.filter_info->actions; + analysis.prewhere_info->row_level_column_name = analysis.filter_info->column_name; analysis.prewhere_info->row_level_filter->projectInput(false); analysis.filter_info = nullptr; } @@ -2336,6 +2373,49 @@ UInt64 InterpreterSelectQuery::maxBlockSizeByLimit() const return 0; } +/** Storages can rely that filters that for storage will be available for analysis before + * plan is fully constructed and optimized. 
+ * + * StorageMerge common header calculation and prewhere push-down relies on this. + * + * This is similar to Planner::collectFiltersForAnalysis + */ +void collectFiltersForAnalysis( + const ASTPtr & query_ptr, + const ContextPtr & query_context, + const StorageSnapshotPtr & storage_snapshot, + const SelectQueryOptions & options, + SelectQueryInfo & query_info) +{ + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + + auto dummy = std::make_shared( + storage_snapshot->storage.getStorageID(), ColumnsDescription(storage_snapshot->getColumns(get_column_options)), storage_snapshot); + + QueryPlan query_plan; + InterpreterSelectQuery(query_ptr, query_context, dummy, dummy->getInMemoryMetadataPtr(), options).buildQueryPlan(query_plan); + + auto optimization_settings = QueryPlanOptimizationSettings::fromContext(query_context); + query_plan.optimize(optimization_settings); + + std::vector nodes_to_process; + nodes_to_process.push_back(query_plan.getRootNode()); + + while (!nodes_to_process.empty()) + { + const auto * node_to_process = nodes_to_process.back(); + nodes_to_process.pop_back(); + nodes_to_process.insert(nodes_to_process.end(), node_to_process->children.begin(), node_to_process->children.end()); + + auto * read_from_dummy = typeid_cast(node_to_process->step.get()); + if (!read_from_dummy) + continue; + + query_info.filter_actions_dag = read_from_dummy->getFilterActionsDAG(); + query_info.optimized_prewhere_info = read_from_dummy->getPrewhereInfo(); + } +} + void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan) { auto & query = getSelectQuery(); @@ -2462,13 +2542,22 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } else if (storage) { + if (shouldMoveToPrewhere() && settings.query_plan_optimize_prewhere && settings.query_plan_enable_optimizations + && typeid_cast(storage.get())) + collectFiltersForAnalysis(query_ptr, context, storage_snapshot, options, query_info); + /// Table. if (max_streams == 0) max_streams = 1; /// If necessary, we request more sources than the number of threads - to distribute the work evenly over the threads. if (max_streams > 1 && !is_sync_remote) - max_streams = static_cast(max_streams * settings.max_streams_to_max_threads_ratio); + { + if (auto streams_with_ratio = max_streams * settings.max_streams_to_max_threads_ratio; streams_with_ratio < SIZE_MAX) + max_streams = static_cast(streams_with_ratio); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Exceeded limit for `max_streams` with `max_streams_to_max_threads_ratio`. 
Make sure that `max_streams * max_streams_to_max_threads_ratio` not exceeds {}, current value: {}", SIZE_MAX, streams_with_ratio); + } auto & prewhere_info = analysis_result.prewhere_info; diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 4897101d80b..922f4a99b4a 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -103,7 +103,7 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, auto query_tree = buildQueryTree(query, context); QueryTreePassManager query_tree_pass_manager(context); - addQueryTreePasses(query_tree_pass_manager); + addQueryTreePasses(query_tree_pass_manager, select_query_options.only_analyze); /// We should not apply any query tree level optimizations on shards /// because it can lead to a changed header. diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 16bc4b1fe2e..cc1d7dd6531 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -56,7 +56,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( size_t num_children = ast->list_of_selects->children.size(); if (!num_children) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no children in ASTSelectWithUnionQuery"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No children in ASTSelectWithUnionQuery"); /// Note that we pass 'required_result_column_names' to first SELECT. /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT, diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 261c781e0ba..7e68fc5c4c1 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -19,7 +19,7 @@ BlockIO InterpreterSetQuery::execute() getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); auto session_context = getContext()->getSessionContext(); session_context->applySettingsChanges(ast.changes); - session_context->addQueryParameters(ast.query_parameters); + session_context->addQueryParameters(NameToNameMap{ast.query_parameters.begin(), ast.query_parameters.end()}); session_context->resetSettingsToDefaultValue(ast.default_settings); return {}; } diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index 149ba6d7575..f32ebceaa63 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -107,7 +107,7 @@ SELECT '' AS extra )"; // TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see - // IStorage::getVirtuals(). We can't easily do that via SQL. + // IStorage::getVirtualsList(). We can't easily do that via SQL. 
if (query.full) { diff --git a/src/Interpreters/InterpreterShowFunctionsQuery.cpp b/src/Interpreters/InterpreterShowFunctionsQuery.cpp index e83f61eac53..829670d7929 100644 --- a/src/Interpreters/InterpreterShowFunctionsQuery.cpp +++ b/src/Interpreters/InterpreterShowFunctionsQuery.cpp @@ -25,13 +25,13 @@ String InterpreterShowFunctionsQuery::getRewrittenQuery() const auto & query = query_ptr->as(); - DatabasePtr systemDb = DatabaseCatalog::instance().getSystemDatabase(); + DatabasePtr system_db = DatabaseCatalog::instance().getSystemDatabase(); String rewritten_query = fmt::format( R"( SELECT * FROM {}.{})", - systemDb->getDatabaseName(), + system_db->getDatabaseName(), functions_table); if (!query.like.empty()) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 19449cd9e28..026e0c166b4 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -20,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -60,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -334,10 +334,17 @@ BlockIO InterpreterSystemQuery::execute() { getContext()->checkAccess(AccessType::SYSTEM_DROP_DNS_CACHE); DNSResolver::instance().dropCache(); + HostResolversPool::instance().dropCache(); /// Reinitialize clusters to update their resolved_addresses system_context->reloadClusterConfig(); break; } + case Type::DROP_CONNECTIONS_CACHE: + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_CONNECTIONS_CACHE); + HTTPConnectionPools::instance().dropCache(); + break; + } case Type::DROP_MARK_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); system_context->clearMarkCache(); @@ -362,18 +369,22 @@ BlockIO InterpreterSystemQuery::execute() getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); getContext()->clearQueryCache(); break; -#if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: +#if USE_EMBEDDED_COMPILER getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE); if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache()) cache->clear(); break; +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for JIT compilation"); #endif -#if USE_AWS_S3 case Type::DROP_S3_CLIENT_CACHE: +#if USE_AWS_S3 getContext()->checkAccess(AccessType::SYSTEM_DROP_S3_CLIENT_CACHE); S3::ClientCacheRegistry::instance().clearCacheForAll(); break; +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for AWS S3"); #endif case Type::DROP_FILESYSTEM_CACHE: @@ -459,6 +470,13 @@ BlockIO InterpreterSystemQuery::execute() { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); } + case Type::DROP_PAGE_CACHE: + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_PAGE_CACHE); + + getContext()->dropPageCache(); + break; + } case Type::DROP_SCHEMA_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_SCHEMA_CACHE); @@ -768,6 +786,12 @@ BlockIO InterpreterSystemQuery::execute() flushJemallocProfile("/tmp/jemalloc_clickhouse"); break; } +#else + case Type::JEMALLOC_PURGE: + case Type::JEMALLOC_ENABLE_PROFILE: + case Type::JEMALLOC_DISABLE_PROFILE: + case 
Type::JEMALLOC_FLUSH_PROFILE: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without JEMalloc"); #endif default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type of SYSTEM query"); @@ -839,7 +863,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica, system_context->getGlobalContext(), columns, constraints, - false); + LoadingStrictnessLevel::ATTACH); database->attachTable(system_context, replica.table_name, table, data_path); @@ -1081,7 +1105,9 @@ void InterpreterSystemQuery::syncReplica(ASTSystemQuery & query) { LOG_TRACE(log, "Synchronizing entries in replica's queue with table's log and waiting for current last entry to be processed"); auto sync_timeout = getContext()->getSettingsRef().receive_timeout.totalMilliseconds(); - if (!storage_replicated->waitForProcessingQueue(sync_timeout, query.sync_replica_mode, query.src_replicas)) + + std::unordered_set replicas(query.src_replicas.begin(), query.src_replicas.end()); + if (!storage_replicated->waitForProcessingQueue(sync_timeout, query.sync_replica_mode, replicas)) { LOG_ERROR(log, "SYNC REPLICA {}: Timed out.", table_id.getNameForLogs()); throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "SYNC REPLICA {}: command timed out. " \ @@ -1183,22 +1209,20 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() break; } case Type::DROP_DNS_CACHE: + case Type::DROP_CONNECTIONS_CACHE: case Type::DROP_MARK_CACHE: case Type::DROP_MMAP_CACHE: case Type::DROP_QUERY_CACHE: -#if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: -#endif case Type::DROP_UNCOMPRESSED_CACHE: case Type::DROP_INDEX_MARK_CACHE: case Type::DROP_INDEX_UNCOMPRESSED_CACHE: case Type::DROP_FILESYSTEM_CACHE: case Type::SYNC_FILESYSTEM_CACHE: + case Type::DROP_PAGE_CACHE: case Type::DROP_SCHEMA_CACHE: case Type::DROP_FORMAT_SCHEMA_CACHE: -#if USE_AWS_S3 case Type::DROP_S3_CLIENT_CACHE: -#endif { required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE); break; @@ -1414,7 +1438,6 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_LISTEN); break; } -#if USE_JEMALLOC case Type::JEMALLOC_PURGE: case Type::JEMALLOC_ENABLE_PROFILE: case Type::JEMALLOC_DISABLE_PROFILE: @@ -1423,7 +1446,6 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_JEMALLOC); break; } -#endif case Type::STOP_THREAD_FUZZER: case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT: diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index bf2d1eb79cd..6251a9604e1 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -168,7 +168,7 @@ private: has_asterisks = true; if (!qualified_asterisk->qualifier) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must have a qualifier"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Qualified asterisk must have a qualifier"); auto & identifier = qualified_asterisk->qualifier->as(); @@ -183,7 +183,7 @@ private: transformer->as()) IASTColumnsTransformer::transform(transformer, columns); else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must only have children of IASTColumnsTransformer type"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Qualified asterisk must only have children of IASTColumnsTransformer type"); } } } 
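The SYSTEM-query hunks above follow one recurring pattern: the `#if USE_...` guard is moved inside the `case` label, so a build compiled without the optional dependency still recognises the statement and throws SUPPORT_IS_DISABLED instead of falling through to the generic "Unknown type of SYSTEM query" error. The following is a minimal, self-contained sketch of that pattern, not the actual ClickHouse code: the enum, the build flag value, and the use of std::runtime_error are illustrative placeholders for ASTSystemQuery::Type, the real compile-time flags, and DB::Exception with ErrorCodes::SUPPORT_IS_DISABLED.

    #include <cstdio>
    #include <stdexcept>

    // Hypothetical stand-in for the real build flag; set to 1 in builds with JIT support.
    #define USE_EMBEDDED_COMPILER 0

    enum class Type
    {
        DROP_COMPILED_EXPRESSION_CACHE,
        DROP_MARK_CACHE,
    };

    void executeSystemQuery(Type type)
    {
        switch (type)
        {
            // The case label is visible in every build, so an unsupported build reports
            // a precise error instead of hitting the default "Unknown type of SYSTEM query" branch.
            case Type::DROP_COMPILED_EXPRESSION_CACHE:
    #if USE_EMBEDDED_COMPILER
                // clear the compiled-expression cache here (only compiled in with JIT support)
                break;
    #else
                throw std::runtime_error("The server was compiled without the support for JIT compilation");
    #endif
            case Type::DROP_MARK_CACHE:
                // always available
                break;
        }
    }

    int main()
    {
        try
        {
            executeSystemQuery(Type::DROP_COMPILED_EXPRESSION_CACHE);
        }
        catch (const std::exception & e)
        {
            std::puts(e.what()); // on a build without JIT support this prints the "compiled without..." message
        }
        executeSystemQuery(Type::DROP_MARK_CACHE);
        return 0;
    }

Because the case labels are now compiled unconditionally, the access-rights switch in getRequiredAccessForDDLOnCluster() can also list DROP_COMPILED_EXPRESSION_CACHE, DROP_S3_CLIENT_CACHE and the JEMALLOC_* types without guards, which is why the surrounding #if/#endif pairs were removed there.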
diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 9be8bf178a1..49693332280 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -272,7 +272,7 @@ void JoinedTables::makeFakeTable(StoragePtr storage, const StorageMetadataPtr & auto & table = tables_with_columns.back(); table.addHiddenColumns(storage_columns.getMaterialized()); table.addHiddenColumns(storage_columns.getAliases()); - table.addHiddenColumns(storage->getVirtuals()); + table.addHiddenColumns(storage->getVirtualsList()); } else tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_header.getNamesAndTypesList()); diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 901c82029ee..d5fb0208d45 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -239,7 +239,7 @@ public: /// SortCursorImpl can work with permutation, but MergeJoinCursor can't. if (impl.permutation) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: MergeJoinCursor doesn't support permutation"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinCursor doesn't support permutation"); } size_t position() const { return impl.getRow(); } diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index 482681d8276..4f1e8fafc11 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -31,7 +31,6 @@ struct MetricLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 502b961ced8..3b1a499255b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -265,7 +264,7 @@ MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command) alter_command->partition = alter_command->children.emplace_back(command.partition).get(); auto row_exists_predicate = makeASTFunction("equals", - std::make_shared(LightweightDeleteDescription::FILTER_COLUMN.name), + std::make_shared(RowExistsColumn::name), std::make_shared(Field(0))); if (command.predicate) @@ -342,11 +341,6 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const return part && part->hasProjection(name); } -bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const -{ - return part && part->hasBrokenProjection(name); -} - bool MutationsInterpreter::Source::isCompactPart() const { return part && part->getType() == MergeTreeDataPartType::Compact; @@ -355,7 +349,8 @@ bool MutationsInterpreter::Source::isCompactPart() const static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) { auto all_columns = metadata_snapshot->getColumns().getNamesOfPhysical(); - for (const auto & column : storage.getVirtuals()) + auto virtuals = storage.getVirtualsPtr(); + for (const auto & column : *virtuals) all_columns.push_back(column.name); return all_columns; } @@ -440,60 +435,54 @@ static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const static void validateUpdateColumns( const MutationsInterpreter::Source & source, - const 
StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns, - const std::unordered_map & column_to_affected_materialized) + const StorageMetadataPtr & metadata_snapshot, + const NameSet & updated_columns, + const std::unordered_map & column_to_affected_materialized, + const ContextPtr & context) { + auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context); NameSet key_columns = getKeyColumns(source, metadata_snapshot); - for (const String & column_name : updated_columns) + const auto & storage_columns = storage_snapshot->metadata->getColumns(); + const auto & virtual_columns = *storage_snapshot->virtual_columns; + + for (const auto & column_name : updated_columns) { - auto found = false; - for (const auto & col : metadata_snapshot->getColumns().getOrdinary()) - { - if (col.name == column_name) - { - found = true; - break; - } - } - - /// Allow to override value of lightweight delete filter virtual column - if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - if (!source.supportsLightweightDelete()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); - found = true; - } - - /// Dont allow to override value of block number virtual column - if (!found && column_name == BlockNumberColumn::name) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name)); - } - - if (!found) - { - for (const auto & col : metadata_snapshot->getColumns().getMaterialized()) - { - if (col.name == column_name) - throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name)); - } - - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); - } - if (key_columns.contains(column_name)) throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE key column {}", backQuote(column_name)); + if (storage_columns.tryGetColumn(GetColumnsOptions::Materialized, column_name)) + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name)); + auto materialized_it = column_to_affected_materialized.find(column_name); if (materialized_it != column_to_affected_materialized.end()) { - for (const String & materialized : materialized_it->second) + for (const auto & materialized : materialized_it->second) { if (key_columns.contains(materialized)) + { throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Updated column {} affects MATERIALIZED column {}, which is a key column. 
" "Cannot UPDATE it.", backQuote(column_name), backQuote(materialized)); + } + } + } + + if (!storage_columns.tryGetColumn(GetColumnsOptions::Ordinary, column_name)) + { + /// Allow to override value of lightweight delete filter virtual column + if (column_name == RowExistsColumn::name) + { + if (!source.supportsLightweightDelete()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + } + else if (virtual_columns.tryGet(column_name)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name)); + } + else + { + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); } } } @@ -551,8 +540,8 @@ void MutationsInterpreter::prepare(bool dry_run) /// Add _row_exists column if it is physically present in the part if (source.hasLightweightDeleteMask()) { - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - available_columns_set.insert(LightweightDeleteDescription::FILTER_COLUMN.name); + all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); + available_columns_set.insert(RowExistsColumn::name); } NameSet updated_columns; @@ -568,9 +557,7 @@ void MutationsInterpreter::prepare(bool dry_run) for (const auto & [name, _] : command.column_to_update_expression) { - if (!available_columns_set.contains(name) - && name != LightweightDeleteDescription::FILTER_COLUMN.name - && name != BlockNumberColumn::name) + if (!available_columns_set.contains(name) && name != RowExistsColumn::name) throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Column {} is updated but not requested to read", name); @@ -595,7 +582,7 @@ void MutationsInterpreter::prepare(bool dry_run) } } - validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); + validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized, context); } StorageInMemoryMetadata::HasDependencyCallback has_dependency = @@ -671,15 +658,11 @@ void MutationsInterpreter::prepare(bool dry_run) { type = physical_column->type; } - else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + else if (column_name == RowExistsColumn::name) { - type = LightweightDeleteDescription::FILTER_COLUMN.type; + type = RowExistsColumn::type; deleted_mask_updated = true; } - else if (column_name == BlockNumberColumn::name) - { - type = BlockNumberColumn::type; - } else { throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column_name); @@ -812,7 +795,7 @@ void MutationsInterpreter::prepare(bool dry_run) { mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); const auto & projection = projections_desc.get(command.projection_name); - if (!source.hasProjection(projection.name) || source.hasBrokenProjection(projection.name)) + if (!source.hasProjection(projection.name)) { for (const auto & column : projection.required_columns) dependencies.emplace(column, ColumnDependency::PROJECTION); @@ -999,13 +982,6 @@ void MutationsInterpreter::prepare(bool dry_run) if (!source.hasProjection(projection.name)) continue; - /// Always rebuild broken projections. 
- if (source.hasBrokenProjection(projection.name)) - { - materialized_projections.insert(projection.name); - continue; - } - if (need_rebuild_projections) { materialized_projections.insert(projection.name); @@ -1040,7 +1016,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s /// Add _row_exists column if it is present in the part if (source.hasLightweightDeleteMask() || deleted_mask_updated) - all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); + all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); bool has_filters = false; /// Next, for each stage calculate columns changed by this and previous stages. @@ -1050,7 +1026,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s { for (const auto & column : all_columns) { - if (column.name == LightweightDeleteDescription::FILTER_COLUMN.name && !deleted_mask_updated) + if (column.name == RowExistsColumn::name && !deleted_mask_updated) continue; prepared_stages[i].output_columns.insert(column.name); @@ -1069,7 +1045,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s /// and so it is not in the list of AllPhysical columns. for (const auto & [column_name, _] : prepared_stages[i].column_to_updated) { - if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && has_filters && !deleted_mask_updated) + if (column_name == RowExistsColumn::name && has_filters && !deleted_mask_updated) continue; prepared_stages[i].output_columns.insert(column_name); @@ -1160,93 +1136,6 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s } } -/// This structure re-implements adding virtual columns while reading from MergeTree part. -/// It would be good to unify it with IMergeTreeSelectAlgorithm. 
-struct VirtualColumns -{ - struct ColumnAndPosition - { - ColumnWithTypeAndName column; - size_t position; - }; - - using Columns = std::vector; - - Columns virtuals; - Names columns_to_read; - - VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns)) - { - for (size_t i = 0; i < columns_to_read.size(); ++i) - { - if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name) - { - if (!part->getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name)) - { - ColumnWithTypeAndName mask_column; - mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type; - mask_column.column = mask_column.type->createColumnConst(0, 1); - mask_column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i}); - } - } - else if (columns_to_read[i] == "_partition_id") - { - ColumnWithTypeAndName column; - column.type = std::make_shared(); - column.column = column.type->createColumnConst(0, part->info.partition_id); - column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i}); - } - else if (columns_to_read[i] == BlockNumberColumn::name) - { - if (!part->getColumns().contains(BlockNumberColumn::name)) - { - ColumnWithTypeAndName block_number_column; - block_number_column.type = BlockNumberColumn::type; - block_number_column.column = block_number_column.type->createColumnConst(0, part->info.min_block); - block_number_column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(block_number_column), .position = i}); - } - } - } - - if (!virtuals.empty()) - { - Names columns_no_virtuals; - columns_no_virtuals.reserve(columns_to_read.size()); - size_t next_virtual = 0; - for (size_t i = 0; i < columns_to_read.size(); ++i) - { - if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position) - ++next_virtual; - else - columns_no_virtuals.emplace_back(std::move(columns_to_read[i])); - } - - columns_to_read.swap(columns_no_virtuals); - } - } - - void addVirtuals(QueryPlan & plan) - { - auto dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); - - for (auto & column : virtuals) - { - const auto & adding_const = dag->addColumn(std::move(column.column)); - auto & outputs = dag->getOutputs(); - outputs.insert(outputs.begin() + column.position, &adding_const); - } - - auto step = std::make_unique(plan.getCurrentDataStream(), std::move(dag)); - plan.addStep(std::move(step)); - } -}; - void MutationsInterpreter::Source::read( Stage & first_stage, QueryPlan & plan, @@ -1289,16 +1178,12 @@ void MutationsInterpreter::Source::read( filter = ActionsDAG::buildFilterActionsDAG(nodes); } - VirtualColumns virtual_columns(std::move(required_columns), part); - createReadFromPartStep( MergeTreeSequentialSourceType::Mutation, - plan, *data, storage_snapshot, part, - std::move(virtual_columns.columns_to_read), + plan, *data, storage_snapshot, + part, required_columns, apply_deleted_mask_, filter, context_, getLogger("MutationsInterpreter")); - - virtual_columns.addVirtuals(plan); } else { diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 4c35ec34b58..eda94190185 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -126,7 +126,6 @@ public: bool materializeTTLRecalculateOnly() const; bool 
hasSecondaryIndex(const String & name) const; bool hasProjection(const String & name) const; - bool hasBrokenProjection(const String & name) const; bool isCompactPart() const; void read( diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 0fdc9347ee9..fd7ffca2872 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -337,7 +337,7 @@ static ASTPtr getPartitionPolicy(const NamesAndTypesList & primary_keys) WhichDataType which(type); if (which.isNullable()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: MySQL primary key must be not null, it is a bug."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MySQL's primary key must be not null, it is a bug."); if (which.isDate() || which.isDate32() || which.isDateTime() || which.isDateTime64()) { @@ -579,7 +579,7 @@ ASTs InterpreterRenameImpl::getRewrittenQueries( const InterpreterRenameImpl::TQuery & rename_query, ContextPtr context, const String & mapped_to_database, const String & mysql_database) { ASTRenameQuery::Elements elements; - for (const auto & rename_element : rename_query.elements) + for (const auto & rename_element : rename_query.getElements()) { const auto & to_database = resolveDatabase(rename_element.to.getDatabase(), mysql_database, mapped_to_database, context); const auto & from_database = resolveDatabase(rename_element.from.getDatabase(), mysql_database, mapped_to_database, context); @@ -600,8 +600,7 @@ ASTs InterpreterRenameImpl::getRewrittenQueries( if (elements.empty()) return ASTs{}; - auto rewritten_query = std::make_shared(); - rewritten_query->elements = elements; + auto rewritten_query = std::make_shared(std::move(elements)); return ASTs{rewritten_query}; } @@ -616,7 +615,8 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( return {}; auto rewritten_alter_query = std::make_shared(); - auto rewritten_rename_query = std::make_shared(); + ASTRenameQuery::Elements rename_elements; + rewritten_alter_query->setDatabase(mapped_to_database); rewritten_alter_query->setTable(alter_query.table); rewritten_alter_query->alter_object = ASTAlterQuery::AlterObjectType::TABLE; @@ -749,13 +749,13 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( /// For ALTER TABLE table_name RENAME TO new_table_name_1, RENAME TO new_table_name_2; /// We just need to generate RENAME TABLE table_name TO new_table_name_2; - if (rewritten_rename_query->elements.empty()) - rewritten_rename_query->elements.push_back(ASTRenameQuery::Element()); + if (rename_elements.empty()) + rename_elements.push_back(ASTRenameQuery::Element()); - rewritten_rename_query->elements.back().from.database = std::make_shared(mapped_to_database); - rewritten_rename_query->elements.back().from.table = std::make_shared(alter_query.table); - rewritten_rename_query->elements.back().to.database = std::make_shared(mapped_to_database); - rewritten_rename_query->elements.back().to.table = std::make_shared(alter_command->new_table_name); + rename_elements.back().from.database = std::make_shared(mapped_to_database); + rename_elements.back().from.table = std::make_shared(alter_query.table); + rename_elements.back().to.database = std::make_shared(mapped_to_database); + rename_elements.back().to.table = std::make_shared(alter_command->new_table_name); } } @@ -765,8 +765,11 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( if (!rewritten_alter_query->command_list->children.empty()) rewritten_queries.push_back(rewritten_alter_query); - if 
(!rewritten_rename_query->elements.empty()) + if (!rename_elements.empty()) + { + auto rewritten_rename_query = std::make_shared(std::move(rename_elements)); rewritten_queries.push_back(rewritten_rename_query); + } return rewritten_queries; } diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index fffc1e50da0..aa11749f8a6 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -32,12 +32,17 @@ ColumnsDescription OpenTelemetrySpanLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", low_cardinality_string}, - {"trace_id", std::make_shared()}, - {"span_id", std::make_shared()}, - {"parent_span_id", std::make_shared()}, - {"operation_name", low_cardinality_string}, - {"kind", std::move(span_kind_type)}, + {"hostname", low_cardinality_string, "The hostname where this span was captured."}, + {"trace_id", std::make_shared(), "ID of the trace for executed query."}, + {"span_id", std::make_shared(), "ID of the trace span."}, + {"parent_span_id", std::make_shared(), "ID of the parent trace span."}, + {"operation_name", low_cardinality_string, "The name of the operation."}, + {"kind", std::move(span_kind_type), "The SpanKind of the span. " + "INTERNAL — Indicates that the span represents an internal operation within an application. " + "SERVER — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. " + "CLIENT — Indicates that the span describes a request to some remote service. " + "PRODUCER — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. " + "CONSUMER - Indicates that the span describes a child of an asynchronous PRODUCER request."}, // DateTime64 is really unwieldy -- there is no "normal" way to convert // it to an UInt64 count of microseconds, except: // 1) reinterpretAsUInt64(reinterpretAsFixedString(date)), which just @@ -48,10 +53,10 @@ ColumnsDescription OpenTelemetrySpanLogElement::getColumnsDescription() // Also subtraction of two DateTime64 points doesn't work, so you can't // get duration. // It is much less hassle to just use UInt64 of microseconds. - {"start_time_us", std::make_shared()}, - {"finish_time_us", std::make_shared()}, - {"finish_date", std::make_shared()}, - {"attribute", std::make_shared(low_cardinality_string, std::make_shared())}, + {"start_time_us", std::make_shared(), "The start time of the trace span (in microseconds)."}, + {"finish_time_us", std::make_shared(), "The finish time of the trace span (in microseconds)."}, + {"finish_date", std::make_shared(), "The finish date of the trace span."}, + {"attribute", std::make_shared(low_cardinality_string, std::make_shared()), "Attribute depending on the trace span. 
They are filled in according to the recommendations in the OpenTelemetry standard."}, }; } diff --git a/src/Interpreters/OpenTelemetrySpanLog.h b/src/Interpreters/OpenTelemetrySpanLog.h index 4907a8feb5a..ef63044d323 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.h +++ b/src/Interpreters/OpenTelemetrySpanLog.h @@ -12,7 +12,7 @@ namespace DB struct OpenTelemetrySpanLogElement : public OpenTelemetry::Span { OpenTelemetrySpanLogElement() = default; - OpenTelemetrySpanLogElement(const OpenTelemetry::Span & span) + explicit OpenTelemetrySpanLogElement(const OpenTelemetry::Span & span) : OpenTelemetry::Span(span) {} static std::string name() { return "OpenTelemetrySpanLog"; } @@ -20,7 +20,6 @@ struct OpenTelemetrySpanLogElement : public OpenTelemetry::Span static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; // OpenTelemetry standardizes some Log data as well, so it's not just diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 13b6311a877..f3504f3f403 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -53,7 +53,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v } } } - else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity") + else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity" || function->name == "__scalarSubqueryResult") { if (const auto * expr_list = function->arguments->as()) { diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index a7f20a06785..66f933f1afa 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -123,7 +123,7 @@ ColumnsDescription PartLogElement::getColumnsDescription() {"table_uuid", std::make_shared(), "UUID of the table the data part belongs to."}, {"part_name", std::make_shared(), "Name of the data part."}, {"partition_id", std::make_shared(), "ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`."}, - {"partition", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, {"part_type", std::make_shared(), "The type of the part. 
Possible values: Wide and Compact."}, {"disk_name", std::make_shared(), "The disk name data part lies on."}, {"path_on_disk", std::make_shared(), "Absolute path to the folder with data part files."}, diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index d4cd571d69b..6dc3116ad48 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -96,7 +96,6 @@ struct PartLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class IMergeTreeDataPart; diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 5b3b87114ae..3bd7b2d4206 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -295,7 +295,7 @@ ProcessListEntry::~ProcessListEntry() auto user_process_list_it = parent.user_to_queries.find(user); if (user_process_list_it == parent.user_to_queries.end()) { - LOG_ERROR(getLogger("ProcessList"), "Logical error: cannot find user in ProcessList"); + LOG_ERROR(getLogger("ProcessList"), "Cannot find user in ProcessList"); std::terminate(); } @@ -323,7 +323,7 @@ ProcessListEntry::~ProcessListEntry() if (!found) { - LOG_ERROR(getLogger("ProcessList"), "Logical error: cannot find query by query_id and pointer to ProcessListElement in ProcessListForUser"); + LOG_ERROR(getLogger("ProcessList"), "Cannot find query by query_id and pointer to ProcessListElement in ProcessListForUser"); std::terminate(); } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index fe4ee934ed8..1c253f562e8 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -127,7 +127,7 @@ protected: struct ExecutorHolder { - ExecutorHolder(PipelineExecutor * e) : executor(e) {} + explicit ExecutorHolder(PipelineExecutor * e) : executor(e) {} void cancel(); diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index 088d193257c..015b4abc712 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -21,26 +21,26 @@ ColumnsDescription ProcessorProfileLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the event happened."}, + {"event_time", std::make_shared(), "The date and time when the event happened."}, + {"event_time_microseconds", std::make_shared(6), "The date and time with microseconds precision when the event happened."}, - {"id", std::make_shared()}, - {"parent_ids", std::make_shared(std::make_shared())}, - {"plan_step", std::make_shared()}, - {"plan_group", std::make_shared()}, + {"id", std::make_shared(), "ID of processor."}, + {"parent_ids", std::make_shared(std::make_shared()), "Parent processors IDs."}, + {"plan_step", std::make_shared(), "ID of the query plan step which created this processor. The value is zero if the processor was not added from any step."}, + {"plan_group", std::make_shared(), "Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. 
Group is used only for beautifying the result of EXPLAIN PIPELINE result."}, - {"initial_query_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"name", std::make_shared(std::make_shared())}, - {"elapsed_us", std::make_shared()}, - {"input_wait_elapsed_us", std::make_shared()}, - {"output_wait_elapsed_us", std::make_shared()}, - {"input_rows", std::make_shared()}, - {"input_bytes", std::make_shared()}, - {"output_rows", std::make_shared()}, - {"output_bytes", std::make_shared()}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"name", std::make_shared(std::make_shared()), "Name of the processor."}, + {"elapsed_us", std::make_shared(), "Number of microseconds this processor was executed."}, + {"input_wait_elapsed_us", std::make_shared(), "Number of microseconds this processor was waiting for data (from other processor)."}, + {"output_wait_elapsed_us", std::make_shared(), "Number of microseconds this processor was waiting because output port was full."}, + {"input_rows", std::make_shared(), "The number of rows consumed by processor."}, + {"input_bytes", std::make_shared(), "The number of bytes consumed by processor."}, + {"output_rows", std::make_shared(), "The number of rows generated by processor."}, + {"output_bytes", std::make_shared(), "The number of bytes generated by processor."}, }; } diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 49d2c21af89..8319d373f39 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -40,7 +40,6 @@ struct ProcessorProfileLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class ProcessorsProfileLog : public SystemLog diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index ad6e344655b..92f8ddae141 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -134,13 +134,13 @@ ColumnsDescription QueryLogElement::getColumnsDescription() {"used_storages", array_low_cardinality_string, "Canonical names of storages, which were used during query execution."}, {"used_table_functions", array_low_cardinality_string, "Canonical names of table functions, which were used during query execution."}, - {"used_row_policies", array_low_cardinality_string}, + {"used_row_policies", array_low_cardinality_string, "The list of row policies names that were used during query execution."}, - {"transaction_id", getTransactionIDDataType()}, + {"transaction_id", getTransactionIDDataType(), "The identifier of the transaction in scope of which this query was executed."}, {"query_cache_usage", std::move(query_cache_usage_datatype), "Usage of the query cache during query execution. 
Values: 'Unknown' = Status unknown, 'None' = The query result was neither written into nor read from the query cache, 'Write' = The query result was written into the query cache, 'Read' = The query result was read from the query cache."}, - {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared())}, + {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared()), "Metrics for asynchronous reading."}, }; } diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index be5cb5835c5..f9ed8a2fb9e 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -106,7 +106,6 @@ struct QueryLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } static void appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i); }; diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index d153e30a4ce..f50458745b9 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -28,58 +28,58 @@ ColumnsDescription QueryThreadLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", low_cardinality_string}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"query_start_time", std::make_shared()}, - {"query_start_time_microseconds", std::make_shared(6)}, - {"query_duration_ms", std::make_shared()}, + {"hostname", low_cardinality_string, "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the thread has finished execution of the query."}, + {"event_time", std::make_shared(), "The date and time when the thread has finished execution of the query."}, + {"event_time_microseconds", std::make_shared(6), "The date and time when the thread has finished execution of the query with microseconds precision."}, + {"query_start_time", std::make_shared(), "Start time of query execution."}, + {"query_start_time_microseconds", std::make_shared(6), "Start time of query execution with microsecond precision."}, + {"query_duration_ms", std::make_shared(), "Duration of query execution."}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"memory_usage", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, + {"read_rows", std::make_shared(), "Number of read rows."}, + {"read_bytes", std::make_shared(), "Number of read bytes."}, + {"written_rows", std::make_shared(), "For INSERT queries, the number of written rows. For other queries, the column value is 0."}, + {"written_bytes", std::make_shared(), "For INSERT queries, the number of written bytes. 
For other queries, the column value is 0."}, + {"memory_usage", std::make_shared(), "The difference between the amount of allocated and freed memory in context of this thread."}, + {"peak_memory_usage", std::make_shared(), "The maximum difference between the amount of allocated and freed memory in context of this thread."}, - {"thread_name", low_cardinality_string}, - {"thread_id", std::make_shared()}, - {"master_thread_id", std::make_shared()}, - {"current_database", low_cardinality_string}, - {"query", std::make_shared()}, - {"normalized_query_hash", std::make_shared()}, + {"thread_name", low_cardinality_string, "Name of the thread."}, + {"thread_id", std::make_shared(), "Internal thread ID."}, + {"master_thread_id", std::make_shared(), "OS initial ID of initial thread."}, + {"current_database", low_cardinality_string, "Name of the current database."}, + {"query", std::make_shared(), "Query string."}, + {"normalized_query_hash", std::make_shared(), "The hash of normalized query - with wiped constanstans, etc."}, - {"is_initial_query", std::make_shared()}, - {"user", low_cardinality_string}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, - {"port", std::make_shared()}, - {"initial_user", low_cardinality_string}, - {"initial_query_id", std::make_shared()}, - {"initial_address", DataTypeFactory::instance().get("IPv6")}, - {"initial_port", std::make_shared()}, - {"initial_query_start_time", std::make_shared()}, - {"initial_query_start_time_microseconds", std::make_shared(6)}, - {"interface", std::make_shared()}, - {"is_secure", std::make_shared()}, - {"os_user", low_cardinality_string}, - {"client_hostname", low_cardinality_string}, - {"client_name", low_cardinality_string}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, - {"http_method", std::make_shared()}, - {"http_user_agent", low_cardinality_string}, - {"http_referer", std::make_shared()}, - {"forwarded_for", std::make_shared()}, - {"quota_key", std::make_shared()}, - {"distributed_depth", std::make_shared()}, + {"is_initial_query", std::make_shared(), "Query type. Possible values: 1 — Query was initiated by the client, 0 — Query was initiated by another query for distributed query execution."}, + {"user", low_cardinality_string, "Name of the user who initiated the current query."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"address", DataTypeFactory::instance().get("IPv6"), "IP address that was used to make the query."}, + {"port", std::make_shared(), "The client port that was used to make the query."}, + {"initial_user", low_cardinality_string, "Name of the user who ran the initial query (for distributed query execution)."}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"initial_address", DataTypeFactory::instance().get("IPv6"), "IP address that the parent query was launched from."}, + {"initial_port", std::make_shared(), "The client port that was used to make the parent query."}, + {"initial_query_start_time", std::make_shared(), "Start time of the initial query execution."}, + {"initial_query_start_time_microseconds", std::make_shared(6), "Start time of the initial query execution "}, + {"interface", std::make_shared(), "Interface that the query was initiated from. 
Possible values: 1 — TCP, 2 — HTTP."}, + {"is_secure", std::make_shared(), "The flag which shows whether the connection was secure."}, + {"os_user", low_cardinality_string, "OSs username who runs clickhouse-client."}, + {"client_hostname", low_cardinality_string, "Hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", low_cardinality_string, "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "Major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "Minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, + {"http_method", std::make_shared(), "HTTP method that initiated the query. Possible values: 0 — The query was launched from the TCP interface, 1 — GET method was used., 2 — POST method was used."}, + {"http_user_agent", low_cardinality_string, "The UserAgent header passed in the HTTP request."}, + {"http_referer", std::make_shared(), "HTTP header `Referer` passed in the HTTP query (contains an absolute or partial address of the page making the query)."}, + {"forwarded_for", std::make_shared(), "HTTP header `X-Forwarded-For` passed in the HTTP query."}, + {"quota_key", std::make_shared(), "The 'quota key' specified in the quotas setting."}, + {"distributed_depth", std::make_shared(), "How many times a query was forwarded between servers."}, - {"revision", std::make_shared()}, + {"revision", std::make_shared(), "ClickHouse revision."}, - {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared())}, + {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared()), "ProfileEvents that measure different metrics for this thread. 
The description of them could be found in the table system.events."}, }; } diff --git a/src/Interpreters/QueryThreadLog.h b/src/Interpreters/QueryThreadLog.h index fcce9232dc1..0bffc218a10 100644 --- a/src/Interpreters/QueryThreadLog.h +++ b/src/Interpreters/QueryThreadLog.h @@ -49,7 +49,6 @@ struct QueryThreadLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; diff --git a/src/Interpreters/QueryViewsLog.cpp b/src/Interpreters/QueryViewsLog.cpp index c426f2d3cf0..a5441363340 100644 --- a/src/Interpreters/QueryViewsLog.cpp +++ b/src/Interpreters/QueryViewsLog.cpp @@ -35,30 +35,34 @@ ColumnsDescription QueryViewsLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"view_duration_ms", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the last event of the view happened."}, + {"event_time", std::make_shared(), "The date and time when the view finished execution."}, + {"event_time_microseconds", std::make_shared(6), "The date and time when the view finished execution with microseconds precision."}, + {"view_duration_ms", std::make_shared(), "Duration of view execution (sum of its stages) in milliseconds."}, - {"initial_query_id", std::make_shared()}, - {"view_name", std::make_shared()}, - {"view_uuid", std::make_shared()}, - {"view_type", std::move(view_type_datatype)}, - {"view_query", std::make_shared()}, - {"view_target", std::make_shared()}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"view_name", std::make_shared(), "Name of the view."}, + {"view_uuid", std::make_shared(), "UUID of the view."}, + {"view_type", std::move(view_type_datatype), "Type of the view. Values: 'Default' = 1 — Default views. Should not appear in this log, 'Materialized' = 2 — Materialized views, 'Live' = 3 — Live views."}, + {"view_query", std::make_shared(), "The query executed by the view."}, + {"view_target", std::make_shared(), "The name of the view target table."}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"read_rows", std::make_shared(), "Number of read rows."}, + {"read_bytes", std::make_shared(), "Number of read bytes."}, + {"written_rows", std::make_shared(), "Number of written rows."}, + {"written_bytes", std::make_shared(), "Number of written bytes."}, + {"peak_memory_usage", std::make_shared(), "The maximum difference between the amount of allocated and freed memory in context of this view."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "ProfileEvents that measure different metrics. The description of them could be found in the table system.events."}, - {"status", std::move(view_status_datatype)}, - {"exception_code", std::make_shared()}, - {"exception", std::make_shared()}, - {"stack_trace", std::make_shared()} + {"status", std::move(view_status_datatype), "Status of the view. 
Values: " + "'QueryStart' = 1 — Successful start the view execution. Should not appear, " + "'QueryFinish' = 2 — Successful end of the view execution, " + "'ExceptionBeforeStart' = 3 — Exception before the start of the view execution., " + "'ExceptionWhileProcessing' = 4 — Exception during the view execution."}, + {"exception_code", std::make_shared(), "Code of an exception."}, + {"exception", std::make_shared(), "Exception message."}, + {"stack_trace", std::make_shared(), "Stack trace. An empty string, if the query was completed successfully."} }; } diff --git a/src/Interpreters/QueryViewsLog.h b/src/Interpreters/QueryViewsLog.h index 000d0bd385a..2de06fe3ddc 100644 --- a/src/Interpreters/QueryViewsLog.h +++ b/src/Interpreters/QueryViewsLog.h @@ -81,7 +81,6 @@ struct QueryViewsLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp index ddec6fe063e..a2e3a790c27 100644 --- a/src/Interpreters/RewriteUniqToCountVisitor.cpp +++ b/src/Interpreters/RewriteUniqToCountVisitor.cpp @@ -156,7 +156,11 @@ void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) }; if (match_subquery_with_distinct() || match_subquery_with_group_by()) + { + auto main_alias = expr_list->children[0]->tryGetAlias(); expr_list->children[0] = makeASTFunction("count"); + expr_list->children[0]->setAlias(main_alias); + } } } diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/S3QueueLog.cpp index 967becb6e0f..ba990a8ac25 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/S3QueueLog.cpp @@ -25,17 +25,19 @@ ColumnsDescription S3QueueLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"table_uuid", std::make_shared()}, - {"file_name", std::make_shared()}, - {"rows_processed", std::make_shared()}, - {"status", status_datatype}, - {"processing_start_time", std::make_shared(std::make_shared())}, - {"processing_end_time", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"exception", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname"}, + {"event_date", std::make_shared(), "Event date of writing this log row"}, + {"event_time", std::make_shared(), "Event time of writing this log row"}, + {"database", std::make_shared(), "The name of a database where current S3Queue table lives."}, + {"table", std::make_shared(), "The name of S3Queue table."}, + {"uuid", std::make_shared(), "The UUID of S3Queue table"}, + {"file_name", std::make_shared(), "File name of the processing file"}, + {"rows_processed", std::make_shared(), "Number of processed rows"}, + {"status", status_datatype, "Status of the processing file"}, + {"processing_start_time", std::make_shared(std::make_shared()), "Time of the start of processing the file"}, + {"processing_end_time", std::make_shared(std::make_shared()), "Time of the end of processing the file"}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while loading this file"}, + {"exception", std::make_shared(), "Exception message if happened"}, }; } @@ -45,7 +47,9 @@ void 
S3QueueLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(getFQDNOrHostName()); columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); - columns[i++]->insert(table_uuid); + columns[i++]->insert(database); + columns[i++]->insert(table); + columns[i++]->insert(uuid); columns[i++]->insert(file_name); columns[i++]->insert(rows_processed); columns[i++]->insert(status); diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/S3QueueLog.h index e0362bf9716..a7adbf28b3d 100644 --- a/src/Interpreters/S3QueueLog.h +++ b/src/Interpreters/S3QueueLog.h @@ -12,7 +12,11 @@ namespace DB struct S3QueueLogElement { time_t event_time{}; - std::string table_uuid; + + std::string database; + std::string table; + std::string uuid; + std::string file_name; size_t rows_processed = 0; @@ -33,7 +37,6 @@ struct S3QueueLogElement static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class S3QueueLog : public SystemLog diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 1e08aec3813..877f95cbb74 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -46,13 +46,17 @@ struct SelectQueryOptions /// Bypass setting constraints for some internal queries such as projection ASTs. bool ignore_setting_constraints = false; + /// Bypass access check for select query. + /// This allows to skip double access check in some specific cases (e.g. insert into table with materialized view) + bool ignore_access_check = false; + /// These two fields are used to evaluate shardNum() and shardCount() function when /// prefer_localhost_replica == 1 and local instance is selected. They are needed because local /// instance might have multiple shards and scalars can only hold one value. 
std::optional shard_num; std::optional shard_count; - SelectQueryOptions( + SelectQueryOptions( /// NOLINT(google-explicit-constructor) QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0, bool is_subquery_ = false, @@ -129,6 +133,12 @@ struct SelectQueryOptions return *this; } + SelectQueryOptions & ignoreAccessCheck(bool value = true) + { + ignore_access_check = value; + return *this; + } + SelectQueryOptions & setInternal(bool value = false) { is_internal = value; diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index bdf314f35b9..fe7ccd64ffe 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include @@ -77,6 +79,16 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" }; } + if (auto page_cache = getContext()->getPageCache()) + { + auto rss = page_cache->getResidentSetSize(); + new_values["PageCacheBytes"] = { rss.page_cache_rss, "Userspace page cache memory usage in bytes" }; + new_values["PageCachePinnedBytes"] = { page_cache->getPinnedSize(), "Userspace page cache memory that's currently in use and can't be evicted" }; + + if (rss.unreclaimable_rss.has_value()) + new_values["UnreclaimableRSS"] = { *rss.unreclaimable_rss, "The amount of physical memory used by the server process, in bytes, excluding memory reclaimable by the OS (MADV_FREE)" }; + } + if (auto uncompressed_cache = getContext()->getUncompressedCache()) { new_values["UncompressedCacheBytes"] = { uncompressed_cache->sizeInBytes(), diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index df97a09f686..b52f8a507e3 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -429,11 +429,11 @@ void Session::setClientConnectionId(uint32_t connection_id) prepared_client_info->connection_id = connection_id; } -void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer) +void Session::setHTTPClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer) { if (session_context) { - session_context->setHttpClientInfo(http_method, http_user_agent, http_referer); + session_context->setHTTPClientInfo(http_method, http_user_agent, http_referer); } else { diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index cde000d89fa..334560a33c8 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -65,7 +65,7 @@ public: void setClientInterface(ClientInfo::Interface interface); void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); void setClientConnectionId(uint32_t connection_id); - void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer); + void setHTTPClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer); void setForwardedFor(const String & forwarded_for); void setQuotaClientKey(const String & quota_key); void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); diff --git a/src/Interpreters/SessionLog.cpp 
b/src/Interpreters/SessionLog.cpp index a5bc5012292..dc0ac963d0b 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -121,33 +121,36 @@ ColumnsDescription SessionLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", lc_string_datatype}, - {"type", std::move(event_type)}, - {"auth_id", std::make_shared()}, - {"session_id", std::make_shared()}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", lc_string_datatype, "Hostname of the server executing the query."}, + {"type", std::move(event_type), "Login/logout result. Possible values: " + "LoginFailure — Login error. " + "LoginSuccess — Successful login. " + "Logout — Logout from the system."}, + {"auth_id", std::make_shared(), "Authentication ID, which is a UUID that is automatically generated each time user logins."}, + {"session_id", std::make_shared(), "Session ID that is passed by client via HTTP interface."}, + {"event_date", std::make_shared(), "Login/logout date."}, + {"event_time", std::make_shared(), "Login/logout time."}, + {"event_time_microseconds", std::make_shared(6), "Login/logout starting time with microseconds precision."}, - {"user", std::make_shared(std::make_shared())}, - {"auth_type", std::make_shared(std::move(identified_with_column))}, + {"user", std::make_shared(std::make_shared()), "User name."}, + {"auth_type", std::make_shared(std::move(identified_with_column)), "The authentication type."}, - {"profiles", std::make_shared(lc_string_datatype)}, - {"roles", std::make_shared(lc_string_datatype)}, - {"settings", std::move(settings_type_column)}, + {"profiles", std::make_shared(lc_string_datatype), "The list of profiles set for all roles and/or users."}, + {"roles", std::make_shared(lc_string_datatype), "The list of roles to which the profile is applied."}, + {"settings", std::move(settings_type_column), "Settings that were changed when the client logged in/out."}, - {"client_address", DataTypeFactory::instance().get("IPv6")}, - {"client_port", std::make_shared()}, - {"interface", std::move(interface_type_column)}, + {"client_address", DataTypeFactory::instance().get("IPv6"), "The IP address that was used to log in/out."}, + {"client_port", std::make_shared(), "The client port that was used to log in/out."}, + {"interface", std::move(interface_type_column), "The interface from which the login was initiated."}, - {"client_hostname", std::make_shared()}, - {"client_name", std::make_shared()}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, + {"client_hostname", std::make_shared(), "The hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", std::make_shared(), "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "The major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "The minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, - {"failure_reason", std::make_shared()}, + {"failure_reason", std::make_shared(), "The exception message containing the 
reason for the login/logout failure."}, }; } diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 0f79a3e5ca7..74e72c54bcc 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -64,7 +64,6 @@ struct SessionLogElement static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 84260faafd4..8f11754b3be 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -275,7 +275,7 @@ void Set::appendSetElements(SetKeyColumns & holder) void Set::checkIsCreated() const { if (!is_created.load()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Trying to use set before it has been built."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to use set before it has been built."); } ColumnPtr Set::execute(const ColumnsWithTypeAndName & columns, bool negative) const @@ -283,7 +283,7 @@ ColumnPtr Set::execute(const ColumnsWithTypeAndName & columns, bool negative) co size_t num_key_columns = columns.size(); if (0 == num_key_columns) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no columns passed to Set::execute method."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No columns passed to Set::execute method."); auto res = ColumnUInt8::create(); ColumnUInt8::Container & vec_res = res->getData(); diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp index cd9148a01cf..0fb2e5189d4 100644 --- a/src/Interpreters/SetVariants.cpp +++ b/src/Interpreters/SetVariants.cpp @@ -146,7 +146,7 @@ typename SetVariantsTemplate::Type SetVariantsTemplate::choose return Type::keys128; if (size_of_field == 32) return Type::keys256; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys diff --git a/src/Interpreters/SubstituteColumnOptimizer.h b/src/Interpreters/SubstituteColumnOptimizer.h index 63867e80386..28aa8be0801 100644 --- a/src/Interpreters/SubstituteColumnOptimizer.h +++ b/src/Interpreters/SubstituteColumnOptimizer.h @@ -15,7 +15,7 @@ struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr; /// Optimizer that tries to replace columns to equal columns (according to constraints) -/// with lower size (accorsing to compressed and uncomressed size). +/// with lower size (according to compressed and uncompressed size). 
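The comment above describes the whole idea of SubstituteColumnOptimizer: when a constraint says two columns carry equal information, prefer reading whichever one is cheaper on disk. Below is a toy, self-contained sketch of that selection step; the column names, sizes, and the `cheapest` helper are made up for illustration, while the real optimizer works on the query AST and storage statistics.

#include <cstddef>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

struct ColumnSize { std::string name; size_t compressed = 0; size_t uncompressed = 0; };

// Among columns known to be equal (per a constraint), pick the one with the smallest size,
// comparing compressed size first and uncompressed size as a tie-breaker.
std::string cheapest(const std::vector<ColumnSize> & equal_columns)
{
    const ColumnSize * best = &equal_columns.front();
    for (const auto & candidate : equal_columns)
        if (std::tie(candidate.compressed, candidate.uncompressed) < std::tie(best->compressed, best->uncompressed))
            best = &candidate;
    return best->name;
}

int main()
{
    // Suppose a constraint declares url and url_hash interchangeable for a given predicate:
    // the optimizer would rather read the small hash column than the large string column.
    std::vector<ColumnSize> equal_columns{{"url", 900000, 4000000}, {"url_hash", 80000, 800000}};
    std::cout << cheapest(equal_columns) << '\n'; // prints: url_hash
}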
class SubstituteColumnOptimizer { public: diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 6580dc3e9b7..a74b5c67726 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -563,7 +563,6 @@ void SystemLog::prepareTable() {table_id.database_name, table_id.table_name + "_" + toString(suffix)}, getContext())) ++suffix; - auto rename = std::make_shared(); ASTRenameQuery::Element elem { ASTRenameQuery::Table @@ -586,7 +585,7 @@ void SystemLog::prepareTable() old_create_query, create_query); - rename->elements.emplace_back(std::move(elem)); + auto rename = std::make_shared(ASTRenameQuery::Elements{std::move(elem)}); ActionLock merges_lock; if (DatabaseCatalog::instance().getDatabase(table_id.database_name)->getUUID() == UUIDHelpers::Nil) diff --git a/src/Interpreters/TablesStatus.cpp b/src/Interpreters/TablesStatus.cpp index 005a4515c3a..911a028f813 100644 --- a/src/Interpreters/TablesStatus.cpp +++ b/src/Interpreters/TablesStatus.cpp @@ -35,7 +35,7 @@ void TableStatus::read(ReadBuffer & in) void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method TablesStatusRequest::write is called for unsupported server revision"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Method TablesStatusRequest::write is called for unsupported server revision"); writeVarUInt(tables.size(), out); for (const auto & table_name : tables) diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index e57d9130369..8b0649be1b1 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -2,11 +2,11 @@ #include -#include -#include +#include #include -#include +#include #include +#include namespace CurrentMetrics diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index cdb4de76722..90fc4ef36a0 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -34,7 +34,6 @@ struct TextLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class TextLog : public SystemLog diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 1fe11be6090..8e9c397b7a1 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 26adb0cfc3f..01bedf34f15 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -29,20 +29,27 @@ ColumnsDescription TraceLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"timestamp_ns", std::make_shared()}, - {"revision", std::make_shared()}, - {"trace_type", std::make_shared(trace_values)}, - {"thread_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"trace", std::make_shared(std::make_shared())}, - {"size", std::make_shared()}, - {"ptr", std::make_shared()}, - {"event", std::make_shared(std::make_shared())}, - {"increment", std::make_shared()}, 
+ {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "Date of sampling moment."}, + {"event_time", std::make_shared(), "Timestamp of the sampling moment."}, + {"event_time_microseconds", std::make_shared(6), "Timestamp of the sampling moment with microseconds precision."}, + {"timestamp_ns", std::make_shared(), "Timestamp of the sampling moment in nanoseconds."}, + {"revision", std::make_shared(), "ClickHouse server build revision."}, + {"trace_type", std::make_shared(trace_values), "Trace type: " + "`Real` represents collecting stack traces by wall-clock time. " + "`CPU` represents collecting stack traces by CPU time. " + "`Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. " + "`MemorySample` represents collecting random allocations and deallocations. " + "`MemoryPeak` represents collecting updates of peak memory usage. " + "`ProfileEvent` represents collecting of increments of profile events." + }, + {"thread_id", std::make_shared(), "Thread identifier."}, + {"query_id", std::make_shared(), "Query identifier that can be used to get details about a query that was running from the query_log system table."}, + {"trace", std::make_shared(std::make_shared()), "Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process."}, + {"size", std::make_shared(), "For trace types Memory, MemorySample or MemoryPeak is the amount of memory allocated, for other trace types is 0."}, + {"ptr", std::make_shared(), "The address of the allocated chunk."}, + {"event", std::make_shared(std::make_shared()), "For trace type ProfileEvent is the name of updated profile event, for other trace types is an empty string."}, + {"increment", std::make_shared(), "For trace type ProfileEvent is the amount of increment of profile event, for other trace types is 0."}, }; } diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index f4cd29a7a2d..418b8d546a0 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -41,7 +41,6 @@ struct TraceLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class TraceLog : public SystemLog diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 96c69536c9a..e0c5d01e153 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -407,8 +407,13 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool { Coordination::SimpleFaultInjection fault(fault_probability_before_commit, fault_probability_after_commit, "commit"); + Coordination::Requests requests; + requests.push_back(zkutil::makeCreateRequest(zookeeper_path_log + "/csn-", serializeTID(txn->tid), zkutil::CreateMode::PersistentSequential)); + /// Commit point - csn_path_created = current_zookeeper->create(zookeeper_path_log + "/csn-", serializeTID(txn->tid), zkutil::CreateMode::PersistentSequential); + auto res = current_zookeeper->multi(requests, /* check_session_valid */ true); + + csn_path_created = dynamic_cast(res.back().get())->path_created; } catch (const Coordination::Exception & e) { diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp index 4a413439671..d13b31518d2 
100644 --- a/src/Interpreters/TransactionsInfoLog.cpp +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -34,22 +34,22 @@ ColumnsDescription TransactionsInfoLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"type", std::move(type_enum)}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared(6)}, - {"thread_id", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "The hostname where transaction was executed."}, + {"type", std::move(type_enum), "The type of the transaction. Possible values: Begin, Commit, Rollback, AddPart, LockPart, UnlockPart."}, + {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time", std::make_shared(6), "Time of the entry"}, + {"thread_id", std::make_shared(), "The identifier of a thread."}, /// which thread? - {"query_id", std::make_shared()}, - {"tid", getTransactionIDDataType()}, - {"tid_hash", std::make_shared()}, + {"query_id", std::make_shared(), "The ID of a query executed in a scope of transaction."}, + {"tid", getTransactionIDDataType(), "The identifier of a transaction."}, + {"tid_hash", std::make_shared(), "The hash of the identifier."}, - {"csn", std::make_shared()}, + {"csn", std::make_shared(), "The Commit Sequence Number"}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"uuid", std::make_shared()}, - {"part", std::make_shared()}, + {"database", std::make_shared(), "The name of the database the transaction was executed against."}, + {"table", std::make_shared(), "The name of the table the transaction was executed against."}, + {"uuid", std::make_shared(), "The uuid of the table the transaction was executed against."}, + {"part", std::make_shared(), "The name of the part participated in the transaction."}, // ? }; } diff --git a/src/Interpreters/TransactionsInfoLog.h b/src/Interpreters/TransactionsInfoLog.h index 009d1b67474..e0d4e8b299a 100644 --- a/src/Interpreters/TransactionsInfoLog.h +++ b/src/Interpreters/TransactionsInfoLog.h @@ -43,7 +43,6 @@ struct TransactionsInfoLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } void fillCommonFields(const TransactionInfoContext * context = nullptr); }; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 130ce2194fd..3de7e217e53 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -158,7 +158,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, D void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & node, const ASTPtr &, Data & data) { if (!node.qualifier) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must have a qualifier"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Qualified asterisk must have a qualifier"); /// @note it could contain table alias as table name. 
DatabaseAndTableWithAlias db_and_table(node.qualifier); diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 9a311d20c28..b71a8e3681d 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -755,7 +755,8 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, rewriteSumFunctionWithSumAndCount(query, tables_with_columns); /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc. - optimizeDateFilters(select_query, tables_with_columns, context); + if (settings.optimize_time_filter_with_preimage) + optimizeDateFilters(select_query, tables_with_columns, context); /// GROUP BY injective function elimination. optimizeGroupBy(select_query, context); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index ecd021328e7..5588fc55a64 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -56,10 +56,13 @@ #include #include #include +#include #include #include +#include + namespace DB { @@ -262,8 +265,7 @@ struct ExistsExpressionData select_with_union_query->list_of_selects->children.push_back(std::move(select_query)); select_with_union_query->children.push_back(select_with_union_query->list_of_selects); - auto new_subquery = std::make_shared(); - new_subquery->children.push_back(select_with_union_query); + auto new_subquery = std::make_shared(std::move(select_with_union_query)); auto function = makeASTFunction("in", std::make_shared(1u), new_subquery); func = *function; @@ -777,7 +779,7 @@ void expandGroupByAll(ASTSelectQuery * select_query) select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list); } -void expandOrderByAll(ASTSelectQuery * select_query) +void expandOrderByAll(ASTSelectQuery * select_query, [[maybe_unused]] const TablesWithColumns & tables_with_columns) { auto * all_elem = select_query->orderBy()->children[0]->as(); if (!all_elem) @@ -787,16 +789,32 @@ void expandOrderByAll(ASTSelectQuery * select_query) for (const auto & expr : select_query->select()->children) { + /// Detect and reject ambiguous statements: + /// E.g. for a table with columns "all", "a", "b": + /// - SELECT all, a, b ORDER BY all; -- should we sort by all columns in SELECT or by column "all"? + /// - SELECT a, b AS all ORDER BY all; -- like before but "all" as alias + /// - SELECT func(...) AS all ORDER BY all; -- like before but "all" as function + /// - SELECT a, b ORDER BY all; -- tricky in other way: does the user want to sort by columns in SELECT clause or by not SELECTed column "all"? 
+ + static const String all = "all"; if (auto * identifier = expr->as(); identifier != nullptr) - if (Poco::toUpper(identifier->name()) == "ALL" || Poco::toUpper(identifier->alias) == "ALL") + if (boost::iequals(identifier->name(), all) || boost::iequals(identifier->alias, all)) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); if (auto * function = expr->as(); function != nullptr) - if (Poco::toUpper(function->alias) == "ALL") + if (boost::iequals(function->alias, all)) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + for (const auto & table_with_columns : tables_with_columns) + { + const auto & columns = table_with_columns.columns; + if (columns.containsCaseInsensitive(all)) + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + } + auto elem = std::make_shared(); elem->direction = all_elem->direction; elem->nulls_direction = all_elem->nulls_direction; @@ -973,8 +991,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special) { auto options = GetColumnsOptions(add_special ? GetColumnsOptions::All : GetColumnsOptions::AllPhysical); options.withExtendedObjects(); - if (storage->supportsSubcolumns()) - options.withSubcolumns(); + options.withSubcolumns(storage->supportsSubcolumns()); auto columns_from_storage = storage_snapshot->getColumns(options); @@ -984,8 +1001,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special) source_columns.insert(source_columns.end(), columns_from_storage.begin(), columns_from_storage.end()); auto metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto metadata_column_descriptions = metadata_snapshot->getColumns(); - source_columns_ordinary = metadata_column_descriptions.getOrdinary(); + source_columns_ordinary = metadata_snapshot->getColumns().getOrdinary(); } source_columns_set = removeDuplicateColumns(source_columns); @@ -1092,16 +1108,16 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select const auto & partition_desc = storage_snapshot->metadata->getPartitionKey(); if (partition_desc.expression) { - auto partition_source_columns = partition_desc.expression->getRequiredColumns(); - partition_source_columns.push_back("_part"); - partition_source_columns.push_back("_partition_id"); - partition_source_columns.push_back("_part_uuid"); - partition_source_columns.push_back("_partition_value"); + auto partition_columns = partition_desc.expression->getRequiredColumns(); + NameSet partition_columns_set(partition_columns.begin(), partition_columns.end()); + + const auto & parititon_virtuals = MergeTreeData::virtuals_useful_for_filter; + partition_columns_set.insert(parititon_virtuals.begin(), parititon_virtuals.end()); + optimize_trivial_count = true; for (const auto & required_column : required) { - if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column) - == partition_source_columns.end()) + if (!partition_columns_set.contains(required_column)) { optimize_trivial_count = false; break; @@ -1112,7 +1128,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select NameSet unknown_required_source_columns = required; - for (NamesAndTypesList::iterator it = source_columns.begin(); it != 
source_columns.end();) + for (auto it = source_columns.begin(); it != source_columns.end();) { const String & column_name = it->name; unknown_required_source_columns.erase(column_name); @@ -1126,32 +1142,23 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select has_virtual_shard_num = false; /// If there are virtual columns among the unknown columns. Remove them from the list of unknown and add /// in columns list, so that when further processing they are also considered. - if (storage) + if (storage_snapshot) { - const auto storage_virtuals = storage->getVirtuals(); + const auto & virtuals = storage_snapshot->virtual_columns; for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) { - auto column = storage_virtuals.tryGetByName(*it); - if (column) + if (auto column = virtuals->tryGet(*it)) { source_columns.push_back(*column); it = unknown_required_source_columns.erase(it); } else - ++it; - } - - if (is_remote_storage) - { - for (const auto & name_type : storage_virtuals) { - if (name_type.name == "_shard_num" && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery())) - { - has_virtual_shard_num = true; - break; - } + ++it; } } + + has_virtual_shard_num = is_remote_storage && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery()) && virtuals->has("_shard_num"); } /// Collect missed object subcolumns @@ -1325,7 +1332,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( // expand ORDER BY ALL if (settings.enable_order_by_all && select_query->order_by_all) - expandOrderByAll(select_query); + expandOrderByAll(select_query, tables_with_columns); /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 9cc31edfe56..6f6d4568064 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -122,49 +122,49 @@ ColumnsDescription ZooKeeperLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"type", std::move(type_enum)}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared(6)}, - {"thread_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, - {"port", std::make_shared()}, - {"session_id", std::make_shared()}, - {"duration_ms", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"type", std::move(type_enum), "Event type in the ZooKeeper client. 
Can have one of the following values: Request — The request has been sent, Response — The response was received, Finalize — The connection is lost, no response was received."}, + {"event_date", std::make_shared(), "The date when the event happened."}, + {"event_time", std::make_shared(6), "The date and time when the event happened."}, + {"thread_id", std::make_shared(), "The ID of the thread executed this request."}, + {"query_id", std::make_shared(), "The ID of a query in scope of which this request was executed."}, + {"address", DataTypeFactory::instance().get("IPv6"), "IP address of ZooKeeper server that was used to make the request."}, + {"port", std::make_shared(), "The port of ZooKeeper server that was used to make the request."}, + {"session_id", std::make_shared(), "The session ID that the ZooKeeper server sets for each connection."}, + {"duration_ms", std::make_shared(), "The time taken by ZooKeeper to execute the request."}, - {"xid", std::make_shared()}, - {"has_watch", std::make_shared()}, - {"op_num", op_num_enum}, - {"path", std::make_shared()}, + {"xid", std::make_shared(), "The ID of the request within the session. This is usually a sequential request number. It is the same for the request row and the paired response/finalize row."}, + {"has_watch", std::make_shared(), "The request whether the watch has been set."}, + {"op_num", op_num_enum, "The type of request or response."}, + {"path", std::make_shared(), "The path to the ZooKeeper node specified in the request, or an empty string if the request not requires specifying a path."}, - {"data", std::make_shared()}, + {"data", std::make_shared(), "The data written to the ZooKeeper node (for the SET and CREATE requests — what the request wanted to write, for the response to the GET request — what was read) or an empty string."}, - {"is_ephemeral", std::make_shared()}, - {"is_sequential", std::make_shared()}, + {"is_ephemeral", std::make_shared(), "Is the ZooKeeper node being created as an ephemeral."}, + {"is_sequential", std::make_shared(), "Is the ZooKeeper node being created as an sequential."}, - {"version", std::make_shared(std::make_shared())}, + {"version", std::make_shared(std::make_shared()), "The version of the ZooKeeper node that the request expects when executing. This is supported for CHECK, SET, REMOVE requests (is relevant -1 if the request does not check the version or NULL for other requests that do not support version checking)."}, - {"requests_size", std::make_shared()}, - {"request_idx", std::make_shared()}, + {"requests_size", std::make_shared(), "The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in multi request will have the same xid."}, + {"request_idx", std::make_shared(), "The number of the request included in multi request (for multi request — 0, then in order from 1)."}, - {"zxid", std::make_shared()}, - {"error", std::make_shared(error_enum)}, + {"zxid", std::make_shared(), "ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (0 if the request was not executed/returned an error/the client does not know whether the request was executed)."}, + {"error", std::make_shared(error_enum), "Error code. 
Can have many values, here are just some of them: ZOK — The request was executed successfully, ZCONNECTIONLOSS — The connection was lost, ZOPERATIONTIMEOUT — The request execution timeout has expired, ZSESSIONEXPIRED — The session has expired, NULL — The request is completed."}, - {"watch_type", std::make_shared(watch_type_enum)}, - {"watch_state", std::make_shared(watch_state_enum)}, + {"watch_type", std::make_shared(watch_type_enum), "The type of the watch event (for responses with op_num = Watch), for the remaining responses: NULL."}, + {"watch_state", std::make_shared(watch_state_enum), "The status of the watch event (for responses with op_num = Watch), for the remaining responses: NULL."}, - {"path_created", std::make_shared()}, + {"path_created", std::make_shared(), "The path to the created ZooKeeper node (for responses to the CREATE request), may differ from the path if the node is created as a sequential."}, - {"stat_czxid", std::make_shared()}, - {"stat_mzxid", std::make_shared()}, - {"stat_pzxid", std::make_shared()}, - {"stat_version", std::make_shared()}, - {"stat_cversion", std::make_shared()}, - {"stat_dataLength", std::make_shared()}, - {"stat_numChildren", std::make_shared()}, + {"stat_czxid", std::make_shared(), "The zxid of the change that caused this ZooKeeper node to be created."}, + {"stat_mzxid", std::make_shared(), "The zxid of the change that last modified this ZooKeeper node."}, + {"stat_pzxid", std::make_shared(), "The transaction ID of the change that last modified children of this ZooKeeper node."}, + {"stat_version", std::make_shared(), "The number of changes to the data of this ZooKeeper node."}, + {"stat_cversion", std::make_shared(), "The number of changes to the children of this ZooKeeper node."}, + {"stat_dataLength", std::make_shared(), "The length of the data field of this ZooKeeper node."}, + {"stat_numChildren", std::make_shared(), "The number of children of this ZooKeeper node."}, - {"children", std::make_shared(std::make_shared())}, + {"children", std::make_shared(std::make_shared()), "The list of child ZooKeeper nodes (for responses to LIST request)."}, }; } diff --git a/src/Interpreters/ZooKeeperLog.h b/src/Interpreters/ZooKeeperLog.h index 90d36d22a59..0d9dc618eb4 100644 --- a/src/Interpreters/ZooKeeperLog.h +++ b/src/Interpreters/ZooKeeperLog.h @@ -72,7 +72,6 @@ struct ZooKeeperLogElement static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class ZooKeeperLog : public SystemLog diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index c3b8405659a..25085ff4823 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -251,8 +252,21 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_type.isDateTime64() && src.getType() == Field::Types::Decimal64) { - /// Already in needed type. - return src; + const auto & from_type = src.get(); + const auto & to_type = static_cast(type); + + const auto scale_from = from_type.getScale(); + const auto scale_to = to_type.getScale(); + const auto scale_multiplier_diff = scale_from > scale_to ? 
from_type.getScaleMultiplier() / to_type.getScaleMultiplier() : to_type.getScaleMultiplier() / from_type.getScaleMultiplier(); + + if (scale_multiplier_diff == 1) /// Already in needed type. + return src; + + /// in case if we need to make DateTime64(a) from DateTime64(b), a != b, we need to convert datetime value to the right scale + const UInt64 value = scale_from > scale_to ? from_type.getValue().value / scale_multiplier_diff : from_type.getValue().value * scale_multiplier_diff; + return DecimalField( + DecimalUtils::decimalFromComponentsWithMultiplier(value, 0, 1), + scale_to); } /// For toDate('xxx') in 1::Int64, we CAST `src` to UInt64, which may @@ -487,16 +501,30 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return object; } } + else if (const DataTypeVariant * type_variant = typeid_cast(&type)) + { + /// If we have type hint and Variant contains such type, no need to convert field. + if (from_type_hint && type_variant->tryGetVariantDiscriminator(*from_type_hint)) + return src; + + /// Create temporary column and check if we can insert this field to the variant. + /// If we can insert, no need to convert anything. + auto col = type_variant->createColumn(); + if (col->tryInsert(src)) + return src; + } /// Conversion from string by parsing. if (src.getType() == Field::Types::String) { /// Promote data type to avoid overflows. Note that overflows in the largest data type are still possible. /// But don't promote Float32, since we want to keep the exact same value + /// Also don't promote domain types (like bool) because we would otherwise use the serializer of the promoted type (e.g. UInt64 for + /// bool, which does not allow 'true' and 'false' as input values) const IDataType * type_to_parse = &type; DataTypePtr holder; - if (type.canBePromoted() && !which_type.isFloat32()) + if (type.canBePromoted() && !which_type.isFloat32() && !type.getCustomSerialization()) { holder = type.promoteNumericType(); type_to_parse = holder.get(); diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 00d36750cc1..b5c3e00e299 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -106,7 +106,7 @@ std::optional evaluateConstantExpressionImpl(c if (result_column->empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Logical error: empty result column after evaluation " + "Empty result column after evaluation " "of constant expression for IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument"); /// Expressions like rand() or now() are not constant diff --git a/src/Interpreters/examples/hash_map_string.cpp b/src/Interpreters/examples/hash_map_string.cpp index f3ec104a5f7..f30a9a4cac1 100644 --- a/src/Interpreters/examples/hash_map_string.cpp +++ b/src/Interpreters/examples/hash_map_string.cpp @@ -20,9 +20,7 @@ #include #include -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -#endif +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" struct CompactStringRef diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 6122ec6180a..df8236c11f4 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -252,7 +252,8 @@ BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & en auto source = std::make_shared(node_path, entry, context, hosts_to_wait); 
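To make the DateTime64 branch of convertFieldToTypeImpl above concrete: when the source and destination scales differ, the raw decimal value is multiplied or divided by the ratio of the scale multipliers, truncating extra sub-second digits on downscale. A small standalone sketch of that arithmetic follows; the `rescale` helper and the sample values are illustrative, not the actual DecimalUtils code path.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Rescale a raw DateTime64 tick count from scale_from fractional digits to scale_to digits,
// mirroring the multiply/divide on the scale-multiplier ratio used above.
int64_t rescale(int64_t value, uint32_t scale_from, uint32_t scale_to)
{
    int64_t ratio = 1;
    for (uint32_t i = std::min(scale_from, scale_to); i < std::max(scale_from, scale_to); ++i)
        ratio *= 10;
    return scale_from > scale_to ? value / ratio : value * ratio;
}

int main()
{
    // 2024-01-01 00:00:00.123456 UTC stored as DateTime64(6), i.e. microsecond ticks.
    int64_t micros = 1704067200123456;
    // Converting to DateTime64(3) divides by 10^(6-3), dropping the trailing microseconds.
    std::cout << rescale(micros, 6, 3) << '\n';        // prints: 1704067200123
    // Converting DateTime64(3) back to DateTime64(6) multiplies by 10^3 (precision is not restored).
    std::cout << rescale(1704067200123, 3, 6) << '\n'; // prints: 1704067200123000
}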
io.pipeline = QueryPipeline(std::move(source)); - if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE) + if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE || + context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE) io.pipeline.complete(std::make_shared(io.pipeline.getHeader())); return io; @@ -264,7 +265,9 @@ Block DDLQueryStatusSource::getSampleBlock(ContextPtr context_, bool hosts_to_wa auto maybe_make_nullable = [&](const DataTypePtr & type) -> DataTypePtr { - if (output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE) + if (output_mode == DistributedDDLOutputMode::THROW || + output_mode == DistributedDDLOutputMode::NONE || + output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE) return type; return std::make_shared(type); }; @@ -313,14 +316,15 @@ DDLQueryStatusSource::DDLQueryStatusSource( { auto output_mode = context->getSettingsRef().distributed_ddl_output_mode; throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE - || output_mode == DistributedDDLOutputMode::NONE; + || output_mode == DistributedDDLOutputMode::NONE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; if (hosts_to_wait) { waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end()); is_replicated_database = true; only_running_hosts = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE || - output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE; + output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE || + output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; } else { @@ -442,14 +446,16 @@ Chunk DDLQueryStatusSource::generate() size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished; size_t num_active_hosts = current_active_hosts.size(); - constexpr auto msg_format = "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. " - "There are {} unfinished hosts ({} of them are currently executing the task), " - "they are going to execute the query in background"; + constexpr auto msg_format = "Distributed DDL task {} is not finished on {} of {} hosts " + "({} of them are currently executing the task, {} are inactive). " + "They are going to execute the query in background. Was waiting for {} seconds{}"; + if (throw_on_timeout) { if (!first_exception) first_exception = std::make_unique(Exception(ErrorCodes::TIMEOUT_EXCEEDED, - msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts)); + msg_format, node_path, num_unfinished_hosts, waiting_hosts.size(), num_active_hosts, offline_hosts.size(), + watch.elapsedSeconds(), stop_waiting_offline_hosts ? "" : ", which is longer than distributed_ddl_task_timeout")); /// For Replicated database print a list of unfinished hosts as well. Will return empty block on next iteration. if (is_replicated_database) @@ -457,7 +463,8 @@ Chunk DDLQueryStatusSource::generate() return {}; } - LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts); + LOG_INFO(log, msg_format, node_path, num_unfinished_hosts, waiting_hosts.size(), num_active_hosts, offline_hosts.size(), + watch.elapsedSeconds(), stop_waiting_offline_hosts ? 
"" : "which is longer than distributed_ddl_task_timeout"); return generateChunkWithUnfinishedHosts(); } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index dfa9cc056ee..88021038ebb 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -102,9 +103,15 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int QUERY_WAS_CANCELLED; extern const int INCORRECT_DATA; + extern const int SYNTAX_ERROR; extern const int SUPPORT_IS_DISABLED; + extern const int INCORRECT_QUERY; } +namespace FailPoints +{ + extern const char execute_query_calling_empty_set_result_func_on_exception[]; +} static void checkASTSizeLimits(const IAST & ast, const Settings & settings) { @@ -599,6 +606,9 @@ void logExceptionBeforeStart( if (auto txn = context->getCurrentTransaction()) elem.tid = txn->tid; + if (settings.log_query_settings) + elem.query_settings = std::make_shared(context->getSettingsRef()); + if (settings.calculate_text_stack_trace) setExceptionStackTrace(elem); logException(context, elem); @@ -643,6 +653,36 @@ static void setQuerySpecificSettings(ASTPtr & ast, ContextMutablePtr context) } } +void validateAnalyzerSettings(ASTPtr ast, bool context_value) +{ + if (ast->as()) + return; + + bool top_level = context_value; + + std::vector nodes_to_process{ ast }; + while (!nodes_to_process.empty()) + { + auto node = nodes_to_process.back(); + nodes_to_process.pop_back(); + + if (auto * set_query = node->as()) + { + if (auto * value = set_query->changes.tryGet("allow_experimental_analyzer")) + { + if (top_level != value->safeGet()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting 'allow_experimental_analyzer' is changed in the subquery. Top level value: {}", top_level); + } + } + + for (auto child : node->children) + { + if (child) + nodes_to_process.push_back(std::move(child)); + } + } +} + static std::tuple executeQueryImpl( const char * begin, const char * end, @@ -704,7 +744,9 @@ static std::tuple executeQueryImpl( { if (settings.dialect == Dialect::kusto && !internal) { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Kusto dialect is disabled until these two bugs will be fixed: https://github.com/ClickHouse/ClickHouse/issues/59037 and https://github.com/ClickHouse/ClickHouse/issues/59036"); + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseKQLQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } else if (settings.dialect == Dialect::prql && !internal) { @@ -716,6 +758,43 @@ static std::tuple executeQueryImpl( ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + +#ifndef NDEBUG + /// Verify that AST formatting is consistent: + /// If you format AST, parse it back, and format it again, you get the same string. + + String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true); + + /// The query can become more verbose after formatting, so: + size_t new_max_query_size = max_query_size > 0 ? 
(1000 + 2 * max_query_size) : 0; + + ASTPtr ast2; + try + { + ast2 = parseQuery(parser, + formatted1.data(), + formatted1.data() + formatted1.size(), + "", new_max_query_size, settings.max_parser_depth); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::SYNTAX_ERROR) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Inconsistent AST formatting: the query:\n{}\ncannot parse.", + formatted1); + else + throw; + } + + chassert(ast2); + + String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true); + + if (formatted1 != formatted2) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Inconsistent AST formatting: the query:\n{}\nWas parsed and formatted back as:\n{}", + formatted1, formatted2); +#endif } const char * query_end = end; @@ -814,6 +893,7 @@ static std::tuple executeQueryImpl( /// Interpret SETTINGS clauses as early as possible (before invoking the corresponding interpreter), /// to allow settings to take effect. InterpreterSetQuery::applySettingsFromQuery(ast, context); + validateAnalyzerSettings(ast, context->getSettingsRef().allow_experimental_analyzer); if (auto * insert_query = ast->as()) insert_query->tail = istr; @@ -944,6 +1024,21 @@ static std::tuple executeQueryImpl( if (settings.implicit_transaction && settings.throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Async inserts with 'implicit_transaction' are not supported"); + /// Let's agree on terminology and say that a mini-INSERT is an asynchronous INSERT + /// which typically contains not a lot of data inside and a big-INSERT is an INSERT + /// which was formed by concatenating several mini-INSERTs together. + /// In case when the client had to retry some mini-INSERTs then they will be properly deduplicated + /// by the source tables. This functionality is controlled by a setting `async_insert_deduplicate`. + /// But then they will be glued together into a block and pushed through a chain of Materialized Views if any. + /// The process of forming such blocks is not deterministic so each time we retry mini-INSERTs the resulting + /// block may be concatenated differently. + /// That's why deduplication in dependent Materialized Views doesn't make sense in presence of async INSERTs. + if (settings.throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert && + settings.deduplicate_blocks_in_dependent_materialized_views) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Deduplication in dependent materialized views cannot work together with async inserts. "\ + "Please disable either `deduplicate_blocks_in_dependent_materialized_views` or `async_insert` setting."); + quota = context->getQuota(); if (quota) { @@ -1120,7 +1215,7 @@ static std::tuple executeQueryImpl( std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), settings.query_cache_compress_entries); - const size_t num_query_runs = query_cache->recordQueryRun(key); + const size_t num_query_runs = settings.query_cache_min_query_runs ? 
query_cache->recordQueryRun(key) : 1; /// try to avoid locking a mutex in recordQueryRun() if (num_query_runs <= settings.query_cache_min_query_runs) { LOG_TRACE(getLogger("QueryCache"), @@ -1359,7 +1454,7 @@ void executeQuery( BlockIO streams; OutputFormatPtr output_format; - auto update_format_for_exception_if_needed = [&]() + auto update_format_on_exception_if_needed = [&]() { if (!output_format) { @@ -1372,10 +1467,19 @@ void executeQuery( /// Force an update of the headers before we start writing result_details.content_type = output_format->getContentType(); result_details.format = format_name; + + fiu_do_on(FailPoints::execute_query_calling_empty_set_result_func_on_exception, { + // it will throw std::bad_function_call + set_result_details = nullptr; + set_result_details(result_details); + }); + if (set_result_details) { - set_result_details(result_details); + /// reset set_result_details func to avoid calling in SCOPE_EXIT() + auto set_result_details_copy = set_result_details; set_result_details = nullptr; + set_result_details_copy(result_details); } } } @@ -1395,7 +1499,7 @@ void executeQuery( { if (handle_exception_in_output_format) { - update_format_for_exception_if_needed(); + update_format_on_exception_if_needed(); if (output_format) handle_exception_in_output_format(*output_format); } @@ -1496,13 +1600,17 @@ void executeQuery( } catch (...) { + /// first execute on exception callback, it includes updating query_log + /// otherwise closing record ('ExceptionWhileProcessing') can be not appended in query_log + /// due to possible exceptions in functions called below (passed as parameter here) + streams.onException(); + if (handle_exception_in_output_format) { - update_format_for_exception_if_needed(); + update_format_on_exception_if_needed(); if (output_format) handle_exception_in_output_format(*output_format); } - streams.onException(); throw; } diff --git a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp index 6f84a60f2af..edff202d547 100644 --- a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp +++ b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp @@ -3,7 +3,7 @@ #include #include "Processors/Executors/PullingPipelineExecutor.h" -#include +#include #include #include #include diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index d16e01ef2d2..06c5d424d2f 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -98,23 +98,7 @@ Block getHeaderForProcessingStage( case QueryProcessingStage::FetchColumns: { Block header = storage_snapshot->getSampleBlockForColumns(column_names); - - if (query_info.prewhere_info) - { - auto & prewhere_info = *query_info.prewhere_info; - - if (prewhere_info.row_level_filter) - { - header = prewhere_info.row_level_filter->updateHeader(std::move(header)); - header.erase(prewhere_info.row_level_column_name); - } - - if (prewhere_info.prewhere_actions) - header = prewhere_info.prewhere_actions->updateHeader(std::move(header)); - - if (prewhere_info.remove_prewhere_column) - header.erase(prewhere_info.prewhere_column_name); - } + header = SourceStepWithFilter::applyPrewhereActions(header, query_info.prewhere_info); return header; } case QueryProcessingStage::WithMergeableState: @@ -137,7 +121,12 @@ Block getHeaderForProcessingStage( auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(left_table_expression); const auto & query_context = 
query_info.planner_context->getQueryContext(); - auto columns = table_expression_data.getColumns(); + + NamesAndTypes columns; + const auto & column_name_to_column = table_expression_data.getColumnNameToColumn(); + for (const auto & column_name : table_expression_data.getSelectedColumnsNames()) + columns.push_back(column_name_to_column.at(column_name)); + auto new_query_node = buildSubqueryToReadColumnsFromTableExpression(columns, left_table_expression, query_context); query = new_query_node->toAST(); } @@ -153,7 +142,9 @@ Block getHeaderForProcessingStage( if (context->getSettingsRef().allow_experimental_analyzer) { - auto storage = std::make_shared(storage_snapshot->storage.getStorageID(), storage_snapshot->metadata->getColumns()); + auto storage = std::make_shared(storage_snapshot->storage.getStorageID(), + storage_snapshot->getAllColumnsDescription(), + storage_snapshot); InterpreterSelectQueryAnalyzer interpreter(query, context, storage, SelectQueryOptions(processed_stage).analyze()); result = interpreter.getSampleBlock(); } @@ -167,8 +158,7 @@ Block getHeaderForProcessingStage( return result; } } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical Error: unknown processed stage."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown processed stage."); } } - diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 70e38526648..2853be4c05e 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -99,7 +99,7 @@ static NamesAndTypesList getColumnsFromTableExpression( names_and_type_list = columns.getOrdinary(); materialized = columns.getMaterialized(); aliases = columns.getAliases(); - virtuals = function_storage->getVirtuals(); + virtuals = function_storage->getVirtualsList(); } else if (table_expression.database_and_table_name) { @@ -110,7 +110,7 @@ static NamesAndTypesList getColumnsFromTableExpression( names_and_type_list = columns.getOrdinary(); materialized = columns.getMaterialized(); aliases = columns.getAliases(); - virtuals = table->getVirtuals(); + virtuals = table->getVirtualsList(); } return names_and_type_list; diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index fd8f5b154c4..239cce5b427 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -20,7 +20,6 @@ #include #include #include -#include namespace DB @@ -280,7 +279,7 @@ void fillMissingColumns( const NamesAndTypesList & requested_columns, const NamesAndTypesList & available_columns, const NameSet & partially_read_columns, - StorageMetadataPtr metadata_snapshot, size_t block_number) + StorageMetadataPtr metadata_snapshot) { size_t num_columns = requested_columns.size(); if (num_columns != res_columns.size()) @@ -359,14 +358,9 @@ void fillMissingColumns( } else { - if (requested_column->name == BlockNumberColumn::name) - res_columns[i] = type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst(); - else - /// We must turn a constant column into a full column because the interpreter could infer - /// that it is constant everywhere but in some blocks (from other parts) it can be a full column. - res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst(); - - + /// We must turn a constant column into a full column because the interpreter could infer + /// that it is constant everywhere but in some blocks (from other parts) it can be a full column. 
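The comment above explains why fillMissingColumns materializes the defaulted column: downstream code may receive blocks for the same column from different parts, some with a genuine full column and some with the filled-in default, so every block must hand over the same (full) representation. A toy analogy of that materialization step follows; the `ConstColumn`/`FullColumn` types are illustrative and not ClickHouse's IColumn hierarchy.

#include <cstddef>
#include <iostream>
#include <variant>
#include <vector>

using FullColumn = std::vector<int>;                    // one value per row
struct ConstColumn { int value = 0; size_t rows = 0; }; // single value + row count
using Column = std::variant<ConstColumn, FullColumn>;

// Analogue of convertToFullColumnIfConst(): expand a constant column into per-row values
// so that blocks coming from different parts all share the same representation.
FullColumn materialize(const Column & column)
{
    if (const auto * constant = std::get_if<ConstColumn>(&column))
        return FullColumn(constant->rows, constant->value);
    return std::get<FullColumn>(column);
}

int main()
{
    Column filled_default = ConstColumn{0, 3};    // missing column filled with its default value
    Column from_other_part = FullColumn{7, 8, 9}; // the same column read from a part that has it
    for (int v : materialize(filled_default)) std::cout << v << ' ';  // prints: 0 0 0
    for (int v : materialize(from_other_part)) std::cout << v << ' '; // prints: 7 8 9
    std::cout << '\n';
}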
+ res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst(); } } } diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h index 7a13a75ec8b..bea44bf6db9 100644 --- a/src/Interpreters/inplaceBlockConversions.h +++ b/src/Interpreters/inplaceBlockConversions.h @@ -46,6 +46,6 @@ void fillMissingColumns( const NamesAndTypesList & requested_columns, const NamesAndTypesList & available_columns, const NameSet & partially_read_columns, - StorageMetadataPtr metadata_snapshot, size_t block_number = 0); + StorageMetadataPtr metadata_snapshot); } diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 0b7a6dc92b0..2723eb37350 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -234,7 +234,7 @@ LoadTaskPtrs loadMetadata(ContextMutablePtr context, const String & default_data loaded_databases.insert({name, DatabaseCatalog::instance().getDatabase(name)}); } - auto mode = getLoadingStrictnessLevel(/* attach */ true, /* force_attach */ true, has_force_restore_data_flag); + auto mode = getLoadingStrictnessLevel(/* attach */ true, /* force_attach */ true, has_force_restore_data_flag, /*secondary*/ false); TablesLoader loader{context, std::move(loaded_databases), mode}; auto load_tasks = loader.loadTablesAsync(); auto startup_tasks = loader.startupTablesAsync(); diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 551a883d093..78b72022a9a 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -1,76 +1,113 @@ +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include - namespace DB { namespace ErrorCodes { - extern const int LOGICAL_ERROR; - extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; - extern const int ILLEGAL_COLUMN; +extern const int LOGICAL_ERROR; +extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; +extern const int ILLEGAL_COLUMN; } -void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings) +void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidationSettings & settings) { - if (!settings.allow_suspicious_low_cardinality_types) + auto validate_callback = [&](const IDataType & data_type) { - if (const auto * lc_type = typeid_cast(type.get())) + if (!settings.allow_suspicious_low_cardinality_types) { - if (!isStringOrFixedString(*removeNullable(lc_type->getDictionaryType()))) - throw Exception( - ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, - "Creating columns of type {} is prohibited by default due to expected negative impact on performance. " - "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", - lc_type->getName()); + if (const auto * lc_type = typeid_cast(&data_type)) + { + if (!isStringOrFixedString(*removeNullable(lc_type->getDictionaryType()))) + throw Exception( + ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, + "Creating columns of type {} is prohibited by default due to expected negative impact on performance. 
" + "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", + lc_type->getName()); + } } - } - if (!settings.allow_experimental_object_type) - { - if (type->hasDynamicSubcolumns()) + if (!settings.allow_experimental_object_type) { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because experimental Object type is not allowed. " - "Set setting allow_experimental_object_type = 1 in order to allow it", type->getName()); - } - } - - if (!settings.allow_suspicious_fixed_string_types) - { - auto basic_type = removeLowCardinalityAndNullable(type); - if (const auto * fixed_string = typeid_cast(basic_type.get())) - { - if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) + if (data_type.hasDynamicSubcolumns()) + { throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because fixed string with size > {} is suspicious. " - "Set setting allow_suspicious_fixed_string_types = 1 in order to allow it", - type->getName(), - MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS); + "Cannot create column with type '{}' because experimental Object type is not allowed. " + "Set setting allow_experimental_object_type = 1 in order to allow it", + data_type.getName()); + } } - } - if (!settings.allow_experimental_variant_type) - { - if (isVariant(type)) + if (!settings.allow_suspicious_fixed_string_types) { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because experimental Variant type is not allowed. " - "Set setting allow_experimental_variant_type = 1 in order to allow it", type->getName()); + if (const auto * fixed_string = typeid_cast(&data_type)) + { + if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because fixed string with size > {} is suspicious. " + "Set setting allow_suspicious_fixed_string_types = 1 in order to allow it", + data_type.getName(), + MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS); + } } - } + + if (!settings.allow_experimental_variant_type) + { + if (isVariant(data_type)) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because experimental Variant type is not allowed. " + "Set setting allow_experimental_variant_type = 1 in order to allow it", + data_type.getName()); + } + } + + if (!settings.allow_suspicious_variant_types) + { + if (const auto * variant_type = typeid_cast(&data_type)) + { + const auto & variants = variant_type->getVariants(); + chassert(!variants.empty()); + for (size_t i = 0; i < variants.size() - 1; ++i) + { + for (size_t j = i + 1; j < variants.size(); ++j) + { + if (auto supertype = tryGetLeastSupertype(DataTypes{variants[i], variants[j]})) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because variants '{}' and '{}' have similar types and working with values " + "of these types may lead to ambiguity. 
" + "Consider using common single variant '{}' instead of these 2 variants or set setting allow_suspicious_variant_types = 1 " + "in order to allow it", + data_type.getName(), + variants[i]->getName(), + variants[j]->getName(), + supertype->getName()); + } + } + } + } + } + }; + + validate_callback(*type_to_check); + if (settings.validate_nested_types) + type_to_check->forEachChild(validate_callback); } ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context) @@ -98,7 +135,8 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip const char * start = structure.data(); const char * end = structure.data() + structure.size(); - ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); + ASTPtr columns_list_raw = tryParseQuery( + parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); if (!columns_list_raw) return false; diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index 1fbbfa4b12f..ffb59bfa457 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -19,6 +19,8 @@ struct DataTypeValidationSettings , allow_experimental_object_type(settings.allow_experimental_object_type) , allow_suspicious_fixed_string_types(settings.allow_suspicious_fixed_string_types) , allow_experimental_variant_type(settings.allow_experimental_variant_type) + , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) + , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) { } @@ -26,6 +28,8 @@ struct DataTypeValidationSettings bool allow_experimental_object_type = true; bool allow_suspicious_fixed_string_types = true; bool allow_experimental_variant_type = true; + bool allow_suspicious_variant_types = true; + bool validate_nested_types = true; }; void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings); diff --git a/src/Interpreters/processColumnTransformers.cpp b/src/Interpreters/processColumnTransformers.cpp index 2a704d4a937..5ef331eb119 100644 --- a/src/Interpreters/processColumnTransformers.cpp +++ b/src/Interpreters/processColumnTransformers.cpp @@ -32,7 +32,7 @@ ASTPtr processColumnTransformers( tables_with_columns[0].addHiddenColumns(columns.getMaterialized()); tables_with_columns[0].addHiddenColumns(columns.getAliases()); - tables_with_columns[0].addHiddenColumns(table->getVirtuals()); + tables_with_columns[0].addHiddenColumns(table->getVirtualsList()); NameSet source_columns_set; for (const auto & identifier : query_columns->children) diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index b596ccb0285..2679d1b8d18 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -245,7 +245,7 @@ void download(FileSegment & file_segment) ASSERT_EQ(file_segment.state(), State::DOWNLOADING); ASSERT_EQ(file_segment.getDownloadedSize(), 0); - ASSERT_TRUE(file_segment.reserve(file_segment.range().size())); + ASSERT_TRUE(file_segment.reserve(file_segment.range().size(), 1000)); download(cache_base_path, file_segment); ASSERT_EQ(file_segment.state(), State::DOWNLOADING); @@ -257,7 +257,7 @@ void assertDownloadFails(FileSegment & file_segment) { 
ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId()); ASSERT_EQ(file_segment.getDownloadedSize(), 0); - ASSERT_FALSE(file_segment.reserve(file_segment.range().size())); + ASSERT_FALSE(file_segment.reserve(file_segment.range().size(), 1000)); file_segment.complete(); } @@ -956,7 +956,7 @@ TEST_F(FileCacheTest, temporaryData) for (auto & segment : *some_data_holder) { ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); - ASSERT_TRUE(segment->reserve(segment->range().size())); + ASSERT_TRUE(segment->reserve(segment->range().size(), 1000)); download(*segment); segment->complete(); } diff --git a/src/Interpreters/tests/gtest_page_cache.cpp b/src/Interpreters/tests/gtest_page_cache.cpp new file mode 100644 index 00000000000..1e2688c0ca2 --- /dev/null +++ b/src/Interpreters/tests/gtest_page_cache.cpp @@ -0,0 +1,267 @@ +#include +#include +#include + +#ifdef OS_LINUX +#include +#endif + +using namespace DB; + +namespace ProfileEvents +{ + extern const Event PageCacheChunkMisses; + extern const Event PageCacheChunkShared; + extern const Event PageCacheChunkDataHits; + extern const Event PageCacheChunkDataPartialHits; + extern const Event PageCacheChunkDataMisses; +} + +#define CHECK(x) \ + do { \ + if (!(x)) \ + { \ + std::cerr << "check on line " << __LINE__ << " failed: " << #x << std::endl; \ + std::abort(); \ + } \ + } while (false) + +size_t estimateRAMSize() +{ +#ifdef OS_LINUX + struct sysinfo info; + int r = sysinfo(&info); + CHECK(r == 0); + return static_cast(info.totalram * info.mem_unit); +#else + return 128ul << 30; +#endif +} + +/// Do random reads and writes in PageCache from multiple threads, check that the data read matches the data written. +TEST(PageCache, DISABLED_Stress) +{ + /// There doesn't seem to be a reasonable way to simulate memory pressure or force the eviction of MADV_FREE-d pages. + /// So we actually map more virtual memory than we have RAM and fill it all up a few times. + /// This takes an eternity (a few minutes), but idk how else to hit MADV_FREE eviction. + /// Expect ~1 GB/s, bottlenecked by page faults. + size_t ram_size = estimateRAMSize(); + PageCache cache(2 << 20, 1 << 30, ram_size + ram_size / 10, /* use_madv_free */ true, /* use_huge_pages */ true); + + CHECK(cache.getResidentSetSize().page_cache_rss); + + const size_t num_keys = static_cast(cache.maxChunks() * 1.5); + const size_t pages_per_chunk = cache.chunkSize() / cache.pageSize(); + const size_t items_per_page = cache.pageSize() / 8; + + const size_t passes = 2; + const size_t step = 20; + const size_t num_threads = 20; + const size_t chunks_touched = num_keys * passes * num_threads / step; + std::atomic progress {0}; + std::atomic threads_finished {0}; + + std::atomic total_racing_writes {0}; + + auto thread_func = [&] + { + pcg64 rng(randomSeed()); + std::vector pinned; + + /// Stats. + size_t racing_writes = 0; + + for (size_t i = 0; i < num_keys * passes; i += step) + { + progress += 1; + + /// Touch the chunks sequentially + noise (to increase interference across threads), or at random 10% of the time. + size_t key_idx; + if (rng() % 10 == 0) + key_idx = std::uniform_int_distribution(0, num_keys - 1)(rng); + else + key_idx = (i + std::uniform_int_distribution(0, num_keys / 1000)(rng)) % num_keys; + + /// For some keys, always use detached_if_missing = true and check that cache always misses. 
+ bool key_detached_if_missing = key_idx % 100 == 42; + bool detached_if_missing = key_detached_if_missing || i % 101 == 42; + + PageCacheKey key = key_idx * 0xcafebabeb0bad00dul; // a simple reversible hash (the constant can be any odd number) + + PinnedPageChunk chunk = cache.getOrSet(key, detached_if_missing, /* inject_eviction */ false); + + if (key_detached_if_missing) + CHECK(!chunk.getChunk()->pages_populated.any()); + + for (size_t page_idx = 0; page_idx < pages_per_chunk; ++page_idx) + { + bool populated = chunk.getChunk()->pages_populated.get(page_idx); + /// Generate page contents deterministically from key and page index. + size_t start = key_idx * page_idx; + if (start % 37 == 13) + { + /// Leave ~1/37 of the pages unpopulated. + CHECK(!populated); + } + else + { + /// We may write/read the same memory from multiple threads in parallel here. + std::atomic * items = reinterpret_cast *>(chunk.getChunk()->data + cache.pageSize() * page_idx); + if (populated) + { + for (size_t j = 0; j < items_per_page; ++j) + CHECK(items[j].load(std::memory_order_relaxed) == start + j); + } + else + { + for (size_t j = 0; j < items_per_page; ++j) + items[j].store(start + j, std::memory_order_relaxed); + if (!chunk.markPagePopulated(page_idx)) + racing_writes += 1; + } + } + } + + pinned.push_back(std::move(chunk)); + CHECK(cache.getPinnedSize() >= cache.chunkSize()); + /// Unpin 2 chunks on average. + while (rng() % 3 != 0 && !pinned.empty()) + { + size_t idx = rng() % pinned.size(); + if (idx != pinned.size() - 1) + pinned[idx] = std::move(pinned.back()); + pinned.pop_back(); + } + } + + total_racing_writes += racing_writes; + threads_finished += 1; + }; + + std::cout << fmt::format("doing {:.1f} passes over {:.1f} GiB of virtual memory\nthis will take a few minutes, progress printed every 10 seconds", + chunks_touched * 1. / cache.maxChunks(), cache.maxChunks() * cache.chunkSize() * 1. / (1ul << 30)) << std::endl; + + auto start_time = std::chrono::steady_clock::now(); + + std::vector threads; + for (size_t i = 0; i < num_threads; ++i) + threads.emplace_back(thread_func); + + for (size_t poll = 0;; ++poll) + { + if (threads_finished == num_threads) + break; + if (poll % 100 == 0) + std::cout << fmt::format("{:.3f}%", progress.load() * 100. / num_keys / passes / num_threads * step) << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + for (std::thread & t : threads) + t.join(); + + auto end_time = std::chrono::steady_clock::now(); + double elapsed_seconds = std::chrono::duration_cast>(end_time - start_time).count(); + double touched_gib = chunks_touched * cache.chunkSize() * 1. / (1ul << 30); + std::cout << fmt::format("touched {:.1f} GiB in {:.1f} seconds, that's {:.3f} GiB/s", + touched_gib, elapsed_seconds, touched_gib / elapsed_seconds) << std::endl; + + auto & counters = CurrentThread::getProfileEvents(); + + std::cout << "stats:" + << "\nchunk misses: " << counters[ProfileEvents::PageCacheChunkMisses].load() + << "\nchunk shared: " << counters[ProfileEvents::PageCacheChunkShared].load() + << "\nchunk data misses: " << counters[ProfileEvents::PageCacheChunkDataMisses].load() + << "\nchunk data partial hits: " << counters[ProfileEvents::PageCacheChunkDataPartialHits].load() + << "\nchunk data hits: " << counters[ProfileEvents::PageCacheChunkDataHits].load() + << "\nracing page writes: " << total_racing_writes << std::endl; + + /// Check that we at least hit all the cases. 
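The stress test above never stores expected values anywhere: each page's contents are derived deterministically from the key index and page index, so any thread can recompute what a populated page must contain. The self-contained fill/verify sketch below shows the same idea on a plain buffer; it drops the details that about 1/37 of pages are deliberately left unpopulated and that the real test goes through std::atomic because threads may race on the same memory.

#include <cstddef>
#include <cstdint>
#include <vector>

// Fill a "page" of 64-bit items with values derived from (key_idx, page_idx).
void fillPage(std::vector<std::uint64_t> & page, std::size_t key_idx, std::size_t page_idx)
{
    const std::uint64_t start = static_cast<std::uint64_t>(key_idx) * page_idx;
    for (std::size_t j = 0; j < page.size(); ++j)
        page[j] = start + j;
}

// Re-derive the expected values and compare; returns false on any mismatch.
bool verifyPage(const std::vector<std::uint64_t> & page, std::size_t key_idx, std::size_t page_idx)
{
    const std::uint64_t start = static_cast<std::uint64_t>(key_idx) * page_idx;
    for (std::size_t j = 0; j < page.size(); ++j)
        if (page[j] != start + j)
            return false;
    return true;
}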
+ CHECK(counters[ProfileEvents::PageCacheChunkMisses].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkShared].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkDataMisses].load() > 0); + /// Partial hits are rare enough that sometimes this is zero, so don't check it. + /// That's good news because we don't need to implement downloading parts of a chunk. + /// CHECK(counters[ProfileEvents::PageCacheChunkDataPartialHits].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkDataHits].load() > 0); + CHECK(total_racing_writes > 0); + CHECK(cache.getPinnedSize() == 0); + + size_t rss = cache.getResidentSetSize().page_cache_rss; + std::cout << "RSS: " << rss * 1. / (1ul << 30) << " GiB" << std::endl; + /// This can be flaky if the system has < 10% free memory. If this turns out to be a problem, feel free to remove or reduce. + CHECK(rss > ram_size / 10); + + cache.dropCache(); + +#ifdef OS_LINUX + /// MADV_DONTNEED is not synchronous, and we're freeing lots of pages. Let's give Linux a lot of time. + std::this_thread::sleep_for(std::chrono::seconds(10)); + size_t new_rss = cache.getResidentSetSize().page_cache_rss; + std::cout << "RSS after dropping cache: " << new_rss * 1. / (1ul << 30) << " GiB" << std::endl; + CHECK(new_rss < rss / 2); +#endif +} + +/// Benchmark that measures the PageCache overhead for cache hits. Doesn't touch the actual data, so +/// memory bandwidth mostly doesn't factor into this. +/// This measures the overhead of things like madvise(MADV_FREE) and probing the pages (restoreChunkFromLimbo()). +/// Disabled in CI, run manually with --gtest_also_run_disabled_tests --gtest_filter=PageCache.DISABLED_HitsBench +TEST(PageCache, DISABLED_HitsBench) +{ + /// Do a few runs, with and without MADV_FREE. + for (size_t num_threads = 1; num_threads <= 16; num_threads *= 2) + { + for (size_t run = 0; run < 8; ++ run) + { + bool use_madv_free = run % 2 == 1; + bool use_huge_pages = run % 4 / 2 == 1; + + PageCache cache(2 << 20, 1ul << 30, 20ul << 30, use_madv_free, use_huge_pages); + size_t passes = 3; + std::atomic total_misses {0}; + + /// Prepopulate all chunks. + for (size_t i = 0; i < cache.maxChunks(); ++i) + { + PageCacheKey key = i * 0xcafebabeb0bad00dul; + PinnedPageChunk chunk = cache.getOrSet(key, /* detache_if_missing */ false, /* inject_eviction */ false); + memset(chunk.getChunk()->data, 42, chunk.getChunk()->size); + chunk.markPrefixPopulated(cache.chunkSize()); + } + + auto thread_func = [&] + { + pcg64 rng(randomSeed()); + size_t misses = 0; + for (size_t i = 0; i < cache.maxChunks() * passes; ++i) + { + PageCacheKey key = rng() % cache.maxChunks() * 0xcafebabeb0bad00dul; + PinnedPageChunk chunk = cache.getOrSet(key, /* detache_if_missing */ false, /* inject_eviction */ false); + if (!chunk.isPrefixPopulated(cache.chunkSize())) + misses += 1; + } + total_misses += misses; + }; + + auto start_time = std::chrono::steady_clock::now(); + + std::vector threads; + for (size_t i = 0; i < num_threads; ++i) + threads.emplace_back(thread_func); + + for (std::thread & t : threads) + t.join(); + + auto end_time = std::chrono::steady_clock::now(); + double elapsed_seconds = std::chrono::duration_cast>(end_time - start_time).count(); + double fetched_gib = cache.chunkSize() * cache.maxChunks() * passes * 1. 
/ (1ul << 30); + std::cout << fmt::format( + "threads {}, run {}, use_madv_free = {}, use_huge_pages = {}\nrequested {:.1f} GiB in {:.1f} seconds\n" + "that's {:.1f} GiB/s, or overhead of {:.3}us/{:.1}MiB\n", + num_threads, run, use_madv_free, use_huge_pages, fetched_gib, elapsed_seconds, fetched_gib / elapsed_seconds, + elapsed_seconds * 1e6 / cache.maxChunks() / passes, cache.chunkSize() * 1. / (1 << 20)) << std::endl; + + if (total_misses != 0) + std::cout << "!got " << total_misses.load() << " misses! perhaps your system doesn't have enough free memory, consider decreasing cache size in the benchmark code" << std::endl; + } + } +} diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 1d17585cc96..cc6e4691737 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -304,6 +304,9 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log log_settings.turn_off_logger = DB::TextLog::shouldTurnOffLogger(); + log_settings.database = config.getString("text_log.database", "system"); + log_settings.table = config.getString("text_log.table", "text_log"); + split->addTextLog(DB::TextLog::getLogQueue(log_settings), text_log_level); } #endif diff --git a/src/Loggers/OwnJSONPatternFormatter.h b/src/Loggers/OwnJSONPatternFormatter.h index 6a31e6bbd8a..51827f34b22 100644 --- a/src/Loggers/OwnJSONPatternFormatter.h +++ b/src/Loggers/OwnJSONPatternFormatter.h @@ -26,7 +26,7 @@ class Loggers; class OwnJSONPatternFormatter : public OwnPatternFormatter { public: - OwnJSONPatternFormatter(Poco::Util::AbstractConfiguration & config); + explicit OwnJSONPatternFormatter(Poco::Util::AbstractConfiguration & config); void format(const Poco::Message & msg, std::string & text) override; void formatExtended(const DB::ExtendedLogMessage & msg_ext, std::string & text) const override; diff --git a/src/Loggers/OwnPatternFormatter.h b/src/Loggers/OwnPatternFormatter.h index 8b0d11bcec1..176dbcd85c1 100644 --- a/src/Loggers/OwnPatternFormatter.h +++ b/src/Loggers/OwnPatternFormatter.h @@ -25,7 +25,7 @@ class Loggers; class OwnPatternFormatter : public Poco::PatternFormatter { public: - OwnPatternFormatter(bool color_ = false); + explicit OwnPatternFormatter(bool color_ = false); void format(const Poco::Message & msg, std::string & text) override; virtual void formatExtended(const DB::ExtendedLogMessage & msg_ext, std::string & text) const; diff --git a/src/NOTICE b/src/NOTICE index c68280b1529..4e5f66c65c9 100644 --- a/src/NOTICE +++ b/src/NOTICE @@ -13,18 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
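Two of the hunks above only add the explicit keyword to single-argument constructors (OwnJSONPatternFormatter, OwnPatternFormatter). The toy example below shows what that prevents: without explicit, a bare bool could silently convert into a formatter object at a call site. Formatter and print are hypothetical names, not the Poco-based classes from the patch.

#include <iostream>

struct Formatter
{
    explicit Formatter(bool color_ = false) : color(color_) {}
    bool color;
};

void print(const Formatter & f)
{
    std::cout << (f.color ? "colored output\n" : "plain output\n");
}

int main()
{
    print(Formatter(true));   // the intent is spelled out
    // print(true);           // no longer compiles: no implicit bool -> Formatter conversion
}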
-Common/ErrorCodes.cpp -Common/UInt128.h -Core/Block.h -Core/Defines.h -Core/Settings.h -Databases/DatabasesCommon.cpp -IO/WriteBufferValidUTF8.cpp -Interpreters/InterpreterAlterQuery.cpp Interpreters/InterpreterCreateQuery.cpp Interpreters/InterpreterFactory.cpp Parsers/ASTAlterQuery.cpp -Parsers/ASTAlterQuery.h Parsers/ASTCreateQuery.cpp Parsers/ASTCreateQuery.h Parsers/ParserAlterQuery.cpp diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index a6543190904..f104e715452 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -1,6 +1,7 @@ -#include -#include #include + +#include +#include #include @@ -59,8 +60,6 @@ ASTPtr ASTAlterCommand::clone() const res->settings_resets = res->children.emplace_back(settings_resets->clone()).get(); if (select) res->select = res->children.emplace_back(select->clone()).get(); - if (values) - res->values = res->children.emplace_back(values->clone()).get(); if (rename_to) res->rename_to = res->children.emplace_back(rename_to->clone()).get(); @@ -69,6 +68,9 @@ ASTPtr ASTAlterCommand::clone() const void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { + if (format_alter_commands_with_parentheses) + settings.ostr << "("; + if (type == ASTAlterCommand::ADD_COLUMN) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") @@ -284,6 +286,12 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & << (settings.hilite ? hilite_none : ""); partition->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::FORGET_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "FORGET PARTITION " + << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::ATTACH_PARTITION) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "ATTACH " << (part ? "PART " : "PARTITION ") @@ -446,20 +454,16 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & } else if (type == ASTAlterCommand::MODIFY_QUERY) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY QUERY " << settings.nl_or_ws + settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY QUERY" << settings.nl_or_ws << (settings.hilite ? hilite_none : ""); select->formatImpl(settings, state, frame); } else if (type == ASTAlterCommand::MODIFY_REFRESH) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY REFRESH " << settings.nl_or_ws + settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY" << settings.nl_or_ws << (settings.hilite ? hilite_none : ""); refresh->formatImpl(settings, state, frame); } - else if (type == ASTAlterCommand::LIVE_VIEW_REFRESH) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "REFRESH " << (settings.hilite ? hilite_none : ""); - } else if (type == ASTAlterCommand::RENAME_COLUMN) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "RENAME COLUMN " << (if_exists ? "IF EXISTS " : "") @@ -469,6 +473,11 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO "; rename_to->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::MODIFY_SQL_SECURITY) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY " << (settings.hilite ? 
hilite_none : ""); + sql_security->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::APPLY_DELETED_MASK) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "APPLY DELETED MASK" << (settings.hilite ? hilite_none : ""); @@ -481,6 +490,9 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & } else throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected type of ALTER"); + + if (format_alter_commands_with_parentheses) + settings.ostr << ")"; } void ASTAlterCommand::forEachPointerToChild(std::function f) @@ -504,7 +516,6 @@ void ASTAlterCommand::forEachPointerToChild(std::function f) f(reinterpret_cast(&settings_changes)); f(reinterpret_cast(&settings_resets)); f(reinterpret_cast(&select)); - f(reinterpret_cast(&values)); f(reinterpret_cast(&rename_to)); } @@ -608,9 +619,6 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState case AlterObjectType::DATABASE: settings.ostr << "ALTER DATABASE "; break; - case AlterObjectType::LIVE_VIEW: - settings.ostr << "ALTER LIVE VIEW "; - break; default: break; } @@ -619,16 +627,20 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState if (table) { + settings.ostr << indent_str; if (database) { - settings.ostr << indent_str << backQuoteIfNeed(getDatabase()); - settings.ostr << "."; + database->formatImpl(settings, state, frame); + settings.ostr << '.'; } - settings.ostr << indent_str << backQuoteIfNeed(getTable()); + + chassert(table); + table->formatImpl(settings, state, frame); } else if (alter_object == AlterObjectType::DATABASE && database) { - settings.ostr << indent_str << backQuoteIfNeed(getDatabase()); + settings.ostr << indent_str; + database->formatImpl(settings, state, frame); } formatOnCluster(settings); diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index c17f260b660..1799b75fce4 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -17,8 +17,6 @@ namespace DB * MODIFY COLUMN col_name type, * DROP PARTITION partition, * COMMENT_COLUMN col_name 'comment', - * ALTER LIVE VIEW [db.]name_type - * REFRESH */ class ASTAlterCommand : public IAST @@ -63,6 +61,7 @@ public: DROP_PARTITION, DROP_DETACHED_PARTITION, + FORGET_PARTITION, ATTACH_PARTITION, MOVE_PARTITION, REPLACE_PARTITION, @@ -78,11 +77,10 @@ public: NO_TYPE, - LIVE_VIEW_REFRESH, - MODIFY_DATABASE_SETTING, MODIFY_COMMENT, + MODIFY_SQL_SECURITY, }; Type type = NO_TYPE; @@ -139,7 +137,7 @@ public: IAST * statistic_decl = nullptr; - /** Used in DROP PARTITION, ATTACH PARTITION FROM, UPDATE, DELETE queries. + /** Used in DROP PARTITION, ATTACH PARTITION FROM, FORGET PARTITION, UPDATE, DELETE queries. * The value or ID of the partition is stored here. 
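The formatter changes around this point wrap every ALTER command in parentheses behind a process-wide flag; the flag itself is a static member of ASTAlterCommand with a setter that, per the comment added just below, is only meant to be called during application startup. A cut-down sketch of that "configure the formatter once, globally" pattern follows, with a hypothetical Command class and C++17 inline static member instead of the real AST classes.

#include <iostream>
#include <string>

class Command
{
public:
    // Call once during application startup, before anything is formatted.
    static void setFormatWithParentheses(bool value) { format_with_parentheses = value; }

    std::string format() const
    {
        std::string out;
        if (format_with_parentheses)
            out += '(';
        out += text;
        if (format_with_parentheses)
            out += ')';
        return out;
    }

    std::string text;

private:
    static inline bool format_with_parentheses = false;
};

int main()
{
    Command::setFormatWithParentheses(true);
    std::cout << Command{"ADD COLUMN c UInt64"}.format() << '\n';   // prints (ADD COLUMN c UInt64)
}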
*/ IAST * partition = nullptr; @@ -165,8 +163,8 @@ public: /// For MODIFY_QUERY IAST * select = nullptr; - /// In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here - IAST * values = nullptr; + /// For MODIFY_SQL_SECURITY + IAST * sql_security = nullptr; /// Target column name IAST * rename_to = nullptr; @@ -222,10 +220,16 @@ public: ASTPtr clone() const override; + // This function is only meant to be called during application startup + // For reasons see https://github.com/ClickHouse/ClickHouse/pull/59532 + static void setFormatAlterCommandsWithParentheses(bool value) { format_alter_commands_with_parentheses = value; } + protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void forEachPointerToChild(std::function f) override; + + static inline bool format_alter_commands_with_parentheses = false; }; class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster @@ -235,7 +239,6 @@ public: { TABLE, DATABASE, - LIVE_VIEW, UNKNOWN, }; diff --git a/src/Parsers/ASTCheckQuery.h b/src/Parsers/ASTCheckQuery.h index c9a17ca4a54..eca08b2b094 100644 --- a/src/Parsers/ASTCheckQuery.h +++ b/src/Parsers/ASTCheckQuery.h @@ -49,10 +49,12 @@ protected: { if (database) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(getDatabase()) << (settings.hilite ? hilite_none : ""); - settings.ostr << "."; + database->formatImpl(settings, state, frame); + settings.ostr << '.'; } - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(getTable()) << (settings.hilite ? hilite_none : ""); + + chassert(table); + table->formatImpl(settings, state, frame); } if (partition) diff --git a/src/Parsers/ASTCreateIndexQuery.cpp b/src/Parsers/ASTCreateIndexQuery.cpp index 4e03dad5b5b..f6def3ed85c 100644 --- a/src/Parsers/ASTCreateIndexQuery.cpp +++ b/src/Parsers/ASTCreateIndexQuery.cpp @@ -48,10 +48,12 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma { if (database) { - settings.ostr << indent_str << backQuoteIfNeed(getDatabase()); - settings.ostr << "."; + database->formatImpl(settings, state, frame); + settings.ostr << '.'; } - settings.ostr << indent_str << backQuoteIfNeed(getTable()); + + chassert(table); + table->formatImpl(settings, state, frame); } formatOnCluster(settings); diff --git a/src/Parsers/ASTCreateIndexQuery.h b/src/Parsers/ASTCreateIndexQuery.h index b7577f2634e..f5e35e270e9 100644 --- a/src/Parsers/ASTCreateIndexQuery.h +++ b/src/Parsers/ASTCreateIndexQuery.h @@ -16,7 +16,7 @@ class ASTCreateIndexQuery : public ASTQueryWithTableAndOutput, public ASTQueryWi public: ASTPtr index_name; - /// Stores the IndexDeclaration here. + /// Stores the ASTIndexDeclaration here. ASTPtr index_decl; bool if_not_exists{false}; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 9d5f0bcddbd..de5eb40837f 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -12,6 +12,37 @@ namespace DB { +void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + if (!type.has_value()) + return; + + if (definer || is_definer_current_user) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "DEFINER" << (settings.hilite ? 
hilite_none : ""); + settings.ostr << " = "; + if (definer) + definer->formatImpl(settings, state, frame); + else + settings.ostr << "CURRENT_USER"; + settings.ostr << " "; + } + + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SQL SECURITY" << (settings.hilite ? hilite_none : ""); + switch (*type) + { + case SQLSecurityType::INVOKER: + settings.ostr << " INVOKER"; + break; + case SQLSecurityType::DEFINER: + settings.ostr << " DEFINER"; + break; + case SQLSecurityType::NONE: + settings.ostr << " NONE"; + break; + } +} + ASTPtr ASTStorage::clone() const { auto res = std::make_shared(*this); @@ -241,8 +272,9 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.hilite ? hilite_keyword : "") << (attach ? "ATTACH DATABASE " : "CREATE DATABASE ") << (if_not_exists ? "IF NOT EXISTS " : "") - << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(getDatabase()); + << (settings.hilite ? hilite_none : ""); + + database->formatImpl(settings, state, frame); if (uuid != UUIDHelpers::Nil) { @@ -292,14 +324,21 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat else if (is_window_view) what = "WINDOW VIEW"; - settings.ostr - << (settings.hilite ? hilite_keyword : "") - << action << " " - << (temporary ? "TEMPORARY " : "") + settings.ostr << (settings.hilite ? hilite_keyword : "") << action << (settings.hilite ? hilite_none : ""); + settings.ostr << " "; + settings.ostr << (settings.hilite ? hilite_keyword : "") << (temporary ? "TEMPORARY " : "") << what << " " << (if_not_exists ? "IF NOT EXISTS " : "") - << (settings.hilite ? hilite_none : "") - << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable()); + << (settings.hilite ? hilite_none : ""); + + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(table); + table->formatImpl(settings, state, frame); if (uuid != UUIDHelpers::Nil) settings.ostr << (settings.hilite ? hilite_keyword : "") << " UUID " << (settings.hilite ? hilite_none : "") @@ -310,13 +349,6 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") << quoteString(*attach_from_path); - if (live_view_periodic_refresh) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH" << (settings.hilite ? hilite_none : "") - << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? hilite_none : "") - << *live_view_periodic_refresh; - } - formatOnCluster(settings); } else @@ -331,8 +363,17 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat /// Always DICTIONARY settings.ostr << (settings.hilite ? hilite_keyword : "") << action << " DICTIONARY " - << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "") - << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable()); + << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); + + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(table); + table->formatImpl(settings, state, frame); + if (uuid != UUIDHelpers::Nil) settings.ostr << (settings.hilite ? hilite_keyword : "") << " UUID " << (settings.hilite ? 
hilite_none : "") << quoteString(toString(uuid)); @@ -444,10 +485,16 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat else if (is_create_empty) settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? hilite_none : ""); + if (sql_security && sql_security->as().type.has_value()) + { + settings.ostr << settings.nl_or_ws; + sql_security->formatImpl(settings, state, frame); + } + if (select) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" - << settings.nl_or_ws + settings.ostr << settings.nl_or_ws; + settings.ostr << (settings.hilite ? hilite_keyword : "") << "AS " << (comment ? "(" : "") << (settings.hilite ? hilite_none : ""); select->formatImpl(settings, state, frame); settings.ostr << (settings.hilite ? hilite_keyword : "") << (comment ? ")" : "") << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 49a0140625c..64e6bc8ce48 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -15,6 +16,7 @@ class ASTFunction; class ASTSetQuery; class ASTSelectWithUnionQuery; + class ASTStorage : public IAST { public: @@ -96,6 +98,7 @@ public: bool is_populate{false}; bool is_create_empty{false}; /// CREATE TABLE ... EMPTY AS SELECT ... bool replace_view{false}; /// CREATE OR REPLACE VIEW + bool has_uuid{false}; // CREATE TABLE x UUID '...' ASTColumns * columns_list = nullptr; @@ -110,6 +113,7 @@ public: IAST * as_table_function = nullptr; ASTSelectWithUnionQuery * select = nullptr; IAST * comment = nullptr; + ASTPtr sql_security = nullptr; ASTTableOverrideList * table_overrides = nullptr; /// For CREATE DATABASE with engines that automatically create tables @@ -118,7 +122,6 @@ public: ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.) ASTRefreshStrategy * refresh_strategy = nullptr; // For CREATE MATERIALIZED VIEW ... REFRESH ... - std::optional live_view_periodic_refresh; /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ... 
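Every formatter touched in these hunks follows the same small idiom: emit the highlight escape only when hilite is enabled, print the keyword, reset, and let child AST nodes (database, table, sql_security) format themselves instead of hand-quoting strings. The standalone imitation below covers just the conditional-highlight part; the Settings struct is a hypothetical reduction of FormatSettings and the ANSI escape codes are chosen arbitrarily.

#include <iostream>
#include <string>

struct Settings
{
    std::ostream & ostr;
    bool hilite = false;
};

constexpr const char * hilite_keyword = "\033[1m";
constexpr const char * hilite_none = "\033[0m";

void formatKeyword(const Settings & s, const std::string & keyword)
{
    s.ostr << (s.hilite ? hilite_keyword : "") << keyword << (s.hilite ? hilite_none : "");
}

int main()
{
    Settings s{std::cout, /* hilite */ false};
    formatKeyword(s, "SQL SECURITY");
    s.ostr << " DEFINER\n";   // plain text when hilite is off, ANSI-bold keyword when it is on
}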
bool is_watermark_strictly_ascending{false}; /// STRICTLY ASCENDING WATERMARK STRATEGY FOR WINDOW VIEW bool is_watermark_ascending{false}; /// ASCENDING WATERMARK STRATEGY FOR WINDOW VIEW diff --git a/src/Parsers/ASTDeleteQuery.cpp b/src/Parsers/ASTDeleteQuery.cpp index 09dc4b936ae..67f3a85c9a5 100644 --- a/src/Parsers/ASTDeleteQuery.cpp +++ b/src/Parsers/ASTDeleteQuery.cpp @@ -36,10 +36,12 @@ void ASTDeleteQuery::formatQueryImpl(const FormatSettings & settings, FormatStat if (database) { - settings.ostr << backQuoteIfNeed(getDatabase()); - settings.ostr << "."; + database->formatImpl(settings, state, frame); + settings.ostr << '.'; } - settings.ostr << backQuoteIfNeed(getTable()); + + chassert(table); + table->formatImpl(settings, state, frame); formatOnCluster(settings); diff --git a/src/Parsers/ASTDescribeCacheQuery.cpp b/src/Parsers/ASTDescribeCacheQuery.cpp new file mode 100644 index 00000000000..6daedfdb3a4 --- /dev/null +++ b/src/Parsers/ASTDescribeCacheQuery.cpp @@ -0,0 +1,23 @@ +#include +#include + + +namespace DB +{ + +String ASTDescribeCacheQuery::getID(char) const { return "DescribeCacheQuery"; } + +ASTPtr ASTDescribeCacheQuery::clone() const +{ + auto res = std::make_shared(*this); + cloneOutputOptions(*res); + return res; +} + +void ASTDescribeCacheQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "DESCRIBE FILESYSTEM CACHE" << (settings.hilite ? hilite_none : "") + << " " << quoteString(cache_name); +} + +} diff --git a/src/Parsers/ASTDescribeCacheQuery.h b/src/Parsers/ASTDescribeCacheQuery.h index 3d1cbaef070..55b841e03f6 100644 --- a/src/Parsers/ASTDescribeCacheQuery.h +++ b/src/Parsers/ASTDescribeCacheQuery.h @@ -1,6 +1,8 @@ #pragma once + #include + namespace DB { @@ -9,20 +11,11 @@ class ASTDescribeCacheQuery : public ASTQueryWithOutput public: String cache_name; - String getID(char) const override { return "DescribeCacheQuery"; } - - ASTPtr clone() const override - { - auto res = std::make_shared(*this); - cloneOutputOptions(*res); - return res; - } + String getID(char) const override; + ASTPtr clone() const override; protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "DESCRIBE FILESYSTEM CACHE" << (settings.hilite ? 
hilite_none : "") << " " << cache_name; - } + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; } diff --git a/src/Parsers/ASTDropIndexQuery.cpp b/src/Parsers/ASTDropIndexQuery.cpp index 6866770d2ab..1109f32f019 100644 --- a/src/Parsers/ASTDropIndexQuery.cpp +++ b/src/Parsers/ASTDropIndexQuery.cpp @@ -43,10 +43,12 @@ void ASTDropIndexQuery::formatQueryImpl(const FormatSettings & settings, FormatS { if (database) { - settings.ostr << indent_str << backQuoteIfNeed(getDatabase()); - settings.ostr << "."; + database->formatImpl(settings, state, frame); + settings.ostr << '.'; } - settings.ostr << indent_str << backQuoteIfNeed(getTable()); + + chassert(table); + table->formatImpl(settings, state, frame); } formatOnCluster(settings); diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index ad1294c6e71..ca47ceccb85 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -32,7 +32,7 @@ ASTPtr ASTDropQuery::clone() const return res; } -void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { settings.ostr << (settings.hilite ? hilite_keyword : ""); if (kind == ASTDropQuery::Kind::Drop) @@ -47,7 +47,6 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState if (temporary) settings.ostr << "TEMPORARY "; - if (!table && database) settings.ostr << "DATABASE "; else if (is_dictionary) @@ -66,9 +65,20 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState settings.ostr << (settings.hilite ? hilite_none : ""); if (!table && database) - settings.ostr << backQuoteIfNeed(getDatabase()); + { + database->formatImpl(settings, state, frame); + } else - settings.ostr << (database ? backQuoteIfNeed(getDatabase()) + "." 
: "") << backQuoteIfNeed(getTable()); + { + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(table); + table->formatImpl(settings, state, frame); + } formatOnCluster(settings); diff --git a/src/Parsers/ASTExpressionList.cpp b/src/Parsers/ASTExpressionList.cpp index 2590c6b2941..61ac482af82 100644 --- a/src/Parsers/ASTExpressionList.cpp +++ b/src/Parsers/ASTExpressionList.cpp @@ -17,21 +17,23 @@ void ASTExpressionList::formatImpl(const FormatSettings & settings, FormatState if (frame.expression_list_prepend_whitespace) settings.ostr << ' '; - for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + for (size_t i = 0, size = children.size(); i < size; ++i) { - if (it != children.begin()) + if (i) { if (separator) settings.ostr << separator; settings.ostr << ' '; } + FormatStateStacked frame_nested = frame; + frame_nested.surround_each_list_element_with_parens = false; + frame_nested.list_element_index = i; + if (frame.surround_each_list_element_with_parens) settings.ostr << "("; - FormatStateStacked frame_nested = frame; - frame_nested.surround_each_list_element_with_parens = false; - (*it)->formatImpl(settings, state, frame_nested); + children[i]->formatImpl(settings, state, frame_nested); if (frame.surround_each_list_element_with_parens) settings.ostr << ")"; @@ -50,25 +52,23 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For ++frame.indent; - for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + for (size_t i = 0, size = children.size(); i < size; ++i) { - if (it != children.begin()) - { - if (separator) - settings.ostr << separator; - } + if (i && separator) + settings.ostr << separator; - if (children.size() > 1 || frame.expression_list_always_start_on_new_line) + if (size > 1 || frame.expression_list_always_start_on_new_line) settings.ostr << indent_str; FormatStateStacked frame_nested = frame; frame_nested.expression_list_always_start_on_new_line = false; frame_nested.surround_each_list_element_with_parens = false; + frame_nested.list_element_index = i; if (frame.surround_each_list_element_with_parens) settings.ostr << "("; - (*it)->formatImpl(settings, state, frame_nested); + children[i]->formatImpl(settings, state, frame_nested); if (frame.surround_each_list_element_with_parens) settings.ostr << ")"; diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index e7f7b48091a..07eea86ef81 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -36,508 +37,6 @@ namespace ErrorCodes } -namespace -{ - /// Finds arguments of a specified function which should not be displayed for most users for security reasons. - /// That involves passwords and secret keys. 
- class FunctionSecretArgumentsFinder - { - public: - explicit FunctionSecretArgumentsFinder(const ASTFunction & function_) : function(function_) - { - if (!function.arguments) - return; - - const auto * expr_list = function.arguments->as(); - if (!expr_list) - return; - - arguments = &expr_list->children; - switch (function.kind) - { - case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; - case ASTFunction::Kind::WINDOW_FUNCTION: break; - case ASTFunction::Kind::LAMBDA_FUNCTION: break; - case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; - case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; - case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; - } - } - - struct Result - { - /// Result constructed by default means no arguments will be hidden. - size_t start = static_cast(-1); - size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). - /// In all known cases secret arguments are consecutive - bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. - /// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))` - std::vector nested_maps; - - bool hasSecrets() const - { - return count != 0 || !nested_maps.empty(); - } - }; - - Result getResult() const { return result; } - - private: - const ASTFunction & function; - const ASTs * arguments = nullptr; - Result result; - - void markSecretArgument(size_t index, bool argument_is_named = false) - { - if (index >= arguments->size()) - return; - if (!result.count) - { - result.start = index; - result.are_named = argument_is_named; - } - chassert(index >= result.start); /// We always check arguments consecutively - result.count = index + 1 - result.start; - if (!argument_is_named) - result.are_named = false; - } - - void findOrdinaryFunctionSecretArguments() - { - if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) - { - /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) - /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) - findMySQLFunctionSecretArguments(); - } - else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") || - (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg")) - { - /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - findS3FunctionSecretArguments(/* is_cluster_function= */ false); - } - else if (function.name == "s3Cluster") - { - /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) - findS3FunctionSecretArguments(/* is_cluster_function= */ true); - } - else if ((function.name == "remote") || (function.name == "remoteSecure")) - { - /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) 
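The Result contract of the finder being moved out of this file is worth spelling out: it is only the index of the first secret argument plus a count of consecutive secret arguments, and the formatter later replaces everything in that range with '[HIDDEN]'. The self-contained sketch below renders a call using that contract; the string-based types and printCall are simplifications for illustration, not the AST-based code.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct SecretRange
{
    std::size_t start = static_cast<std::size_t>(-1);   // "no secrets" by default
    std::size_t count = 0;                               // number of consecutive secret arguments
};

void printCall(const std::string & name, const std::vector<std::string> & args, const SecretRange & secrets)
{
    std::cout << name << '(';
    for (std::size_t i = 0; i < args.size(); ++i)
    {
        if (i)
            std::cout << ", ";
        if (i >= secrets.start && i < secrets.start + secrets.count)
            std::cout << "'[HIDDEN]'";
        else
            std::cout << args[i];
    }
    std::cout << ")\n";
}

int main()
{
    // mysql('host:port', 'database', 'table', 'user', 'password', ...): the password is argument 4.
    printCall("mysql", {"'127.0.0.1:9004'", "'db'", "'t'", "'user'", "'secret'"}, SecretRange{4, 1});
}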
- findRemoteFunctionSecretArguments(); - } - else if ((function.name == "encrypt") || (function.name == "decrypt") || - (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || - (function.name == "tryDecrypt")) - { - /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) - findEncryptionFunctionSecretArguments(); - } - else if (function.name == "url") - { - findURLSecretArguments(); - } - } - - void findMySQLFunctionSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// mysql(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - } - else - { - /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - markSecretArgument(4); - } - } - - /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should - /// always be at the end). Marks "headers" as secret, if found. - size_t excludeS3OrURLNestedMaps() - { - size_t count = arguments->size(); - while (count > 0) - { - const ASTFunction * f = arguments->at(count - 1)->as(); - if (!f) - break; - if (f->name == "headers") - result.nested_maps.push_back(f->name); - else if (f->name != "extra_credentials") - break; - count -= 1; - } - return count; - } - - void findS3FunctionSecretArguments(bool is_cluster_function) - { - /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. - size_t url_arg_idx = is_cluster_function ? 1 : 0; - - if (!is_cluster_function && isNamedCollectionName(0)) - { - /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) - findSecretNamedArgument("secret_access_key", 1); - return; - } - - /// We should check other arguments first because we don't need to do any replacement in case of - /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - size_t count = excludeS3OrURLNestedMaps(); - if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) - { - String second_arg; - if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) - { - if (boost::iequals(second_arg, "NOSIGN")) - return; /// The argument after 'url' is "NOSIGN". - - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) - return; /// The argument after 'url' is a format: s3('url', 'format', ...) - } - } - - /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: - /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) 
- /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - if (url_arg_idx + 2 < count) - markSecretArgument(url_arg_idx + 2); - } - - void findURLSecretArguments() - { - if (!isNamedCollectionName(0)) - excludeS3OrURLNestedMaps(); - } - - bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const - { - if (arg_idx >= arguments->size()) - return false; - - return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); - } - - static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) - { - if (const auto * literal = argument.as()) - { - if (literal->value.getType() != Field::Types::String) - return false; - if (res) - *res = literal->value.safeGet(); - return true; - } - - if (allow_identifier) - { - if (const auto * id = argument.as()) - { - if (res) - *res = id->name(); - return true; - } - } - - return false; - } - - void findRemoteFunctionSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// remote(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - return; - } - - /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: - /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) - /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) - /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) - - /// But we should check the number of arguments first because we don't need to do any replacements in case of - /// remote('addresses_expr', db.table) - if (arguments->size() < 3) - return; - - size_t arg_num = 1; - - /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. - const auto * table_function = (*arguments)[arg_num]->as(); - if (table_function && KnownTableFunctionNames::instance().exists(table_function->name)) - { - ++arg_num; - } - else - { - std::optional database; - std::optional qualified_table_name; - if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) - { - /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. - /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' - /// before the argument 'password'. So it's safer to wipe two arguments just in case. - /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string - /// before wiping it (because the `password` argument is always a literal string). - if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) - { - /// Wipe either `password` or `user`. - markSecretArgument(arg_num + 2); - } - if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) - { - /// Wipe either `password` or `sharding_key`. - markSecretArgument(arg_num + 3); - } - return; - } - - /// Skip the current argument (which is either a database name or a qualified table name). - ++arg_num; - if (database) - { - /// Skip the 'table' argument if the previous argument was a database name. - ++arg_num; - } - } - - /// Skip username. - ++arg_num; - - /// Do our replacement: - /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) 
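The s3/S3 handling above has to decide whether the argument after the URL is NOSIGN or a format name before it hides anything, because s3('url', 'format', 'structure', ...) carries no credentials at all. The rough standalone sketch below mirrors that decision; iequals and isKnownFormat are simplified stand-ins for boost::iequals and KnownFormatNames, and the headers/extra_credentials exclusion is skipped.

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <optional>
#include <string>
#include <vector>

bool iequals(const std::string & a, const std::string & b)
{
    return a.size() == b.size()
        && std::equal(a.begin(), a.end(), b.begin(),
                      [](unsigned char x, unsigned char y) { return std::tolower(x) == std::tolower(y); });
}

// Hypothetical stand-in for KnownFormatNames::instance().exists(...).
bool isKnownFormat(const std::string & s)
{
    return s == "auto" || s == "CSV" || s == "Parquet" || s == "JSONEachRow";
}

// For s3('url', ...): index of the argument to hide, or nullopt if nothing is secret.
std::optional<std::size_t> secretArgumentIndexForS3(const std::vector<std::string> & args)
{
    if (args.size() >= 3 && args.size() <= 4)
    {
        const std::string & second = args[1];
        if (iequals(second, "NOSIGN") || isKnownFormat(second))
            return std::nullopt;   // s3('url', NOSIGN, ...) or s3('url', 'format', ...): no credentials
    }
    if (args.size() > 2)
        return 2;                  // s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
    return std::nullopt;
}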
- /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string - /// before wiping it (because the `password` argument is always a literal string). - bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); - if (can_be_password) - markSecretArgument(arg_num); - } - - /// Tries to get either a database name or a qualified table name from an argument. - /// Empty string is also allowed (it means the default database). - /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password. - bool tryGetDatabaseNameOrQualifiedTableName( - size_t arg_idx, - std::optional & res_database, - std::optional & res_qualified_table_name) const - { - res_database.reset(); - res_qualified_table_name.reset(); - - String str; - if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) - return false; - - if (str.empty()) - { - res_database = ""; - return true; - } - - auto qualified_table_name = QualifiedTableName::tryParseFromString(str); - if (!qualified_table_name) - return false; - - if (qualified_table_name->database.empty()) - res_database = std::move(qualified_table_name->table); - else - res_qualified_table_name = std::move(qualified_table_name); - return true; - } - - void findEncryptionFunctionSecretArguments() - { - if (arguments->empty()) - return; - - /// We replace all arguments after 'mode' with '[HIDDEN]': - /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') - result.start = 1; - result.count = arguments->size() - 1; - } - - void findTableEngineSecretArguments() - { - const String & engine_name = function.name; - if (engine_name == "ExternalDistributed") - { - /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - findExternalDistributedTableEngineSecretArguments(); - } - else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || - (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) - { - /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) - /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) - findMySQLFunctionSecretArguments(); - } - else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || - (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg")) - { - /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) - findS3TableEngineSecretArguments(); - } - else if (engine_name == "URL") - { - findURLSecretArguments(); - } - } - - void findExternalDistributedTableEngineSecretArguments() - { - if (isNamedCollectionName(1)) - { - /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) 
- findSecretNamedArgument("password", 2); - } - else - { - /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - markSecretArgument(5); - } - } - - void findS3TableEngineSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// S3(named_collection, ..., secret_access_key = 'secret_access_key') - findSecretNamedArgument("secret_access_key", 1); - return; - } - - /// We should check other arguments first because we don't need to do any replacement in case of - /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)]) - size_t count = excludeS3OrURLNestedMaps(); - if ((3 <= count) && (count <= 4)) - { - String second_arg; - if (tryGetStringFromArgument(1, &second_arg)) - { - if (boost::iequals(second_arg, "NOSIGN")) - return; /// The argument after 'url' is "NOSIGN". - - if (count == 3) - { - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) - return; /// The argument after 'url' is a format: S3('url', 'format', ...) - } - } - } - - /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - if (2 < count) - markSecretArgument(2); - } - - void findDatabaseEngineSecretArguments() - { - const String & engine_name = function.name; - if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || - (engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") || - (engine_name == "MaterializedPostgreSQL")) - { - /// MySQL('host:port', 'database', 'user', 'password') - /// PostgreSQL('host:port', 'database', 'user', 'password') - findMySQLDatabaseSecretArguments(); - } - else if (engine_name == "S3") - { - /// S3('url', 'access_key_id', 'secret_access_key') - findS3DatabaseSecretArguments(); - } - } - - void findMySQLDatabaseSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// MySQL(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - } - else - { - /// MySQL('host:port', 'database', 'user', 'password') - markSecretArgument(3); - } - } - - void findS3DatabaseSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// S3(named_collection, ..., secret_access_key = 'password', ...) - findSecretNamedArgument("secret_access_key", 1); - } - else - { - /// S3('url', 'access_key_id', 'secret_access_key') - markSecretArgument(2); - } - } - - void findBackupNameSecretArguments() - { - const String & engine_name = function.name; - if (engine_name == "S3") - { - /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) - markSecretArgument(2); - } - } - - /// Whether a specified argument can be the name of a named collection? - bool isNamedCollectionName(size_t arg_idx) const - { - if (arguments->size() <= arg_idx) - return false; - - const auto * identifier = (*arguments)[arg_idx]->as(); - return identifier != nullptr; - } - - /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. 
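When a named collection is involved, secrets arrive as named arguments (password = '...'), so the finder scans the argument list for equals(key, value) pairs whose key matches, usually starting at index 1 to skip the collection name itself. Reduced to plain key/value pairs (the real code, shown next, walks equals() AST nodes), the scan looks roughly like this:

#include <cstddef>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

// Index of the first named argument with the given key, starting the scan at `start`,
// or nullopt if there is none.
std::optional<std::size_t> findNamedArgument(
    const std::vector<std::pair<std::string, std::string>> & named_args,
    std::string_view key,
    std::size_t start = 0)
{
    for (std::size_t i = start; i < named_args.size(); ++i)
        if (named_args[i].first == key)
            return i;
    return std::nullopt;
}

// e.g. findNamedArgument({{"collection", "c1"}, {"password", "secret"}}, "password", 1) returns 1.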
- void findSecretNamedArgument(const std::string_view & key, size_t start = 0) - { - for (size_t i = start; i < arguments->size(); ++i) - { - const auto & argument = (*arguments)[i]; - const auto * equals_func = argument->as(); - if (!equals_func || (equals_func->name != "equals")) - continue; - - const auto * expr_list = equals_func->arguments->as(); - if (!expr_list) - continue; - - const auto & equal_args = expr_list->children; - if (equal_args.size() != 2) - continue; - - String found_key; - if (!tryGetStringFromArgument(*equal_args[0], &found_key)) - continue; - - if (found_key == key) - markSecretArgument(i, /* argument_is_named= */ true); - } - } - }; -} - - void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const { /// These functions contain some unexpected ASTs in arguments (e.g. SETTINGS or even a SELECT query) @@ -813,8 +312,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format /// Should this function to be written as operator? bool written = false; - - if (arguments && !parameters) + if (arguments && !parameters && nulls_action == NullsAction::EMPTY) { /// Unary prefix operators. if (arguments->children.size() == 1) @@ -835,34 +333,37 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format const auto * literal = arguments->children[0]->as(); const auto * function = arguments->children[0]->as(); - bool negate = name == "negate"; bool is_tuple = literal && literal->value.getType() == Field::Types::Tuple; // do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))` bool literal_need_parens = literal && !is_tuple; + // negate always requires parentheses, otherwise -(-1) will be printed as --1 - bool negate_need_parens = negate && (literal_need_parens || (function && function->name == "negate")); - // We don't need parentheses around a single literal. - bool need_parens = !literal && frame.need_parens && !negate_need_parens; + bool inside_parens = name == "negate" && (literal_need_parens || (function && function->name == "negate")); + + /// We DO need parentheses around a single literal + /// For example, SELECT (NOT 0) + (NOT 0) cannot be transformed into SELECT NOT 0 + NOT 0, since + /// this is equal to SELECT NOT (0 + NOT 0) + bool outside_parens = frame.need_parens && !inside_parens; // do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1))) - if (negate_need_parens) + if (inside_parens) nested_need_parens.need_parens = false; - if (need_parens) + if (outside_parens) settings.ostr << '('; settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - if (negate_need_parens) + if (inside_parens) settings.ostr << '('; arguments->formatImpl(settings, state, nested_need_parens); written = true; - if (negate_need_parens) + if (inside_parens) settings.ostr << ')'; - if (need_parens) + if (outside_parens) settings.ostr << ')'; break; @@ -1034,31 +535,39 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format } } - if (!written && name == "lambda"sv) + const auto & first_argument = arguments->children[0]; + const ASTIdentifier * first_argument_identifier = first_argument->as(); + const ASTFunction * first_argument_function = first_argument->as(); + bool first_argument_is_tuple = first_argument_function && first_argument_function->name == "tuple"; + + /// Only these types of arguments are accepted by the parser of the '->' operator. 
+ bool acceptable_first_argument_for_lambda_expression = first_argument_identifier || first_argument_is_tuple; + + if (!written && name == "lambda"sv && acceptable_first_argument_for_lambda_expression) { /// Special case: zero elements tuple in lhs of lambda is printed as (). /// Special case: one-element tuple in lhs of lambda is printed as its element. + /// If lambda function is not the first element in the list, it has to be put in parentheses. + /// Example: f(x, (y -> z)) should not be printed as f((x, y) -> z). - if (frame.need_parens) + if (frame.need_parens || frame.list_element_index > 0) settings.ostr << '('; - const auto * first_arg_func = arguments->children[0]->as(); - if (first_arg_func - && first_arg_func->name == "tuple" - && first_arg_func->arguments - && (first_arg_func->arguments->children.size() == 1 || first_arg_func->arguments->children.empty())) + if (first_argument_is_tuple + && first_argument_function->arguments + && (first_argument_function->arguments->children.size() == 1 || first_argument_function->arguments->children.empty())) { - if (first_arg_func->arguments->children.size() == 1) - first_arg_func->arguments->children[0]->formatImpl(settings, state, nested_need_parens); + if (first_argument_function->arguments->children.size() == 1) + first_argument_function->arguments->children[0]->formatImpl(settings, state, nested_need_parens); else settings.ostr << "()"; } else - arguments->children[0]->formatImpl(settings, state, nested_need_parens); + first_argument->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << " -> " << (settings.hilite ? hilite_none : ""); arguments->children[1]->formatImpl(settings, state, nested_need_parens); - if (frame.need_parens) + if (frame.need_parens || frame.list_element_index > 0) settings.ostr << ')'; written = true; } @@ -1185,7 +694,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format FunctionSecretArgumentsFinder::Result secret_arguments; if (!settings.show_secrets) - secret_arguments = FunctionSecretArgumentsFinder{*this}.getResult(); + secret_arguments = FunctionSecretArgumentsFinderAST(*this).getResult(); for (size_t i = 0, size = arguments->children.size(); i < size; ++i) { @@ -1235,6 +744,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format continue; } + nested_dont_need_parens.list_element_index = i; argument->formatImpl(settings, state, nested_dont_need_parens); } } @@ -1249,7 +759,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format bool ASTFunction::hasSecretParts() const { - return (FunctionSecretArgumentsFinder{*this}.getResult().hasSecrets()) || childrenHaveSecretParts(); + return (FunctionSecretArgumentsFinderAST(*this).getResult().hasSecrets()) || childrenHaveSecretParts(); } String getFunctionName(const IAST * ast) diff --git a/src/Parsers/ASTIndexDeclaration.cpp b/src/Parsers/ASTIndexDeclaration.cpp index 12d59681cc3..8dac5389c80 100644 --- a/src/Parsers/ASTIndexDeclaration.cpp +++ b/src/Parsers/ASTIndexDeclaration.cpp @@ -36,7 +36,7 @@ void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & sta s.ostr << ")"; } else - expr->formatImpl(s, state, frame); + expr->formatImpl(s, state, frame); } else { @@ -59,4 +59,3 @@ void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & sta } } - diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index 88e087dd4ee..72a569fe047 100644 --- 
a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -68,8 +68,14 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s } else { - settings.ostr << (settings.hilite ? hilite_none : "") - << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable()); + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(table); + table->formatImpl(settings, state, frame); } if (columns) diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h index 6a4ce078f79..b0f444ed755 100644 --- a/src/Parsers/ASTInsertQuery.h +++ b/src/Parsers/ASTInsertQuery.h @@ -59,11 +59,13 @@ public: if (database) { res->database = database->clone(); res->children.push_back(res->database); } if (table) { res->table = table->clone(); res->children.push_back(res->table); } if (columns) { res->columns = columns->clone(); res->children.push_back(res->columns); } - if (select) { res->select = select->clone(); res->children.push_back(res->select); } - if (watch) { res->watch = watch->clone(); res->children.push_back(res->watch); } if (table_function) { res->table_function = table_function->clone(); res->children.push_back(res->table_function); } if (partition_by) { res->partition_by = partition_by->clone(); res->children.push_back(res->partition_by); } if (settings_ast) { res->settings_ast = settings_ast->clone(); res->children.push_back(res->settings_ast); } + if (select) { res->select = select->clone(); res->children.push_back(res->select); } + if (watch) { res->watch = watch->clone(); res->children.push_back(res->watch); } + if (infile) { res->infile = infile->clone(); res->children.push_back(res->infile); } + if (compression) { res->compression = compression->clone(); res->children.push_back(res->compression); } return res; } diff --git a/src/Parsers/ASTOptimizeQuery.cpp b/src/Parsers/ASTOptimizeQuery.cpp index 173310f7930..397a37586fc 100644 --- a/src/Parsers/ASTOptimizeQuery.cpp +++ b/src/Parsers/ASTOptimizeQuery.cpp @@ -7,8 +7,16 @@ namespace DB void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "") - << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable()); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? 
hilite_none : ""); + + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(table); + table->formatImpl(settings, state, frame); formatOnCluster(settings); diff --git a/src/Parsers/ASTQueryWithOutput.cpp b/src/Parsers/ASTQueryWithOutput.cpp index 4bf1e6cb231..c57aa759969 100644 --- a/src/Parsers/ASTQueryWithOutput.cpp +++ b/src/Parsers/ASTQueryWithOutput.cpp @@ -23,6 +23,16 @@ void ASTQueryWithOutput::cloneOutputOptions(ASTQueryWithOutput & cloned) const cloned.settings_ast = settings_ast->clone(); cloned.children.push_back(cloned.settings_ast); } + if (compression) + { + cloned.compression = compression->clone(); + cloned.children.push_back(cloned.compression); + } + if (compression_level) + { + cloned.compression_level = compression_level->clone(); + cloned.children.push_back(cloned.compression_level); + } } void ASTQueryWithOutput::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const @@ -64,9 +74,23 @@ bool ASTQueryWithOutput::resetOutputASTIfExist(IAST & ast) /// FIXME: try to prettify this cast using `as<>()` if (auto * ast_with_output = dynamic_cast(&ast)) { - ast_with_output->format.reset(); - ast_with_output->out_file.reset(); - ast_with_output->settings_ast.reset(); + auto remove_if_exists = [&](ASTPtr & p) + { + if (p) + { + if (auto * it = std::find(ast_with_output->children.begin(), ast_with_output->children.end(), p); + it != ast_with_output->children.end()) + ast_with_output->children.erase(it); + p.reset(); + } + }; + + remove_if_exists(ast_with_output->out_file); + remove_if_exists(ast_with_output->format); + remove_if_exists(ast_with_output->settings_ast); + remove_if_exists(ast_with_output->compression); + remove_if_exists(ast_with_output->compression_level); + return true; } diff --git a/src/Parsers/ASTQueryWithTableAndOutput.cpp b/src/Parsers/ASTQueryWithTableAndOutput.cpp index a216aeaa11f..85bfe2ae80f 100644 --- a/src/Parsers/ASTQueryWithTableAndOutput.cpp +++ b/src/Parsers/ASTQueryWithTableAndOutput.cpp @@ -64,11 +64,5 @@ void ASTQueryWithTableAndOutput::cloneTableOptions(ASTQueryWithTableAndOutput & cloned.children.push_back(cloned.table); } } -void ASTQueryWithTableAndOutput::formatHelper(const FormatSettings & settings, const char * name) const -{ - settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : ""); - settings.ostr << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable()); -} } - diff --git a/src/Parsers/ASTQueryWithTableAndOutput.h b/src/Parsers/ASTQueryWithTableAndOutput.h index 10f0e76f29c..b0227d68672 100644 --- a/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/src/Parsers/ASTQueryWithTableAndOutput.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -28,9 +29,6 @@ public: void setTable(const String & name); void cloneTableOptions(ASTQueryWithTableAndOutput & cloned) const; - -protected: - void formatHelper(const FormatSettings & settings, const char * name) const; }; @@ -52,9 +50,20 @@ public: QueryKind getQueryKind() const override { return QueryKind::Show; } protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - formatHelper(settings, temporary ? AstIDAndQueryNames::QueryTemporary : AstIDAndQueryNames::Query); + settings.ostr << (settings.hilite ? 
hilite_keyword : "") + << (temporary ? AstIDAndQueryNames::QueryTemporary : AstIDAndQueryNames::Query) + << " " << (settings.hilite ? hilite_none : ""); + + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(table != nullptr, "Table is empty for the ASTQueryWithTableAndOutputImpl."); + table->formatImpl(settings, state, frame); } }; diff --git a/src/Parsers/ASTRenameQuery.h b/src/Parsers/ASTRenameQuery.h index 5d07cb976af..beaf93c4761 100644 --- a/src/Parsers/ASTRenameQuery.h +++ b/src/Parsers/ASTRenameQuery.h @@ -45,7 +45,6 @@ public: }; using Elements = std::vector; - Elements elements; bool exchange{false}; /// For EXCHANGE TABLES bool database{false}; /// For RENAME DATABASE @@ -54,12 +53,48 @@ public: /// Special flag for CREATE OR REPLACE. Do not throw if the second table does not exist. bool rename_if_cannot_exchange{false}; + explicit ASTRenameQuery(Elements elements_ = {}) + : elements(std::move(elements_)) + { + for (const auto & elem : elements) + { + if (elem.from.database) + children.push_back(elem.from.database); + if (elem.from.table) + children.push_back(elem.from.table); + if (elem.to.database) + children.push_back(elem.to.database); + if (elem.to.table) + children.push_back(elem.to.table); + } + } + + void setDatabaseIfNotExists(const String & database_name) + { + for (auto & elem : elements) + { + if (!elem.from.database) + { + elem.from.database = std::make_shared(database_name); + children.push_back(elem.from.database); + } + if (!elem.to.database) + { + elem.to.database = std::make_shared(database_name); + children.push_back(elem.to.database); + } + } + } + + const Elements & getElements() const { return elements; } + /** Get the text that identifies this element. */ String getID(char) const override { return "Rename"; } ASTPtr clone() const override { auto res = std::make_shared(*this); + res->cloneChildren(); cloneOutputOptions(*res); return res; } @@ -84,7 +119,7 @@ public: QueryKind getQueryKind() const override { return QueryKind::Rename; } protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { if (database) { @@ -93,9 +128,9 @@ protected: if (elements.at(0).if_exists) settings.ostr << (settings.hilite ? hilite_keyword : "") << "IF EXISTS " << (settings.hilite ? hilite_none : ""); - settings.ostr << backQuoteIfNeed(elements.at(0).from.getDatabase()); + elements.at(0).from.database->formatImpl(settings, state, frame); settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : ""); - settings.ostr << backQuoteIfNeed(elements.at(0).to.getDatabase()); + elements.at(0).to.database->formatImpl(settings, state, frame); formatOnCluster(settings); return; } @@ -119,13 +154,34 @@ protected: if (it->if_exists) settings.ostr << (settings.hilite ? hilite_keyword : "") << "IF EXISTS " << (settings.hilite ? hilite_none : ""); - settings.ostr << (it->from.database ? backQuoteIfNeed(it->from.getDatabase()) + "." : "") << backQuoteIfNeed(it->from.getTable()) - << (settings.hilite ? hilite_keyword : "") << (exchange ? " AND " : " TO ") << (settings.hilite ? hilite_none : "") - << (it->to.database ? backQuoteIfNeed(it->to.getDatabase()) + "." 
: "") << backQuoteIfNeed(it->to.getTable()); + + + if (it->from.database) + { + it->from.database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(it->from.table); + it->from.table->formatImpl(settings, state, frame); + + settings.ostr << (settings.hilite ? hilite_keyword : "") << (exchange ? " AND " : " TO ") << (settings.hilite ? hilite_none : ""); + + if (it->to.database) + { + it->to.database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(it->to.table); + it->to.table->formatImpl(settings, state, frame); + } formatOnCluster(settings); } + + Elements elements; }; } diff --git a/src/Parsers/ASTSQLSecurity.cpp b/src/Parsers/ASTSQLSecurity.cpp new file mode 100644 index 00000000000..d6f1c21d035 --- /dev/null +++ b/src/Parsers/ASTSQLSecurity.cpp @@ -0,0 +1,39 @@ + +#include +#include + +namespace DB +{ + +void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + if (!type.has_value()) + return; + + if (definer || is_definer_current_user) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "DEFINER" << (settings.hilite ? hilite_none : ""); + settings.ostr << " = "; + if (definer) + definer->formatImpl(settings, state, frame); + else + settings.ostr << "CURRENT_USER"; + settings.ostr << " "; + } + + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SQL SECURITY" << (settings.hilite ? hilite_none : ""); + switch (*type) + { + case SQLSecurityType::INVOKER: + settings.ostr << " INVOKER"; + break; + case SQLSecurityType::DEFINER: + settings.ostr << " DEFINER"; + break; + case SQLSecurityType::NONE: + settings.ostr << " NONE"; + break; + } +} + +} diff --git a/src/Parsers/ASTSQLSecurity.h b/src/Parsers/ASTSQLSecurity.h new file mode 100644 index 00000000000..47fd8752a67 --- /dev/null +++ b/src/Parsers/ASTSQLSecurity.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// DEFINER = SQL SECURITY +/// If type was not set during parsing, the default type from settings will be used. +/// Currently supports only views. +class ASTSQLSecurity : public IAST +{ +public: + bool is_definer_current_user{false}; + std::shared_ptr definer = nullptr; + std::optional type = std::nullopt; + + String getID(char) const override { return "View SQL Security"; } + ASTPtr clone() const override { return std::make_shared(*this); } + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 2115de1c124..586477e1cfd 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -108,12 +108,6 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F if (group_by_all) s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY ALL" << (s.hilite ? hilite_none : ""); - if (group_by_with_rollup) - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH ROLLUP" << (s.hilite ? hilite_none : ""); - - if (group_by_with_cube) - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH CUBE" << (s.hilite ? 
hilite_none : ""); - if (group_by_with_grouping_sets && groupBy()) { auto nested_frame = frame; @@ -128,6 +122,12 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F s.ostr << ")"; } + if (group_by_with_rollup) + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH ROLLUP" << (s.hilite ? hilite_none : ""); + + if (group_by_with_cube) + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH CUBE" << (s.hilite ? hilite_none : ""); + if (group_by_with_totals) s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH TOTALS" << (s.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 48b4ae3c38d..cf72358dea7 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -63,18 +63,12 @@ void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, F if (auto * node = (*it)->as()) { - settings.ostr << settings.nl_or_ws << indent_str; + if (it != list_of_selects->children.begin()) + settings.ostr << settings.nl_or_ws; - if (node->list_of_selects->children.size() == 1) - { - (node->list_of_selects->children.at(0))->formatImpl(settings, state, frame); - } - else - { - auto sub_query = std::make_shared(); - sub_query->children.push_back(*it); - sub_query->formatImpl(settings, state, frame); - } + settings.ostr << indent_str; + auto sub_query = std::make_shared(*it); + sub_query->formatImpl(settings, state, frame); } else { diff --git a/src/Parsers/ASTSetQuery.cpp b/src/Parsers/ASTSetQuery.cpp index d41e9aba4cc..94b44ed2fa7 100644 --- a/src/Parsers/ASTSetQuery.cpp +++ b/src/Parsers/ASTSetQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -106,7 +107,7 @@ void ASTSetQuery::formatImpl(const FormatSettings & format, FormatState &, Forma first = false; formatSettingName(QUERY_PARAMETER_NAME_PREFIX + name, format.ostr); - format.ostr << " = " << value; + format.ostr << " = " << quoteString(value); } } diff --git a/src/Parsers/ASTSetQuery.h b/src/Parsers/ASTSetQuery.h index 944f08dcbaa..42d63944b4f 100644 --- a/src/Parsers/ASTSetQuery.h +++ b/src/Parsers/ASTSetQuery.h @@ -25,7 +25,7 @@ public: SettingsChanges changes; /// settings that will be reset to default value std::vector default_settings; - NameToNameMap query_parameters; + NameToNameVector query_parameters; /** Get the text that identifies this element. 
*/ String getID(char) const override { return "Set"; } diff --git a/src/Parsers/ASTSubquery.h b/src/Parsers/ASTSubquery.h index ef277a63126..c48307ed68c 100644 --- a/src/Parsers/ASTSubquery.h +++ b/src/Parsers/ASTSubquery.h @@ -26,6 +26,13 @@ public: return clone; } + ASTSubquery() = default; + + explicit ASTSubquery(ASTPtr child) + { + children.emplace_back(std::move(child)); + } + void updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const override; String getAliasOrColumnName() const override; String tryGetAlias() const override; diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index fc4ecf4763a..effc7207793 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -7,9 +7,15 @@ #include + namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace { std::vector getTypeIndexToTypeName() @@ -85,7 +91,7 @@ void ASTSystemQuery::setTable(const String & name) } } -void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { auto print_identifier = [&](const String & identifier) -> WriteBuffer & { @@ -104,9 +110,12 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { if (database) { - print_identifier(getDatabase()) << "."; + database->formatImpl(settings, state, frame); + settings.ostr << '.'; } - print_identifier(getTable()); + + chassert(table); + table->formatImpl(settings, state, frame); return settings.ostr; }; @@ -144,185 +153,275 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, if (!cluster.empty()) formatOnCluster(settings); - if ( type == Type::STOP_MERGES - || type == Type::START_MERGES - || type == Type::STOP_TTL_MERGES - || type == Type::START_TTL_MERGES - || type == Type::STOP_MOVES - || type == Type::START_MOVES - || type == Type::STOP_FETCHES - || type == Type::START_FETCHES - || type == Type::STOP_REPLICATED_SENDS - || type == Type::START_REPLICATED_SENDS - || type == Type::STOP_REPLICATION_QUEUES - || type == Type::START_REPLICATION_QUEUES - || type == Type::STOP_DISTRIBUTED_SENDS - || type == Type::START_DISTRIBUTED_SENDS - || type == Type::STOP_PULLING_REPLICATION_LOG - || type == Type::START_PULLING_REPLICATION_LOG - || type == Type::STOP_CLEANUP - || type == Type::START_CLEANUP) + switch (type) { - if (table) + case Type::STOP_MERGES: + case Type::START_MERGES: + case Type::STOP_TTL_MERGES: + case Type::START_TTL_MERGES: + case Type::STOP_MOVES: + case Type::START_MOVES: + case Type::STOP_FETCHES: + case Type::START_FETCHES: + case Type::STOP_REPLICATED_SENDS: + case Type::START_REPLICATED_SENDS: + case Type::STOP_REPLICATION_QUEUES: + case Type::START_REPLICATION_QUEUES: + case Type::STOP_DISTRIBUTED_SENDS: + case Type::START_DISTRIBUTED_SENDS: + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: + case Type::STOP_CLEANUP: + case Type::START_CLEANUP: { - settings.ostr << ' '; - print_database_table(); - } - else if (!volume.empty()) - print_on_volume(); - } - else if ( type == Type::RESTART_REPLICA - || type == Type::RESTORE_REPLICA - || type == Type::SYNC_REPLICA - || type == Type::WAIT_LOADING_PARTS - || type == Type::FLUSH_DISTRIBUTED - || type == Type::RELOAD_DICTIONARY - || type == Type::RELOAD_MODEL - || type == Type::RELOAD_FUNCTION - || type == Type::RESTART_DISK - || type == Type::DROP_DISK_METADATA_CACHE) 
- { - if (table) - { - settings.ostr << ' '; - print_database_table(); - } - else if (!target_model.empty()) - { - settings.ostr << ' '; - print_identifier(target_model); - } - else if (!target_function.empty()) - { - settings.ostr << ' '; - print_identifier(target_function); - } - else if (!disk.empty()) - { - settings.ostr << ' '; - print_identifier(disk); - } - - if (sync_replica_mode != SyncReplicaMode::DEFAULT) - { - settings.ostr << ' '; - print_keyword(magic_enum::enum_name(sync_replica_mode)); - - // If the mode is LIGHTWEIGHT and specific source replicas are specified - if (sync_replica_mode == SyncReplicaMode::LIGHTWEIGHT && !src_replicas.empty()) + if (table) { settings.ostr << ' '; - print_keyword("FROM"); + print_database_table(); + } + else if (!volume.empty()) + { + print_on_volume(); + } + break; + } + case Type::RESTART_REPLICA: + case Type::RESTORE_REPLICA: + case Type::SYNC_REPLICA: + case Type::WAIT_LOADING_PARTS: + case Type::FLUSH_DISTRIBUTED: + case Type::RELOAD_DICTIONARY: + case Type::RELOAD_MODEL: + case Type::RELOAD_FUNCTION: + case Type::RESTART_DISK: + case Type::DROP_DISK_METADATA_CACHE: + { + if (table) + { settings.ostr << ' '; + print_database_table(); + } + else if (!target_model.empty()) + { + settings.ostr << ' '; + print_identifier(target_model); + } + else if (!target_function.empty()) + { + settings.ostr << ' '; + print_identifier(target_function); + } + else if (!disk.empty()) + { + settings.ostr << ' '; + print_identifier(disk); + } - for (auto it = src_replicas.begin(); it != src_replicas.end(); ++it) + if (sync_replica_mode != SyncReplicaMode::DEFAULT) + { + settings.ostr << ' '; + print_keyword(magic_enum::enum_name(sync_replica_mode)); + + // If the mode is LIGHTWEIGHT and specific source replicas are specified + if (sync_replica_mode == SyncReplicaMode::LIGHTWEIGHT && !src_replicas.empty()) { - print_identifier(*it); + settings.ostr << ' '; + print_keyword("FROM"); + settings.ostr << ' '; - // Add a comma and space after each identifier, except the last one - if (std::next(it) != src_replicas.end()) - settings.ostr << ", "; + bool first = true; + for (const auto & src : src_replicas) + { + if (!first) + settings.ostr << ", "; + first = false; + settings.ostr << quoteString(src); + } } } + break; } - } - else if (type == Type::SYNC_DATABASE_REPLICA) - { - settings.ostr << ' '; - print_identifier(database->as()->name()); - } - else if (type == Type::DROP_REPLICA || type == Type::DROP_DATABASE_REPLICA) - { - print_drop_replica(); - } - else if (type == Type::SUSPEND) - { - print_keyword(" FOR ") << seconds; - print_keyword(" SECOND"); - } - else if (type == Type::DROP_FORMAT_SCHEMA_CACHE) - { - if (!schema_cache_format.empty()) - { - print_keyword(" FOR "); - print_identifier(schema_cache_format); - } - } - else if (type == Type::DROP_FILESYSTEM_CACHE) - { - if (!filesystem_cache_name.empty()) + case Type::SYNC_DATABASE_REPLICA: { settings.ostr << ' '; - print_identifier(filesystem_cache_name); - if (!key_to_drop.empty()) + print_identifier(database->as()->name()); + break; + } + case Type::DROP_REPLICA: + case Type::DROP_DATABASE_REPLICA: + { + print_drop_replica(); + break; + } + case Type::SUSPEND: + { + print_keyword(" FOR ") << seconds; + print_keyword(" SECOND"); + break; + } + case Type::DROP_FORMAT_SCHEMA_CACHE: + { + if (!schema_cache_format.empty()) { - print_keyword(" KEY "); - print_identifier(key_to_drop); - if (offset_to_drop.has_value()) + print_keyword(" FOR "); + print_identifier(schema_cache_format); + } + break; + } + case 
Type::DROP_FILESYSTEM_CACHE: + { + if (!filesystem_cache_name.empty()) + { + settings.ostr << ' ' << quoteString(filesystem_cache_name); + if (!key_to_drop.empty()) { - print_keyword(" OFFSET "); - settings.ostr << offset_to_drop.value(); + print_keyword(" KEY "); + print_identifier(key_to_drop); + if (offset_to_drop.has_value()) + { + print_keyword(" OFFSET "); + settings.ostr << offset_to_drop.value(); + } } } + break; } - } - else if (type == Type::DROP_SCHEMA_CACHE) - { - if (!schema_cache_storage.empty()) + case Type::DROP_SCHEMA_CACHE: { - print_keyword(" FOR "); - print_identifier(schema_cache_storage); - } - } - else if (type == Type::UNFREEZE) - { - print_keyword(" WITH NAME "); - settings.ostr << quoteString(backup_name); - } - else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN) - { - settings.ostr << ' '; - print_keyword(ServerType::serverTypeToString(server_type.type)); - - if (server_type.type == ServerType::Type::CUSTOM) - settings.ostr << ' ' << quoteString(server_type.custom_name); - - bool comma = false; - - if (!server_type.exclude_types.empty()) - { - print_keyword(" EXCEPT"); - - for (auto cur_type : server_type.exclude_types) + if (!schema_cache_storage.empty()) { - if (cur_type == ServerType::Type::CUSTOM) - continue; - - if (comma) - settings.ostr << ','; - else - comma = true; - - settings.ostr << ' '; - print_keyword(ServerType::serverTypeToString(cur_type)); + print_keyword(" FOR "); + print_identifier(schema_cache_storage); } + break; + } + case Type::UNFREEZE: + { + print_keyword(" WITH NAME "); + settings.ostr << quoteString(backup_name); + break; + } + case Type::START_LISTEN: + case Type::STOP_LISTEN: + { + settings.ostr << ' '; + print_keyword(ServerType::serverTypeToString(server_type.type)); - if (server_type.exclude_types.contains(ServerType::Type::CUSTOM)) + if (server_type.type == ServerType::Type::CUSTOM) + settings.ostr << ' ' << quoteString(server_type.custom_name); + + bool comma = false; + + if (!server_type.exclude_types.empty()) { - for (const auto & cur_name : server_type.exclude_custom_names) + print_keyword(" EXCEPT"); + + for (auto cur_type : server_type.exclude_types) { + if (cur_type == ServerType::Type::CUSTOM) + continue; + if (comma) settings.ostr << ','; else comma = true; settings.ostr << ' '; - print_keyword(ServerType::serverTypeToString(ServerType::Type::CUSTOM)); - settings.ostr << " " << quoteString(cur_name); + print_keyword(ServerType::serverTypeToString(cur_type)); + } + + if (server_type.exclude_types.contains(ServerType::Type::CUSTOM)) + { + for (const auto & cur_name : server_type.exclude_custom_names) + { + if (comma) + settings.ostr << ','; + else + comma = true; + + settings.ostr << ' '; + print_keyword(ServerType::serverTypeToString(ServerType::Type::CUSTOM)); + settings.ostr << " " << quoteString(cur_name); + } } } + break; } + case Type::ENABLE_FAILPOINT: + case Type::DISABLE_FAILPOINT: + { + settings.ostr << ' '; + print_identifier(fail_point_name); + break; + } + case Type::REFRESH_VIEW: + case Type::START_VIEW: + case Type::STOP_VIEW: + case Type::CANCEL_VIEW: + { + settings.ostr << ' '; + print_database_table(); + break; + } + case Type::TEST_VIEW: + { + settings.ostr << ' '; + print_database_table(); + if (!fake_time_for_view) + { + settings.ostr << ' '; + print_keyword("UNSET FAKE TIME"); + } + else + { + settings.ostr << ' '; + print_keyword("SET FAKE TIME"); + settings.ostr << " '" << LocalDateTime(*fake_time_for_view) << "'"; + } + break; + } + case Type::KILL: + case Type::SHUTDOWN: + case 
Type::DROP_DNS_CACHE: + case Type::DROP_CONNECTIONS_CACHE: + case Type::DROP_MMAP_CACHE: + case Type::DROP_QUERY_CACHE: + case Type::DROP_MARK_CACHE: + case Type::DROP_INDEX_MARK_CACHE: + case Type::DROP_UNCOMPRESSED_CACHE: + case Type::DROP_INDEX_UNCOMPRESSED_CACHE: + case Type::DROP_COMPILED_EXPRESSION_CACHE: + case Type::DROP_S3_CLIENT_CACHE: + case Type::RESET_COVERAGE: + case Type::RESTART_REPLICAS: + case Type::JEMALLOC_PURGE: + case Type::JEMALLOC_ENABLE_PROFILE: + case Type::JEMALLOC_DISABLE_PROFILE: + case Type::JEMALLOC_FLUSH_PROFILE: + case Type::SYNC_TRANSACTION_LOG: + case Type::SYNC_FILE_CACHE: + case Type::SYNC_FILESYSTEM_CACHE: + case Type::REPLICA_READY: /// Obsolete + case Type::REPLICA_UNREADY: /// Obsolete + case Type::RELOAD_DICTIONARIES: + case Type::RELOAD_EMBEDDED_DICTIONARIES: + case Type::RELOAD_MODELS: + case Type::RELOAD_FUNCTIONS: + case Type::RELOAD_CONFIG: + case Type::RELOAD_USERS: + case Type::RELOAD_ASYNCHRONOUS_METRICS: + case Type::FLUSH_LOGS: + case Type::FLUSH_ASYNC_INSERT_QUEUE: + case Type::START_THREAD_FUZZER: + case Type::STOP_THREAD_FUZZER: + case Type::START_VIEWS: + case Type::STOP_VIEWS: + case Type::DROP_PAGE_CACHE: + break; + case Type::UNKNOWN: + case Type::END: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown SYSTEM command"); } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index a74275f1638..70a9e27178d 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -22,22 +22,20 @@ public: KILL, SUSPEND, DROP_DNS_CACHE, + DROP_CONNECTIONS_CACHE, DROP_MARK_CACHE, DROP_UNCOMPRESSED_CACHE, DROP_INDEX_MARK_CACHE, DROP_INDEX_UNCOMPRESSED_CACHE, DROP_MMAP_CACHE, DROP_QUERY_CACHE, -#if USE_EMBEDDED_COMPILER DROP_COMPILED_EXPRESSION_CACHE, -#endif DROP_FILESYSTEM_CACHE, DROP_DISK_METADATA_CACHE, + DROP_PAGE_CACHE, DROP_SCHEMA_CACHE, DROP_FORMAT_SCHEMA_CACHE, -#if USE_AWS_S3 DROP_S3_CLIENT_CACHE, -#endif STOP_LISTEN, START_LISTEN, RESTART_REPLICAS, @@ -46,12 +44,10 @@ public: WAIT_LOADING_PARTS, DROP_REPLICA, DROP_DATABASE_REPLICA, -#if USE_JEMALLOC JEMALLOC_PURGE, JEMALLOC_ENABLE_PROFILE, JEMALLOC_DISABLE_PROFILE, JEMALLOC_FLUSH_PROFILE, -#endif SYNC_REPLICA, SYNC_DATABASE_REPLICA, SYNC_TRANSACTION_LOG, @@ -145,7 +141,7 @@ public: SyncReplicaMode sync_replica_mode = SyncReplicaMode::DEFAULT; - std::unordered_set src_replicas; + std::vector src_replicas; ServerType server_type; diff --git a/src/Parsers/ASTUndropQuery.cpp b/src/Parsers/ASTUndropQuery.cpp index 0b8a18b12c9..7212e264c0e 100644 --- a/src/Parsers/ASTUndropQuery.cpp +++ b/src/Parsers/ASTUndropQuery.cpp @@ -19,18 +19,26 @@ ASTPtr ASTUndropQuery::clone() const return res; } -void ASTUndropQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +void ASTUndropQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - settings.ostr << (settings.hilite ? hilite_keyword : ""); - settings.ostr << "UNDROP "; - settings.ostr << "TABLE "; - settings.ostr << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "UNDROP TABLE" + << (settings.hilite ? hilite_none : "") + << " "; - assert (table); - if (!database) - settings.ostr << backQuoteIfNeed(getTable()); - else - settings.ostr << backQuoteIfNeed(getDatabase()) + "." 
<< backQuoteIfNeed(getTable()); + chassert(table); + + if (table) + { + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(table); + table->formatImpl(settings, state, frame); + } if (uuid != UUIDHelpers::Nil) settings.ostr << (settings.hilite ? hilite_keyword : "") << " UUID " << (settings.hilite ? hilite_none : "") diff --git a/src/Parsers/ASTWatchQuery.h b/src/Parsers/ASTWatchQuery.h index 156fe6828bc..a5b76c07605 100644 --- a/src/Parsers/ASTWatchQuery.h +++ b/src/Parsers/ASTWatchQuery.h @@ -40,22 +40,30 @@ public: QueryKind getQueryKind() const override { return QueryKind::Create; } protected: - void formatQueryImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - s.ostr << (s.hilite ? hilite_keyword : "") << "WATCH " << (s.hilite ? hilite_none : "") - << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable()); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "WATCH " << (settings.hilite ? hilite_none : ""); + + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << '.'; + } + + chassert(table); + table->formatImpl(settings, state, frame); if (is_watch_events) { - s.ostr << " " << (s.hilite ? hilite_keyword : "") << "EVENTS" << (s.hilite ? hilite_none : ""); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "EVENTS" << (settings.hilite ? hilite_none : ""); } if (limit_length) { - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : ""); - limit_length->formatImpl(s, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << indent_str << "LIMIT " << (settings.hilite ? 
hilite_none : ""); + limit_length->formatImpl(settings, state, frame); } } }; diff --git a/src/Parsers/ASTWindowDefinition.cpp b/src/Parsers/ASTWindowDefinition.cpp index 29e42de3ce0..21c44a166af 100644 --- a/src/Parsers/ASTWindowDefinition.cpp +++ b/src/Parsers/ASTWindowDefinition.cpp @@ -94,9 +94,9 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings, if (!frame_is_default) { if (need_space) - { settings.ostr << " "; - } + + format_frame.need_parens = true; settings.ostr << frame_type << " BETWEEN "; if (frame_begin_type == WindowFrame::BoundaryType::Current) diff --git a/src/Parsers/ASTWindowDefinition.h b/src/Parsers/ASTWindowDefinition.h index 507825f11d2..72ad8f6abda 100644 --- a/src/Parsers/ASTWindowDefinition.h +++ b/src/Parsers/ASTWindowDefinition.h @@ -41,7 +41,6 @@ struct ASTWindowListElement : public IAST // ASTWindowDefinition ASTPtr definition; - ASTPtr clone() const override; String getID(char delimiter) const override; diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp index 3379486d789..e1b42bfb33d 100644 --- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp @@ -17,7 +17,7 @@ namespace { if (std::exchange(need_comma, true)) settings.ostr << ", "; - settings.ostr << backQuoteIfNeed(name); + settings.ostr << backQuote(name); } } diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 96306fa0dd2..02735568a04 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -18,7 +18,6 @@ namespace << quoteString(new_name); } - void formatAuthenticationData(const ASTAuthenticationData & auth_data, const IAST::FormatSettings & settings) { auth_data.format(settings); diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp index 331f1798d77..f60fa7e4a23 100644 --- a/src/Parsers/Access/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -93,6 +93,29 @@ namespace if (no_output) settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "USAGE ON " << (settings.hilite ? IAST::hilite_none : "") << "*.*"; } + + + void formatCurrentGrantsElements(const AccessRightsElements & elements, const IAST::FormatSettings & settings) + { + for (size_t i = 0; i != elements.size(); ++i) + { + const auto & element = elements[i]; + + bool next_element_on_same_db_and_table = false; + if (i != elements.size() - 1) + { + const auto & next_element = elements[i + 1]; + if (element.sameDatabaseAndTableAndParameter(next_element)) + next_element_on_same_db_and_table = true; + } + + if (!next_element_on_same_db_and_table) + { + settings.ostr << " "; + formatONClause(element, settings); + } + } + } } @@ -148,9 +171,14 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F "to grant or revoke, not both of them"); } else if (current_grants) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " CURRENT GRANTS" << (settings.hilite ? hilite_none : ""); + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "CURRENT GRANTS" << (settings.hilite ? hilite_none : ""); + formatCurrentGrantsElements(access_rights_elements, settings); + } else + { formatElementsWithoutOptions(access_rights_elements, settings); + } settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (is_revoke ? " FROM " : " TO ") << (settings.hilite ? 
IAST::hilite_none : ""); diff --git a/src/Parsers/Access/ASTSettingsProfileElement.cpp b/src/Parsers/Access/ASTSettingsProfileElement.cpp index 7b29b15cb29..014b97132de 100644 --- a/src/Parsers/Access/ASTSettingsProfileElement.cpp +++ b/src/Parsers/Access/ASTSettingsProfileElement.cpp @@ -18,7 +18,7 @@ namespace } else { - settings.ostr << backQuoteIfNeed(str); + settings.ostr << backQuote(str); } } } diff --git a/src/Parsers/Access/ASTUserNameWithHost.cpp b/src/Parsers/Access/ASTUserNameWithHost.cpp index af84399ae45..667a8e37414 100644 --- a/src/Parsers/Access/ASTUserNameWithHost.cpp +++ b/src/Parsers/Access/ASTUserNameWithHost.cpp @@ -28,6 +28,12 @@ void ASTUserNameWithHost::concatParts() host_pattern.clear(); } +void ASTUserNameWithHost::replace(const String name_) +{ + base_name = name_; + host_pattern.clear(); +} + void ASTUserNamesWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const { diff --git a/src/Parsers/Access/ASTUserNameWithHost.h b/src/Parsers/Access/ASTUserNameWithHost.h index bd28b42b48a..8e6a7e78987 100644 --- a/src/Parsers/Access/ASTUserNameWithHost.h +++ b/src/Parsers/Access/ASTUserNameWithHost.h @@ -27,6 +27,7 @@ public: String getID(char) const override { return "UserNameWithHost"; } ASTPtr clone() const override { return std::make_shared(*this); } void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + void replace(String name_); }; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index bb90bc4c5a7..8e5a4d789c6 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -69,7 +69,6 @@ namespace bool expect_public_ssh_key = false; bool expect_http_auth_server = false; - if (ParserKeyword{"WITH"}.ignore(pos, expected)) { for (auto check_type : collections::range(AuthenticationType::MAX)) diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index f495cb60786..bdb338e9338 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -200,8 +200,10 @@ namespace if (!ParserKeyword{"ON"}.ignore(pos, expected)) return false; - String database_name, table_name; - bool any_database = false, any_table = false; + String database_name; + String table_name; + bool any_database = false; + bool any_table = false; if (!parseDatabaseAndTableNameOrAsterisks(pos, expected, database_name, any_database, table_name, any_table)) return false; diff --git a/src/Parsers/Access/ParserUserNameWithHost.h b/src/Parsers/Access/ParserUserNameWithHost.h index 453b816a98d..8c85e733bef 100644 --- a/src/Parsers/Access/ParserUserNameWithHost.h +++ b/src/Parsers/Access/ParserUserNameWithHost.h @@ -5,6 +5,7 @@ namespace DB { + /** Parses a user name. * It can be a simple string or identifier or something like `name@host`. 
*/ diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 42400a0f13b..486555ae86d 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -123,7 +123,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN in a subquery cannot have a table function or table override"); /// Replace subquery `(EXPLAIN SELECT ...)` - /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` + /// with `(SELECT * FROM viewExplain('', '', (SELECT ...)))` String kind_str = ASTExplainQuery::toString(explain_query.getKind()); @@ -141,7 +141,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto view_explain = makeASTFunction("viewExplain", std::make_shared(kind_str), std::make_shared(settings_str), - explained_ast); + std::make_shared(explained_ast)); result_node = buildSelectFromTableFunction(view_explain); } else @@ -161,8 +161,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; - node = std::make_shared(); - node->children.push_back(result_node); + node = std::make_shared(std::move(result_node)); return true; } @@ -935,7 +934,7 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (float_value < 0) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Logical error: token number cannot begin with minus, " + "Token number cannot begin with minus, " "but parsed float number is less than zero."); if (negative) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 2104a71cd0d..b29f5cc4251 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -39,7 +39,7 @@ protected: class ParserTableAsStringLiteralIdentifier : public IParserBase { public: - explicit ParserTableAsStringLiteralIdentifier() {} + explicit ParserTableAsStringLiteralIdentifier() = default; protected: const char * getName() const override { return "string literal table identifier"; } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 1e9383f96ae..6d267a7d215 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -225,8 +225,7 @@ static bool modifyAST(ASTPtr ast, SubqueryFunctionType type) select_with_union_query->list_of_selects->children.push_back(std::move(select_query)); select_with_union_query->children.push_back(select_with_union_query->list_of_selects); - auto new_subquery = std::make_shared(); - new_subquery->children.push_back(select_with_union_query); + auto new_subquery = std::make_shared(std::move(select_with_union_query)); ast->children[0]->children.back() = std::move(new_subquery); return true; @@ -1582,8 +1581,7 @@ public: if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) return false; - auto subquery = std::make_shared(); - subquery->children.push_back(std::move(node)); + auto subquery = std::make_shared(std::move(node)); elements = {makeASTFunction("exists", subquery)}; finished = true; diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 6dba5a9c31f..235d5782630 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -9,10 +9,8 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wc99-extensions" -#endif namespace DB { @@ -297,6 
+295,4 @@ protected: } -#ifdef __clang__ #pragma clang diagnostic pop -#endif diff --git a/src/Parsers/FunctionSecretArgumentsFinder.h b/src/Parsers/FunctionSecretArgumentsFinder.h new file mode 100644 index 00000000000..002ad94f6ea --- /dev/null +++ b/src/Parsers/FunctionSecretArgumentsFinder.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB +{ + +class FunctionSecretArgumentsFinder +{ +public: + struct Result + { + /// Result constructed by default means no arguments will be hidden. + size_t start = static_cast(-1); + size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). + /// In all known cases secret arguments are consecutive + bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. + /// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))` + std::vector nested_maps; + + bool hasSecrets() const + { + return count != 0 || !nested_maps.empty(); + } + }; +}; + +} diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h new file mode 100644 index 00000000000..348b2ca9e3a --- /dev/null +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -0,0 +1,499 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + + +/// Finds arguments of a specified function which should not be displayed for most users for security reasons. +/// That involves passwords and secret keys. +class FunctionSecretArgumentsFinderAST +{ +public: + explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_) + { + if (!function.arguments) + return; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return; + + arguments = &expr_list->children; + switch (function.kind) + { + case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; + case ASTFunction::Kind::WINDOW_FUNCTION: break; + case ASTFunction::Kind::LAMBDA_FUNCTION: break; + case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; + case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; + case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; + } + } + + FunctionSecretArgumentsFinder::Result getResult() const { return result; } + +private: + const ASTFunction & function; + const ASTs * arguments = nullptr; + FunctionSecretArgumentsFinder::Result result; + + void markSecretArgument(size_t index, bool argument_is_named = false) + { + if (index >= arguments->size()) + return; + if (!result.count) + { + result.start = index; + result.are_named = argument_is_named; + } + chassert(index >= result.start); /// We always check arguments consecutively + result.count = index + 1 - result.start; + if (!argument_is_named) + result.are_named = false; + } + + void findOrdinaryFunctionSecretArguments() + { + if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) + /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) 
+ findMySQLFunctionSecretArguments(); + } + else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") || + (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg")) + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (function.name == "s3Cluster") + { + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ true); + } + else if ((function.name == "remote") || (function.name == "remoteSecure")) + { + /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) + findRemoteFunctionSecretArguments(); + } + else if ((function.name == "encrypt") || (function.name == "decrypt") || + (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || + (function.name == "tryDecrypt")) + { + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) + findEncryptionFunctionSecretArguments(); + } + else if (function.name == "url") + { + findURLSecretArguments(); + } + } + + void findMySQLFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// mysql(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + markSecretArgument(4); + } + } + + /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should + /// always be at the end). Marks "headers" as secret, if found. + size_t excludeS3OrURLNestedMaps() + { + size_t count = arguments->size(); + while (count > 0) + { + const ASTFunction * f = arguments->at(count - 1)->as(); + if (!f) + break; + if (f->name == "headers") + result.nested_maps.push_back(f->name); + else if (f->name != "extra_credentials") + break; + count -= 1; + } + return count; + } + + void findS3FunctionSecretArguments(bool is_cluster_function) + { + /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) 
+ /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (url_arg_idx + 2 < count) + markSecretArgument(url_arg_idx + 2); + } + + void findURLSecretArguments() + { + if (!isNamedCollectionName(0)) + excludeS3OrURLNestedMaps(); + } + + bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const + { + if (arg_idx >= arguments->size()) + return false; + + return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); + } + + static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) + { + if (const auto * literal = argument.as()) + { + if (literal->value.getType() != Field::Types::String) + return false; + if (res) + *res = literal->value.safeGet(); + return true; + } + + if (allow_identifier) + { + if (const auto * id = argument.as()) + { + if (res) + *res = id->name(); + return true; + } + } + + return false; + } + + void findRemoteFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// remote(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + return; + } + + /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: + /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) + + /// But we should check the number of arguments first because we don't need to do any replacements in case of + /// remote('addresses_expr', db.table) + if (arguments->size() < 3) + return; + + size_t arg_num = 1; + + /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. + const auto * table_function = (*arguments)[arg_num]->as(); + if (table_function && KnownTableFunctionNames::instance().exists(table_function->name)) + { + ++arg_num; + } + else + { + std::optional database; + std::optional qualified_table_name; + if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) + { + /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. + /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' + /// before the argument 'password'. So it's safer to wipe two arguments just in case. + /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `user`. + markSecretArgument(arg_num + 2); + } + if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `sharding_key`. + markSecretArgument(arg_num + 3); + } + return; + } + + /// Skip the current argument (which is either a database name or a qualified table name). + ++arg_num; + if (database) + { + /// Skip the 'table' argument if the previous argument was a database name. + ++arg_num; + } + } + + /// Skip username. + ++arg_num; + + /// Do our replacement: + /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) 
+ /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); + if (can_be_password) + markSecretArgument(arg_num); + } + + /// Tries to get either a database name or a qualified table name from an argument. + /// Empty string is also allowed (it means the default database). + /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password. + bool tryGetDatabaseNameOrQualifiedTableName( + size_t arg_idx, + std::optional & res_database, + std::optional & res_qualified_table_name) const + { + res_database.reset(); + res_qualified_table_name.reset(); + + String str; + if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) + return false; + + if (str.empty()) + { + res_database = ""; + return true; + } + + auto qualified_table_name = QualifiedTableName::tryParseFromString(str); + if (!qualified_table_name) + return false; + + if (qualified_table_name->database.empty()) + res_database = std::move(qualified_table_name->table); + else + res_qualified_table_name = std::move(qualified_table_name); + return true; + } + + void findEncryptionFunctionSecretArguments() + { + if (arguments->empty()) + return; + + /// We replace all arguments after 'mode' with '[HIDDEN]': + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') + result.start = 1; + result.count = arguments->size() - 1; + } + + void findTableEngineSecretArguments() + { + const String & engine_name = function.name; + if (engine_name == "ExternalDistributed") + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + findExternalDistributedTableEngineSecretArguments(); + } + else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || + (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) + { + /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) + /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) + findMySQLFunctionSecretArguments(); + } + else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || + (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue")) + { + /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) + findS3TableEngineSecretArguments(); + } + else if (engine_name == "URL") + { + findURLSecretArguments(); + } + } + + void findExternalDistributedTableEngineSecretArguments() + { + if (isNamedCollectionName(1)) + { + /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) 
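findEncryptionFunctionSecretArguments() above does not mark a single position; it records a (start, count) range covering every argument after 'mode'. A small sketch, assuming already-rendered string arguments, of how such a range collapses into a single '[HIDDEN]' placeholder when the call is printed; the function name is illustrative and this is not the actual formatter:

#include <cstddef>
#include <string>
#include <vector>

/// Illustrative sketch only: render name(args...) with the secret range [start, start + count)
/// collapsed into one '[HIDDEN]' placeholder.
std::string formatWithHiddenArguments(const std::string & name,
                                      const std::vector<std::string> & args,
                                      size_t start, size_t count)
{
    std::string out = name + "(";
    bool first = true;
    for (size_t i = 0; i < args.size(); ++i)
    {
        if (i >= start && i < start + count)
        {
            if (i == start)                  /// the whole secret range becomes one placeholder
            {
                out += first ? "" : ", ";
                out += "'[HIDDEN]'";
                first = false;
            }
            continue;
        }
        out += first ? "" : ", ";
        out += args[i];
        first = false;
    }
    return out + ")";
}

With this sketch, formatWithHiddenArguments("encrypt", {"'aes-256-gcm'", "'plaintext'", "'key'"}, 1, 2) produces encrypt('aes-256-gcm', '[HIDDEN]'), matching the comment above.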
+ findSecretNamedArgument("password", 2); + } + else + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + markSecretArgument(5); + } + } + + void findS3TableEngineSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'secret_access_key') + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((3 <= count) && (count <= 4)) + { + String second_arg; + if (tryGetStringFromArgument(1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (count == 3) + { + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: S3('url', 'format', ...) + } + } + } + + /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (2 < count) + markSecretArgument(2); + } + + void findDatabaseEngineSecretArguments() + { + const String & engine_name = function.name; + if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || + (engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") || + (engine_name == "MaterializedPostgreSQL")) + { + /// MySQL('host:port', 'database', 'user', 'password') + /// PostgreSQL('host:port', 'database', 'user', 'password') + findMySQLDatabaseSecretArguments(); + } + else if (engine_name == "S3") + { + /// S3('url', 'access_key_id', 'secret_access_key') + findS3DatabaseSecretArguments(); + } + } + + void findMySQLDatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// MySQL(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// MySQL('host:port', 'database', 'user', 'password') + markSecretArgument(3); + } + } + + void findS3DatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'password', ...) + findSecretNamedArgument("secret_access_key", 1); + } + else + { + /// S3('url', 'access_key_id', 'secret_access_key') + markSecretArgument(2); + } + } + + void findBackupNameSecretArguments() + { + const String & engine_name = function.name; + if (engine_name == "S3") + { + /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) + markSecretArgument(2); + } + } + + /// Whether a specified argument can be the name of a named collection? + bool isNamedCollectionName(size_t arg_idx) const + { + if (arguments->size() <= arg_idx) + return false; + + const auto * identifier = (*arguments)[arg_idx]->as(); + return identifier != nullptr; + } + + /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. 
+ void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + for (size_t i = start; i < arguments->size(); ++i) + { + const auto & argument = (*arguments)[i]; + const auto * equals_func = argument->as(); + if (!equals_func || (equals_func->name != "equals")) + continue; + + const auto * expr_list = equals_func->arguments->as(); + if (!expr_list) + continue; + + const auto & equal_args = expr_list->children; + if (equal_args.size() != 2) + continue; + + String found_key; + if (!tryGetStringFromArgument(*equal_args[0], &found_key)) + continue; + + if (found_key == key) + markSecretArgument(i, /* argument_is_named= */ true); + } + } +}; + +} diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index fa2b3b0d1c0..ee70fed0f07 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -256,6 +256,7 @@ public: bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element. bool expression_list_prepend_whitespace = false; /// Prepend whitespace (if it is required) bool surround_each_list_element_with_parens = false; + size_t list_element_index = 0; const IAST * current_select = nullptr; }; diff --git a/src/Parsers/IAST_fwd.h b/src/Parsers/IAST_fwd.h index 53d41d42d65..ff66d42dba3 100644 --- a/src/Parsers/IAST_fwd.h +++ b/src/Parsers/IAST_fwd.h @@ -18,7 +18,7 @@ using ASTs = absl::InlinedVector; namespace std { -inline typename DB::ASTs::size_type erase(DB::ASTs & asts, const DB::ASTPtr & element) +inline typename DB::ASTs::size_type erase(DB::ASTs & asts, const DB::ASTPtr & element) /// NOLINT(cert-dcl58-cpp) { auto old_size = asts.size(); asts.erase(std::remove(asts.begin(), asts.end(), element), asts.end()); @@ -26,7 +26,7 @@ inline typename DB::ASTs::size_type erase(DB::ASTs & asts, const DB::ASTPtr & el } template -inline typename DB::ASTs::size_type erase_if(DB::ASTs & asts, Predicate pred) +inline typename DB::ASTs::size_type erase_if(DB::ASTs & asts, Predicate pred) /// NOLINT(cert-dcl58-cpp) { auto old_size = asts.size(); asts.erase(std::remove_if(asts.begin(), asts.end(), pred), asts.end()); diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index d53b58baa7c..198ec0346ff 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -73,6 +74,21 @@ public: if (unlikely(max_depth > 0 && depth > max_depth)) throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Maximum parse depth ({}) exceeded. " "Consider rising max_parser_depth parameter.", max_depth); + + /** Sometimes the maximum parser depth can be set to a high value by the user, + * but we still want to avoid stack overflow. + * For this purpose, we can use the checkStackSize function, but it is too heavy. + * The solution is to check not too frequently. + * The frequency is arbitrary, but not too large, not too small, + * and a power of two to simplify the division. 
+ */ +#if defined(USE_MUSL) || defined(SANITIZER) || !defined(NDEBUG) + static constexpr uint32_t check_frequency = 128; +#else + static constexpr uint32_t check_frequency = 8192; +#endif + if (depth % check_frequency == 0) + checkStackSize(); } ALWAYS_INLINE void decreaseDepth() diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 6ef4a52ceff..152c29e5941 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -73,7 +73,7 @@ bool IParserKQLFunction::directMapping( int argument_count = 0; const auto begin = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (pos != begin) out.append(", "); @@ -148,11 +148,11 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: if (pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) return {}; - if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + if (!isValidKQLPos(pos) || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Need more argument(s) in function: {}", fn_name); std::vector tokens; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (pos->type == TokenType::OpeningRoundBracket) ++round_bracket_count; @@ -191,7 +191,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: { ++pos; String array_index; - while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket) + while (isValidKQLPos(pos) && pos->type != TokenType::ClosingSquareBracket) { array_index += getExpression(pos); ++pos; @@ -246,7 +246,7 @@ IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParse const auto * begin = pos->begin; std::stack scopes; - while (!pos->isEnd() && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket))) + while (isValidKQLPos(pos) && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket))) { const auto token_type = pos->type; if (isOpeningBracket(token_type)) @@ -357,7 +357,7 @@ String IParserKQLFunction::getExpression(IParser::Pos & pos) { ++pos; String array_index; - while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket) + while (isValidKQLPos(pos) && pos->type != TokenType::ClosingSquareBracket) { array_index += getExpression(pos); ++pos; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 8530fa6623d..cca4fb3ce33 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "Poco/String.h" #include @@ -51,7 +52,7 @@ bool DatatypeDatetime::convertImpl(String & out, IParser::Pos & pos) else { auto start = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { 
++pos; if (pos->type == TokenType::ClosingRoundBracket) @@ -77,7 +78,7 @@ bool DatatypeDynamic::convertImpl(String & out, IParser::Pos & pos) if (pos->type == TokenType::OpeningCurlyBrace) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Property bags are not supported for now in {}", function_name); - while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + while (isValidKQLPos(pos) && pos->type != TokenType::ClosingRoundBracket) { if (const auto token_type = pos->type; token_type == TokenType::BareWord || token_type == TokenType::Number || token_type == TokenType::QuotedIdentifier || token_type == TokenType::StringLiteral) @@ -117,7 +118,7 @@ bool DatatypeGuid::convertImpl(String & out, IParser::Pos & pos) else { auto start = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { ++pos; if (pos->type == TokenType::ClosingRoundBracket) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index e5f40ee604d..f059fd9aa6b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "Poco/String.h" namespace DB::ErrorCodes @@ -521,7 +522,7 @@ bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos) String second; int arg_count = 0; std::vector args; - while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + while (isValidKQLPos(pos) && pos->type != TokenType::ClosingRoundBracket) { String arg = getConvertedArgument(fn_name, pos); args.insert(args.begin(), arg); @@ -588,7 +589,7 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) String arguments; int arg_count = 0; - while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + while (isValidKQLPos(pos) && pos->type != TokenType::ClosingRoundBracket) { String arg = getConvertedArgument(fn_name, pos); if (pos->type == TokenType::Comma) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index adac892b49d..044cc2e0622 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -359,7 +359,7 @@ std::unique_ptr KQLFunctionFactory::get(String & kql_functio return std::make_unique(); case KQLFunctionValue::extract_json: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::has_any_index: return std::make_unique(); @@ -389,7 +389,7 @@ std::unique_ptr KQLFunctionFactory::get(String & kql_functio return std::make_unique(); case KQLFunctionValue::parse_json: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::parse_url: return std::make_unique(); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 0f9ca67d6dc..82cfa68b180 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include #include #include #include @@ -240,7 +240,7 @@ bool ExtractAll::convertImpl(String & out, IParser::Pos & pos) return true; } -bool ExtractJson::convertImpl(String & out, IParser::Pos & pos) +bool ExtractJSON::convertImpl(String & out, IParser::Pos & 
pos) { String datatype = "String"; ParserKeyword s_kql("typeof"); @@ -431,7 +431,7 @@ bool ParseCSV::convertImpl(String & out, IParser::Pos & pos) return true; } -bool ParseJson::convertImpl(String & out, IParser::Pos & pos) +bool ParseJSON::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) @@ -590,7 +590,7 @@ bool StrCatDelim::convertImpl(String & out, IParser::Pos & pos) int arg_count = 0; String args; - while (!pos->isEnd() && pos->type != TokenType::Semicolon && pos->type != TokenType::ClosingRoundBracket) + while (isValidKQLPos(pos) && pos->type != TokenType::Semicolon && pos->type != TokenType::ClosingRoundBracket) { ++pos; String arg = getConvertedArgument(fn_name, pos); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h index 492a59263ec..9b0c6327e01 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h @@ -62,7 +62,7 @@ protected: bool convertImpl(String & out, IParser::Pos & pos) override; }; -class ExtractJson : public IParserKQLFunction +class ExtractJSON : public IParserKQLFunction { protected: const char * getName() const override { return "extract_json(), extractjson()"; } @@ -125,7 +125,7 @@ protected: bool convertImpl(String & out, IParser::Pos & pos) override; }; -class ParseJson : public IParserKQLFunction +class ParseJSON : public IParserKQLFunction { protected: const char * getName() const override { return "parse_json()"; } diff --git a/src/Parsers/Kusto/ParserKQLExtend.cpp b/src/Parsers/Kusto/ParserKQLExtend.cpp index ac5beb80576..b37618f69fd 100644 --- a/src/Parsers/Kusto/ParserKQLExtend.cpp +++ b/src/Parsers/Kusto/ParserKQLExtend.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) int32_t round_bracket_count = 0; int32_t square_bracket_count = 0; - while (!npos->isEnd()) + while (isValidKQLPos(npos)) { if (npos->type == TokenType::OpeningRoundBracket) ++round_bracket_count; diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.cpp b/src/Parsers/Kusto/ParserKQLMVExpand.cpp index bf5fbe64d0b..7d242dffaf7 100644 --- a/src/Parsers/Kusto/ParserKQLMVExpand.cpp +++ b/src/Parsers/Kusto/ParserKQLMVExpand.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,7 @@ bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_ex String to_type; --expr_end_pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (pos->type == TokenType::OpeningRoundBracket) ++bracket_count; @@ -125,9 +126,9 @@ bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_ex if (String(pos->begin, pos->end) == "limit") break; - if (!pos->isEnd()) + if (isValidKQLPos(pos)) ++pos; - if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + if (!isValidKQLPos(pos) || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) { if (expr_end_pos < expr_begin_pos) { diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index a3727653049..e89423e2fc9 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -6,6 +6,7 @@ 
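The hunks in these KQL files all make the same change: raw `!pos->isEnd()` loop guards become `isValidKQLPos(pos)`, the helper added to src/Parsers/Kusto/Utilities.cpp further down in this diff, which additionally tolerates a few token types the KQL dialect accepts. A hedged sketch of how such a guard is typically used in a token-scanning loop; collectUntilPipe is an illustrative name and not part of the patch:

#include <string>
#include <Parsers/IParser.h>
#include <Parsers/Kusto/Utilities.h>

using namespace DB;

/// Illustrative only: gathers the raw text of tokens up to the next '|' or ';',
/// using isValidKQLPos() as the loop guard the way the hunks above do.
static std::string collectUntilPipe(IParser::Pos & pos)
{
    std::string text;
    while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
    {
        if (!text.empty())
            text += ' ';
        text.append(pos->begin, pos->end);
        ++pos;
    }
    return text;
}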
#include #include #include +#include #include #include @@ -39,7 +40,7 @@ bool ParserKQLMakeSeries ::parseAggregationColumns(AggregationColumns & aggregat ParserToken close_bracket(TokenType::ClosingRoundBracket); ParserToken comma(TokenType::Comma); - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String alias; String aggregation_fun; @@ -96,7 +97,7 @@ bool ParserKQLMakeSeries ::parseFromToStepClause(FromToStepClause & from_to_step auto step_pos = begin; auto end_pos = begin; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (String(pos->begin, pos->end) == "from") from_pos = pos; @@ -175,7 +176,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & Tokens tokens(src.c_str(), src.c_str() + src.size()); IParser::Pos pos(tokens, max_depth); String res; - while (!pos->isEnd()) + while (isValidKQLPos(pos)) { String tmp = String(pos->begin, pos->end); if (tmp == "parseDateTime64BestEffortOrNull") @@ -201,7 +202,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & std::vector group_expression_tokens; Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); IParser::Pos pos(tokens, max_depth); - while (!pos->isEnd()) + while (isValidKQLPos(pos)) { if (String(pos->begin, pos->end) == "AS") { diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 47982a5f73e..d7364cb5fd7 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -148,7 +149,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector & tokens, IParser::P String logic_op = (kql_op == "has_all") ? 
" and " : " or "; - while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + while (isValidKQLPos(token_pos) && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) { auto tmp_arg = IParserKQLFunction::getExpression(token_pos); if (token_pos->type == TokenType::Comma) @@ -217,7 +218,7 @@ String genInOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos --token_pos; new_expr += ch_op; - while (!token_pos->isEnd() && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) + while (isValidKQLPos(token_pos) && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) { auto tmp_arg = String(token_pos->begin, token_pos->end); if (token_pos->type != DB::TokenType::Comma && token_pos->type != DB::TokenType::ClosingRoundBracket @@ -329,7 +330,7 @@ bool KQLOperators::convert(std::vector & tokens, IParser::Pos & pos) { auto begin = pos; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -339,14 +340,14 @@ bool KQLOperators::convert(std::vector & tokens, IParser::Pos & pos) if (token == "!") { ++pos; - if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + if (!isValidKQLPos(pos) || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid negative operator"); op = "!" + String(pos->begin, pos->end); } else if (token == "matches") { ++pos; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (String(pos->begin, pos->end) == "regex") op += " regex"; @@ -360,7 +361,7 @@ bool KQLOperators::convert(std::vector & tokens, IParser::Pos & pos) } ++pos; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (String(pos->begin, pos->end) == "~") op += "~"; diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 47986943662..a54a2b0eda9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -59,33 +60,34 @@ bool ParserKQLBase::setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bo if (!select_query || !select_query->as()->tables() || select_query->as()->tables()->as()->children.empty()) return false; - table_expr = select_query->as()->tables()->as()->children[0]; + table_expr = select_query->as()->tables()->as()->children.at(0); table_expr->as()->table_expression - = source->as()->tables()->children[0]->as()->table_expression; + = source->as()->tables()->children.at(0)->as()->table_expression; + table_expr->children.at(0) = table_expr->as()->table_expression; return true; } if (!select_query || select_query->as()->children.empty() - || !select_query->as()->children[0]->as()->table_expression + || !select_query->as()->children.at(0)->as()->table_expression || select_query->as() - ->children[0] + ->children.at(0) ->as() ->table_expression->as() ->subquery->children.empty() || select_query->as() - ->children[0] + 
->children.at(0) ->as() ->table_expression->as() - ->subquery->children[0] + ->subquery->children.at(0) ->as() ->list_of_selects->children.empty() || select_query->as() - ->children[0] + ->children.at(0) ->as() ->table_expression->as() - ->subquery->children[0] + ->subquery->children.at(0) ->as() - ->list_of_selects->children[0] + ->list_of_selects->children.at(0) ->as() ->tables() ->as() @@ -93,28 +95,29 @@ bool ParserKQLBase::setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bo return false; table_expr = select_query->as() - ->children[0] + ->children.at(0) ->as() ->table_expression->as() - ->subquery->children[0] + ->subquery->children.at(0) ->as() - ->list_of_selects->children[0] + ->list_of_selects->children.at(0) ->as() ->tables() ->as() - ->children[0]; + ->children.at(0); if (!src_is_subquery) { table_expr->as()->table_expression - = source->as()->tables()->children[0]->as()->table_expression; + = source->as()->tables()->children.at(0)->as()->table_expression; } else { table_expr->as()->table_expression - = source->children[0]->as()->table_expression; + = source->children.at(0)->as()->table_expression; } + table_expr->children.at(0) = table_expr->as()->table_expression; return true; } @@ -130,7 +133,7 @@ String ParserKQLBase::getExprFromPipe(Pos & pos) { BracketCount bracket_count; auto end = pos; - while (!end->isEnd() && end->type != TokenType::Semicolon) + while (isValidKQLPos(end) && end->type != TokenType::Semicolon) { bracket_count.count(end); if (end->type == TokenType::PipeMark && bracket_count.isZero()) @@ -149,7 +152,7 @@ String ParserKQLBase::getExprFromToken(Pos & pos) comma_pos.push_back(pos); size_t paren_count = 0; - while (!pos->isEnd() && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::Semicolon) { if (pos->type == TokenType::PipeMark && paren_count == 0) break; @@ -373,7 +376,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) uint16_t bracket_count = 0; - while (!pos->isEnd() && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::Semicolon) { if (pos->type == TokenType::OpeningRoundBracket) ++bracket_count; @@ -383,6 +386,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark && bracket_count == 0) { ++pos; + if (!isValidKQLPos(pos)) + return false; + String kql_operator(pos->begin, pos->end); auto validate_kql_operator = [&] @@ -390,6 +396,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (kql_operator == "order" || kql_operator == "sort") { ++pos; + if (!isValidKQLPos(pos)) + return false; + ParserKeyword s_by("by"); if (s_by.ignore(pos, expected)) { @@ -401,11 +410,15 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto op_pos_begin = pos; ++pos; + if (!isValidKQLPos(pos)) + return false; + ParserToken s_dash(TokenType::Minus); if (s_dash.ignore(pos, expected)) { - String tmp_op(op_pos_begin->begin, pos->end); - kql_operator = tmp_op; + if (!isValidKQLPos(pos)) + return false; + kql_operator = String(op_pos_begin->begin, pos->end); } else --pos; @@ -418,6 +431,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!validate_kql_operator()) return false; ++pos; + if (!isValidKQLPos(pos)) + return false; + operation_pos.push_back(std::make_pair(kql_operator, pos)); } else @@ -576,20 +592,19 @@ bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if 
(!ParserKQLTableFunction().parse(pos, select_node, expected)) return false; - ASTPtr node_subquery = std::make_shared(); - node_subquery->children.push_back(select_node); + ASTPtr node_subquery = std::make_shared(std::move(select_node)); ASTPtr node_table_expr = std::make_shared(); node_table_expr->as()->subquery = node_subquery; node_table_expr->children.emplace_back(node_subquery); - ASTPtr node_table_in_select_query_emlement = std::make_shared(); - node_table_in_select_query_emlement->as()->table_expression = node_table_expr; + ASTPtr node_table_in_select_query_element = std::make_shared(); + node_table_in_select_query_element->as()->table_expression = node_table_expr; ASTPtr res = std::make_shared(); - res->children.emplace_back(node_table_in_select_query_emlement); + res->children.emplace_back(node_table_in_select_query_element); node = res; return true; @@ -618,20 +633,20 @@ bool ParserSimpleCHSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ASTSelectQuery::Expression::TABLES, parent_select_node->as()->tables()); } - ASTPtr node_subquery = std::make_shared(); - node_subquery->children.push_back(sub_select_node); + ASTPtr node_subquery = std::make_shared(std::move(sub_select_node)); ASTPtr node_table_expr = std::make_shared(); node_table_expr->as()->subquery = node_subquery; node_table_expr->children.emplace_back(node_subquery); - ASTPtr node_table_in_select_query_emlement = std::make_shared(); - node_table_in_select_query_emlement->as()->table_expression = node_table_expr; + ASTPtr node_table_in_select_query_element = std::make_shared(); + node_table_in_select_query_element->as()->table_expression = node_table_expr; + node_table_in_select_query_element->children.emplace_back(node_table_expr); ASTPtr res = std::make_shared(); - res->children.emplace_back(node_table_in_select_query_emlement); + res->children.emplace_back(node_table_in_select_query_element); node = res; return true; diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 516acb09dea..a594f43ceec 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -9,11 +9,11 @@ class ParserKQLBase : public IParserBase { public: static String getExprFromToken(Pos & pos); - static String getExprFromToken(const String & text, const uint32_t max_depth); + static String getExprFromToken(const String & text, uint32_t max_depth); static String getExprFromPipe(Pos & pos); static bool setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bool dest_is_subquery, bool src_is_subquery); static bool parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth); - bool parseByString(const String expr, ASTPtr & node, const uint32_t max_depth); + bool parseByString(String expr, ASTPtr & node, uint32_t max_depth); }; class ParserKQLQuery : public IParserBase @@ -34,7 +34,7 @@ protected: class ParserSimpleCHSubquery : public ParserKQLBase { public: - ParserSimpleCHSubquery(ASTPtr parent_select_node_ = nullptr) { parent_select_node = parent_select_node_; } + explicit ParserSimpleCHSubquery(ASTPtr parent_select_node_ = nullptr) { parent_select_node = parent_select_node_; } protected: const char * getName() const override { return "Simple ClickHouse subquery"; } diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 193b75567aa..7e5ac2b17e7 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace 
DB { @@ -24,7 +25,7 @@ bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!order_list.parse(pos_backup, order_expression_list, expected)) return false; - while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) + while (isValidKQLPos(new_pos) && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) { String tmp(new_pos->begin, new_pos->end); if (tmp == "desc" || tmp == "asc") diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index f61e2e99ffb..668696fa9dc 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -79,7 +80,7 @@ bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expe { ++paren_count; auto pos_start = pos; - while (!pos->isEnd()) + while (isValidKQLPos(pos)) { if (pos->type == TokenType::ClosingRoundBracket) --paren_count; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 11518fcc684..a45717930bb 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -120,7 +121,7 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (String(equal_pos->begin, equal_pos->end) != "=") { String groupby_fun = String(begin_pos->begin, begin_pos->end); - if (equal_pos->isEnd() || equal_pos->type == TokenType::Comma || equal_pos->type == TokenType::Semicolon + if (!equal_pos.isValid() || equal_pos->type == TokenType::Comma || equal_pos->type == TokenType::Semicolon || equal_pos->type == TokenType::PipeMark) { expr = groupby_fun; @@ -151,7 +152,7 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } }; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (pos->type == TokenType::OpeningRoundBracket) ++bracket_count; diff --git a/src/Parsers/Kusto/Utilities.cpp b/src/Parsers/Kusto/Utilities.cpp index 72c1ee86628..c43c9784328 100644 --- a/src/Parsers/Kusto/Utilities.cpp +++ b/src/Parsers/Kusto/Utilities.cpp @@ -71,4 +71,12 @@ ASTPtr wrapInSelectWithUnion(const ASTPtr & select_query) return select_with_union_query; } + +bool isValidKQLPos(IParser::Pos & pos) +{ + return (pos.isValid() || + pos->type == TokenType::ErrorSingleExclamationMark || // allow kql negative operators + pos->type == TokenType::ErrorWrongNumber || // allow kql timespan data type with decimal like 2.6h + std::string_view(pos->begin, pos->end) == "~"); // allow kql Case-Sensitive operators +} } diff --git a/src/Parsers/Kusto/Utilities.h b/src/Parsers/Kusto/Utilities.h index f19fad72287..ae84a13a100 100644 --- a/src/Parsers/Kusto/Utilities.h +++ b/src/Parsers/Kusto/Utilities.h @@ -10,4 +10,5 @@ String extractTokenWithoutQuotes(IParser::Pos & pos); void setSelectAll(ASTSelectQuery & select_query); String wildcardToRegex(const String & wildcard); ASTPtr wrapInSelectWithUnion(const ASTPtr & select_query); +bool isValidKQLPos(IParser::Pos & pos); } diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index 6e248b647c9..62fb42ded28 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ 
b/src/Parsers/MySQL/ASTDeclareOption.h @@ -79,7 +79,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - ParserDeclareOptionImpl(const std::vector & options_collection_) : options_collection(options_collection_) {} + explicit ParserDeclareOptionImpl(const std::vector & options_collection_) : options_collection(options_collection_) {} }; using ParserDeclareOption = ParserDeclareOptionImpl; diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index d72fb493368..b1cc7622e00 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -40,6 +40,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_modify_setting("MODIFY SETTING"); ParserKeyword s_reset_setting("RESET SETTING"); ParserKeyword s_modify_query("MODIFY QUERY"); + ParserKeyword s_modify_sql_security("MODIFY SQL SECURITY"); ParserKeyword s_modify_refresh("MODIFY REFRESH"); ParserKeyword s_add_index("ADD INDEX"); @@ -63,9 +64,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_add("ADD"); ParserKeyword s_drop("DROP"); - ParserKeyword s_suspend("SUSPEND"); - ParserKeyword s_resume("RESUME"); - ParserKeyword s_refresh("REFRESH"); ParserKeyword s_modify("MODIFY"); ParserKeyword s_attach_partition("ATTACH PARTITION"); @@ -74,6 +72,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_detach_part("DETACH PART"); ParserKeyword s_drop_partition("DROP PARTITION"); ParserKeyword s_drop_part("DROP PART"); + ParserKeyword s_forget_partition("FORGET PARTITION"); ParserKeyword s_move_partition("MOVE PARTITION"); ParserKeyword s_move_part("MOVE PART"); ParserKeyword s_drop_detached_partition("DROP DETACHED PARTITION"); @@ -117,6 +116,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_remove_sample_by("REMOVE SAMPLE BY"); ParserKeyword s_apply_deleted_mask("APPLY DELETED MASK"); + ParserToken parser_opening_round_bracket(TokenType::OpeningRoundBracket); + ParserToken parser_closing_round_bracket(TokenType::ClosingRoundBracket); + ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; ParserStringAndSubstitution parser_string_and_substituion; @@ -136,8 +138,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserList parser_reset_setting( std::make_unique(), std::make_unique(TokenType::Comma), /* allow_empty = */ false); - ParserNameList values_p; ParserSelectWithUnionQuery select_p; + ParserSQLSecurity sql_security_p; ParserRefreshStrategy refresh_p; ParserTTLExpressionList parser_ttl_list; @@ -160,21 +162,17 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ASTPtr command_settings_changes; ASTPtr command_settings_resets; ASTPtr command_select; - ASTPtr command_values; ASTPtr command_rename_to; + ASTPtr command_sql_security; + + if (with_round_bracket) + { + if (!parser_opening_round_bracket.ignore(pos, expected)) + return false; + } switch (alter_object) { - case ASTAlterQuery::AlterObjectType::LIVE_VIEW: - { - if (s_refresh.ignore(pos, expected)) - { - command->type = ASTAlterCommand::LIVE_VIEW_REFRESH; - } - else - return false; - break; - } case ASTAlterQuery::AlterObjectType::DATABASE: { if (s_modify_setting.ignore(pos, expected)) @@ -254,6 +252,13 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = 
ASTAlterCommand::DROP_PARTITION; command->part = true; } + else if (s_forget_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command_partition, expected)) + return false; + + command->type = ASTAlterCommand::FORGET_PARTITION; + } else if (s_drop_detached_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command_partition, expected)) @@ -857,6 +862,14 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; command->type = ASTAlterCommand::MODIFY_QUERY; } + else if (s_modify_sql_security.ignore(pos, expected)) + { + /// This is a hack so we can reuse parser from create and don't have to write `MODIFY SQL SECURITY SQL SECURITY INVOKER` + pos -= 2; + if (!sql_security_p.parse(pos, command_sql_security, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_SQL_SECURITY; + } else if (s_modify_refresh.ignore(pos, expected)) { if (!refresh_p.parse(pos, command->refresh, expected)) @@ -885,6 +898,12 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } } + if (with_round_bracket) + { + if (!parser_closing_round_bracket.ignore(pos, expected)) + return false; + } + if (command_col_decl) command->col_decl = command->children.emplace_back(std::move(command_col_decl)).get(); if (command_column) @@ -923,8 +942,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->settings_resets = command->children.emplace_back(std::move(command_settings_resets)).get(); if (command_select) command->select = command->children.emplace_back(std::move(command_select)).get(); - if (command_values) - command->values = command->children.emplace_back(std::move(command_values)).get(); + if (command_sql_security) + command->sql_security = command->children.emplace_back(std::move(command_sql_security)).get(); if (command_rename_to) command->rename_to = command->children.emplace_back(std::move(command_rename_to)).get(); @@ -938,7 +957,10 @@ bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expe node = command_list; ParserToken s_comma(TokenType::Comma); - ParserAlterCommand p_command(alter_object); + + const auto with_round_bracket = pos->type == TokenType::OpeningRoundBracket; + + ParserAlterCommand p_command(with_round_bracket, alter_object); do { @@ -960,7 +982,6 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_alter_table("ALTER TABLE"); ParserKeyword s_alter_temporary_table("ALTER TEMPORARY TABLE"); - ParserKeyword s_alter_live_view("ALTER LIVE VIEW"); ParserKeyword s_alter_database("ALTER DATABASE"); ASTAlterQuery::AlterObjectType alter_object_type; @@ -969,10 +990,6 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { alter_object_type = ASTAlterQuery::AlterObjectType::TABLE; } - else if (s_alter_live_view.ignore(pos, expected)) - { - alter_object_type = ASTAlterQuery::AlterObjectType::LIVE_VIEW; - } else if (s_alter_database.ignore(pos, expected)) { alter_object_type = ASTAlterQuery::AlterObjectType::DATABASE; diff --git a/src/Parsers/ParserAlterQuery.h b/src/Parsers/ParserAlterQuery.h index b0029ff88fd..50a8f6f68d4 100644 --- a/src/Parsers/ParserAlterQuery.h +++ b/src/Parsers/ParserAlterQuery.h @@ -28,8 +28,6 @@ namespace DB * [DROP INDEX [IF EXISTS] index_name] * [CLEAR INDEX [IF EXISTS] index_name IN PARTITION partition] * [MATERIALIZE INDEX [IF EXISTS] index_name [IN PARTITION partition]] - * ALTER LIVE VIEW [db.name] - * [REFRESH] */ class 
ParserAlterQuery : public IParserBase @@ -49,7 +47,7 @@ protected: public: ASTAlterQuery::AlterObjectType alter_object; - ParserAlterCommandList(ASTAlterQuery::AlterObjectType alter_object_ = ASTAlterQuery::AlterObjectType::TABLE) + explicit ParserAlterCommandList(ASTAlterQuery::AlterObjectType alter_object_ = ASTAlterQuery::AlterObjectType::TABLE) : alter_object(alter_object_) {} }; @@ -61,10 +59,15 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: + bool with_round_bracket; ASTAlterQuery::AlterObjectType alter_object; - ParserAlterCommand(ASTAlterQuery::AlterObjectType alter_object_ = ASTAlterQuery::AlterObjectType::TABLE) - : alter_object(alter_object_) {} + + explicit ParserAlterCommand( + bool with_round_bracket_, ASTAlterQuery::AlterObjectType alter_object_ = ASTAlterQuery::AlterObjectType::TABLE) + : with_round_bracket(with_round_bracket_), alter_object(alter_object_) + { + } }; diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 81954e3c247..e654656b68a 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -25,7 +25,6 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected ParserUnsignedInteger granularity_p; ASTPtr expr; - ASTPtr order; ASTPtr type; ASTPtr granularity; @@ -35,7 +34,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected } else if (open.ignore(pos, expected)) { - if (!order_list.parse(pos, order, expected)) + if (!order_list.parse(pos, expr, expected)) return false; if (!close.ignore(pos, expected)) @@ -72,7 +71,6 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } node = index; - return true; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 1f6f68c9d8e..8ebadf4606f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -84,6 +85,65 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +bool ParserSQLSecurity::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserToken s_eq(TokenType::Equals); + ParserKeyword s_definer("DEFINER"); + + bool is_definer_current_user = false; + ASTPtr definer; + std::optional type; + + while (true) + { + if (!definer && s_definer.ignore(pos, expected)) + { + s_eq.ignore(pos, expected); + if (ParserKeyword{"CURRENT_USER"}.ignore(pos, expected)) + is_definer_current_user = true; + else if (!ParserUserNameWithHost{}.parse(pos, definer, expected)) + return false; + + continue; + } + + if (!type && ParserKeyword{"SQL SECURITY"}.ignore(pos, expected)) + { + if (s_definer.ignore(pos, expected)) + type = SQLSecurityType::DEFINER; + else if (ParserKeyword{"INVOKER"}.ignore(pos, expected)) + type = SQLSecurityType::INVOKER; + else if (ParserKeyword{"NONE"}.ignore(pos, expected)) + type = SQLSecurityType::NONE; + else + return false; + + continue; + } + + break; + } + + if (!type) + { + if (is_definer_current_user || definer) + type = SQLSecurityType::DEFINER; + else + return false; + } + else if (type == SQLSecurityType::DEFINER && !definer) + is_definer_current_user = true; + + auto result = std::make_shared(); + result->is_definer_current_user = is_definer_current_user; + result->type = type; + if (definer) + result->definer = typeid_cast>(definer); + + node = 
std::move(result); + return true; +} + bool ParserIdentifierWithParameters::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -684,6 +744,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->database = table_id->getDatabase(); query->table = table_id->getTable(); query->uuid = table_id->uuid; + query->has_uuid = table_id->uuid != UUIDHelpers::Nil; if (query->database) query->children.push_back(query->database); @@ -783,6 +844,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->database = table_id->getDatabase(); query->table = table_id->getTable(); query->uuid = table_id->uuid; + query->has_uuid = table_id->uuid != UUIDHelpers::Nil; query->cluster = cluster_str; if (query->database) @@ -847,6 +909,7 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ParserStorage storage_inner{ParserStorage::TABLE_ENGINE}; ParserTablePropertiesDeclarationList table_properties_p; ParserSelectWithUnionQuery select_p; + ParserSQLSecurity sql_security_p; ASTPtr table; ASTPtr to_table; @@ -854,14 +917,11 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr as_database; ASTPtr as_table; ASTPtr select; - ASTPtr live_view_periodic_refresh; + ASTPtr sql_security; String cluster_str; bool attach = false; bool if_not_exists = false; - bool with_and = false; - bool with_timeout = false; - bool with_periodic_refresh = false; if (!s_create.ignore(pos, expected)) { @@ -871,6 +931,8 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e return false; } + sql_security_p.parse(pos, sql_security, expected); + if (!s_live.ignore(pos, expected)) return false; @@ -883,23 +945,6 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (!table_name_p.parse(pos, table, expected)) return false; - if (ParserKeyword{"WITH"}.ignore(pos, expected)) - { - if (ParserKeyword{"REFRESH"}.ignore(pos, expected) || ParserKeyword{"PERIODIC REFRESH"}.ignore(pos, expected)) - { - if (!ParserNumber{}.parse(pos, live_view_periodic_refresh, expected)) - live_view_periodic_refresh = std::make_shared(static_cast(DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC)); - - with_periodic_refresh = true; - } - - else if (with_and) - return false; - - if (!with_timeout && !with_periodic_refresh) - return false; - } - if (ParserKeyword{"ON"}.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) @@ -923,6 +968,9 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e return false; } + if (!sql_security && !sql_security_p.parse(pos, sql_security, expected)) + sql_security = std::make_shared(); + /// AS SELECT ... 
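For reference, the clause shapes the new ParserSQLSecurity above is written to accept: DEFINER and SQL SECURITY may appear in either order, and a bare DEFINER implies SQL SECURITY DEFINER. The statements below are illustrative examples inferred from the parser code, not taken from the patch or its tests:

/// Illustrative examples only.
static const char * sql_security_examples[] =
{
    "CREATE VIEW v1 DEFINER = alice SQL SECURITY DEFINER AS SELECT 1",
    "CREATE VIEW v2 SQL SECURITY DEFINER DEFINER = alice AS SELECT 1",
    "CREATE VIEW v3 DEFINER = CURRENT_USER AS SELECT 1",   /// implies SQL SECURITY DEFINER
    "CREATE VIEW v4 SQL SECURITY INVOKER AS SELECT 1",
    "CREATE VIEW v5 SQL SECURITY NONE AS SELECT 1",
};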
if (!s_as.ignore(pos, expected)) return false; @@ -959,12 +1007,12 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); - if (live_view_periodic_refresh) - query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as().value.safeGet()); - if (comment) query->set(query->comment, comment); + if (sql_security) + query->sql_security = typeid_cast>(sql_security); + return true; } @@ -1382,6 +1430,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserTablePropertiesDeclarationList table_properties_p; ParserSelectWithUnionQuery select_p; ParserNameList names_p; + ParserSQLSecurity sql_security_p; ASTPtr table; ASTPtr to_table; @@ -1391,6 +1440,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ASTPtr as_database; ASTPtr as_table; ASTPtr select; + ASTPtr sql_security; ASTPtr refresh_strategy; String cluster_str; @@ -1416,6 +1466,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec replace_view = true; } + sql_security_p.parse(pos, sql_security, expected); + if (!replace_view && s_materialized.ignore(pos, expected)) { is_materialized_view = true; @@ -1508,6 +1560,9 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } } + if (!sql_security) + sql_security_p.parse(pos, sql_security, expected); + /// AS SELECT ... if (!s_as.ignore(pos, expected)) return false; @@ -1550,6 +1605,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->set(query->refresh_strategy, refresh_strategy); if (comment) query->set(query->comment, comment); + if (sql_security) + query->sql_security = typeid_cast>(sql_security); tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index c9059324bbe..440a8bc1dc7 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -25,6 +25,14 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/** Parses sql security option. DEFINER = user_name SQL SECURITY DEFINER + */ +class ParserSQLSecurity : public IParserBase +{ +protected: + const char * getName() const override { return "sql security"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; /** Storage engine or Codec. 
For example: * Memory() @@ -220,11 +228,9 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; if (!type_parser.parse(pos, type, expected)) return false; - if (s_collate.ignore(pos, expected)) - { - if (!collation_parser.parse(pos, collation_expression, expected)) - return false; - } + if (s_collate.ignore(pos, expected) + && !collation_parser.parse(pos, collation_expression, expected)) + return false; } if (allow_null_modifiers) @@ -239,6 +245,11 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E null_modifier.emplace(true); } + /// Collate is also allowed after NULL/NOT NULL + if (!collation_expression && s_collate.ignore(pos, expected) + && !collation_parser.parse(pos, collation_expression, expected)) + return false; + Pos pos_before_specifier = pos; if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) || s_alias.ignore(pos, expected)) { @@ -523,7 +534,7 @@ public: DATABASE_ENGINE, }; - ParserStorage(EngineKind engine_kind_) : engine_kind(engine_kind_) {} + explicit ParserStorage(EngineKind engine_kind_) : engine_kind(engine_kind_) {} protected: const char * getName() const override { return "storage definition"; } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 7a627ae5f6a..4a0b928608b 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -136,6 +136,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserStringLiteral compression; if (!compression.parse(pos, query_with_output.compression, expected)) return false; + query_with_output.children.push_back(query_with_output.compression); ParserKeyword s_compression_level("LEVEL"); if (s_compression_level.ignore(pos, expected)) @@ -143,6 +144,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserNumber compression_level; if (!compression_level.parse(pos, query_with_output.compression_level, expected)) return false; + query_with_output.children.push_back(query_with_output.compression_level); } } diff --git a/src/Parsers/ParserRenameQuery.cpp b/src/Parsers/ParserRenameQuery.cpp index 27f2ed1cd22..f9d29108ed6 100644 --- a/src/Parsers/ParserRenameQuery.cpp +++ b/src/Parsers/ParserRenameQuery.cpp @@ -44,15 +44,14 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; } + ASTRenameQuery::Elements rename_elements; + rename_elements.emplace_back(); + rename_elements.back().if_exists = if_exists; + rename_elements.back().from.database = from_db; + rename_elements.back().to.database = to_db; - auto query = std::make_shared(); + auto query = std::make_shared(std::move(rename_elements)); query->database = true; - query->elements.emplace({}); - query->elements.front().if_exists = if_exists; - query->elements.front().from.database = from_db; - query->elements.front().to.database = to_db; - query->children.push_back(std::move(from_db)); - query->children.push_back(std::move(to_db)); query->cluster = cluster_str; node = query; return true; @@ -75,9 +74,8 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) const auto ignore_delim = [&] { return exchange ? 
s_and.ignore(pos) : s_to.ignore(pos); }; - auto query = std::make_shared(); - ASTRenameQuery::Elements & elements = query->elements; + ASTRenameQuery::Elements elements; while (true) { @@ -93,15 +91,6 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || !ignore_delim() || !parseDatabaseAndTableAsAST(pos, expected, ref.to.database, ref.to.table)) return false; - - if (ref.from.database) - query->children.push_back(ref.from.database); - if (ref.from.table) - query->children.push_back(ref.from.table); - if (ref.to.database) - query->children.push_back(ref.to.database); - if (ref.to.table) - query->children.push_back(ref.to.table); } String cluster_str; @@ -111,6 +100,7 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } + auto query = std::make_shared(std::move(elements)); query->cluster = cluster_str; query->exchange = exchange; query->dictionary = dictionary; diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 4df74c2dd82..4ad715ba499 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -314,7 +314,7 @@ bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } SettingsChanges changes; - NameToNameMap query_parameters; + NameToNameVector query_parameters; std::vector default_settings; while (true) @@ -330,7 +330,7 @@ bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; if (!parameter.first.empty()) - query_parameters.emplace(std::move(parameter)); + query_parameters.emplace_back(std::move(parameter)); else if (!name_of_default_setting.empty()) default_settings.emplace_back(std::move(name_of_default_setting)); else diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 24979af9d40..a50e65aa134 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -293,7 +293,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr replica_ast; if (!ParserStringLiteral{}.parse(pos, replica_ast, expected)) return false; - res->src_replicas.insert(replica_ast->as().value.safeGet()); + res->src_replicas.emplace_back(replica_ast->as().value.safeGet()); } while (ParserToken{TokenType::Comma}.ignore(pos, expected)); } } diff --git a/src/Parsers/ParserUndropQuery.cpp b/src/Parsers/ParserUndropQuery.cpp index 3784ab0f353..e67fd7f5f01 100644 --- a/src/Parsers/ParserUndropQuery.cpp +++ b/src/Parsers/ParserUndropQuery.cpp @@ -58,7 +58,7 @@ bool parseUndropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected) if (database) query->children.push_back(database); - assert (table); + chassert(table); query->children.push_back(table); query->cluster = cluster_str; diff --git a/src/Parsers/TablePropertiesQueriesASTs.h b/src/Parsers/TablePropertiesQueriesASTs.h index 038936a0297..81ad975aa37 100644 --- a/src/Parsers/TablePropertiesQueriesASTs.h +++ b/src/Parsers/TablePropertiesQueriesASTs.h @@ -85,11 +85,21 @@ using ASTShowCreateDictionaryQuery = ASTQueryWithTableAndOutputImpl { +public: + ASTPtr clone() const override + { + auto res = std::make_shared(*this); + res->children.clear(); + cloneTableOptions(*res); + return res; + } + protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { settings.ostr << (settings.hilite ? 
hilite_keyword : "") << ASTExistsDatabaseQueryIDAndQueryNames::Query - << " " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(getDatabase()); + << " " << (settings.hilite ? hilite_none : ""); + database->formatImpl(settings, state, frame); } QueryKind getQueryKind() const override { return QueryKind::Exists; } @@ -97,11 +107,21 @@ protected: class ASTShowCreateDatabaseQuery : public ASTQueryWithTableAndOutputImpl { +public: + ASTPtr clone() const override + { + auto res = std::make_shared(*this); + res->children.clear(); + cloneTableOptions(*res); + return res; + } + protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { settings.ostr << (settings.hilite ? hilite_keyword : "") << ASTShowCreateDatabaseQueryIDAndQueryNames::Query - << " " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(getDatabase()); + << " " << (settings.hilite ? hilite_none : ""); + database->formatImpl(settings, state, frame); } }; diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index 192f2f55e6a..8cb59aa12e2 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -62,6 +62,18 @@ public: return *this; } + ALWAYS_INLINE TokenIterator & operator-=(int value) + { + index -= value; + return *this; + } + + ALWAYS_INLINE TokenIterator & operator+=(int value) + { + index += value; + return *this; + } + ALWAYS_INLINE bool operator<(const TokenIterator & rhs) const { return index < rhs.index; } ALWAYS_INLINE bool operator<=(const TokenIterator & rhs) const { return index <= rhs.index; } ALWAYS_INLINE bool operator==(const TokenIterator & rhs) const { return index == rhs.index; } diff --git a/src/Parsers/parseIdentifierOrStringLiteral.cpp b/src/Parsers/parseIdentifierOrStringLiteral.cpp index 3f2bd8ac8ad..bb93145772a 100644 --- a/src/Parsers/parseIdentifierOrStringLiteral.cpp +++ b/src/Parsers/parseIdentifierOrStringLiteral.cpp @@ -7,8 +7,10 @@ #include #include + namespace DB { + bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, String & result) { return IParserBase::wrapParseImpl(pos, [&] @@ -23,7 +25,7 @@ bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, Str if (ParserStringLiteral().parse(pos, ast, expected)) { result = ast->as().value.safeGet(); - return true; + return !result.empty(); } return false; diff --git a/src/Parsers/parseIdentifierOrStringLiteral.h b/src/Parsers/parseIdentifierOrStringLiteral.h index d029c0c1371..b450ce8f2f0 100644 --- a/src/Parsers/parseIdentifierOrStringLiteral.h +++ b/src/Parsers/parseIdentifierOrStringLiteral.h @@ -3,11 +3,14 @@ #include #include + namespace DB { -/** Parses a name of an object which could be written in 3 forms: - * name, `name` or 'name' */ +/** Parses a name of an object which could be written in the following forms: + * name / `name` / "name" (identifier) or 'name'. + * Note that empty strings are not allowed. + */ bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, String & result); /** Parse a list of identifiers or string literals. 
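An aside on the `parseIdentifierOrStringLiteral` change in this hunk: a string literal now only counts as a valid name when it is non-empty (`return !result.empty()`), which the updated header comment spells out. Below is a minimal standalone sketch of that contract; `parseNameOrQuoted` and its single-token input are invented for illustration and ignore quote escaping, so this is not the ClickHouse parser itself.

```cpp
// Standalone sketch (not the ClickHouse API): accept a name given either as a bare
// identifier or as a single-quoted string literal, but reject the empty string.
#include <cctype>
#include <iostream>
#include <optional>
#include <string>

std::optional<std::string> parseNameOrQuoted(const std::string & token)
{
    if (token.size() >= 2 && token.front() == '\'' && token.back() == '\'')
    {
        std::string value = token.substr(1, token.size() - 2);
        // An empty string literal '' is syntactically valid but is not a usable name.
        if (value.empty())
            return std::nullopt;
        return value;
    }

    if (token.empty() || !(std::isalpha(static_cast<unsigned char>(token[0])) || token[0] == '_'))
        return std::nullopt;
    for (char c : token)
        if (!(std::isalnum(static_cast<unsigned char>(c)) || c == '_'))
            return std::nullopt;
    return token;
}

int main()
{
    for (const std::string & t : {"my_table", "'my table'", "''", "1abc"})
        std::cout << t << " -> " << (parseNameOrQuoted(t) ? "accepted" : "rejected") << '\n';
}
```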
*/ diff --git a/src/Parsers/parseIntervalKind.cpp b/src/Parsers/parseIntervalKind.cpp index fe052287083..124246f2950 100644 --- a/src/Parsers/parseIntervalKind.cpp +++ b/src/Parsers/parseIntervalKind.cpp @@ -11,7 +11,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_NANOSECOND").ignore(pos, expected) || ParserKeyword("NS").ignore(pos, expected)) { - result = IntervalKind::Nanosecond; + result = IntervalKind::Kind::Nanosecond; return true; } @@ -19,7 +19,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_MICROSECOND").ignore(pos, expected) || ParserKeyword("MCS").ignore(pos, expected)) { - result = IntervalKind::Microsecond; + result = IntervalKind::Kind::Microsecond; return true; } @@ -27,7 +27,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_MILLISECOND").ignore(pos, expected) || ParserKeyword("MS").ignore(pos, expected)) { - result = IntervalKind::Millisecond; + result = IntervalKind::Kind::Millisecond; return true; } @@ -35,7 +35,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected) || ParserKeyword("SS").ignore(pos, expected) || ParserKeyword("S").ignore(pos, expected)) { - result = IntervalKind::Second; + result = IntervalKind::Kind::Second; return true; } @@ -43,7 +43,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_MINUTE").ignore(pos, expected) || ParserKeyword("MI").ignore(pos, expected) || ParserKeyword("N").ignore(pos, expected)) { - result = IntervalKind::Minute; + result = IntervalKind::Kind::Minute; return true; } @@ -51,7 +51,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_HOUR").ignore(pos, expected) || ParserKeyword("HH").ignore(pos, expected) || ParserKeyword("H").ignore(pos, expected)) { - result = IntervalKind::Hour; + result = IntervalKind::Kind::Hour; return true; } @@ -59,7 +59,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_DAY").ignore(pos, expected) || ParserKeyword("DD").ignore(pos, expected) || ParserKeyword("D").ignore(pos, expected)) { - result = IntervalKind::Day; + result = IntervalKind::Kind::Day; return true; } @@ -67,7 +67,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_WEEK").ignore(pos, expected) || ParserKeyword("WK").ignore(pos, expected) || ParserKeyword("WW").ignore(pos, expected)) { - result = IntervalKind::Week; + result = IntervalKind::Kind::Week; return true; } @@ -75,7 +75,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_MONTH").ignore(pos, expected) || ParserKeyword("MM").ignore(pos, expected) || ParserKeyword("M").ignore(pos, expected)) { - result = IntervalKind::Month; + result = IntervalKind::Kind::Month; return true; } @@ -83,7 +83,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || ParserKeyword("SQL_TSI_QUARTER").ignore(pos, expected) || ParserKeyword("QQ").ignore(pos, expected) || ParserKeyword("Q").ignore(pos, expected)) { - result = IntervalKind::Quarter; + result = IntervalKind::Kind::Quarter; return true; } @@ -91,7 +91,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r || 
ParserKeyword("SQL_TSI_YEAR").ignore(pos, expected) || ParserKeyword("YYYY").ignore(pos, expected) || ParserKeyword("YY").ignore(pos, expected)) { - result = IntervalKind::Year; + result = IntervalKind::Kind::Year; return true; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 4914c3889dd..19947cd38cc 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -62,10 +63,29 @@ TEST_P(ParserTest, parseQuery) if (std::string("CREATE USER or ALTER USER query") != parser->getName() && std::string("ATTACH access entity query") != parser->getName()) { - WriteBufferFromOwnString buf; - formatAST(*ast->clone(), buf, false, false); - String formatted_ast = buf.str(); - EXPECT_EQ(expected_ast, formatted_ast); + ASTPtr ast_clone = ast->clone(); + { + WriteBufferFromOwnString buf; + formatAST(*ast_clone, buf, false, false); + String formatted_ast = buf.str(); + EXPECT_EQ(expected_ast, formatted_ast); + } + + + ASTPtr ast_clone2 = ast_clone->clone(); + /// Break `ast_clone2`, it should not affect `ast_clone` if `clone()` implemented properly + for (auto & child : ast_clone2->children) + { + if (auto * identifier = dynamic_cast(child.get())) + identifier->setShortName("new_name"); + } + + { + WriteBufferFromOwnString buf; + formatAST(*ast_clone, buf, false, false); + String formatted_ast = buf.str(); + EXPECT_EQ(expected_ast, formatted_ast); + } } else { @@ -132,7 +152,7 @@ INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery_FAIL, ParserTest, ::testing::Combine( - ::testing::Values(std::make_shared()), + ::testing::Values(std::make_shared(false)), ::testing::ValuesIn(std::initializer_list { { @@ -159,7 +179,7 @@ INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery_FAIL, ParserTest, INSTANTIATE_TEST_SUITE_P(ParserAlterCommand_MODIFY_COMMENT, ParserTest, ::testing::Combine( - ::testing::Values(std::make_shared()), + ::testing::Values(std::make_shared(false)), ::testing::ValuesIn(std::initializer_list { { @@ -299,6 +319,16 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, } }))); +INSTANTIATE_TEST_SUITE_P(ParserRenameQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "RENAME TABLE eligible_test TO eligible_test2", + "RENAME TABLE eligible_test TO eligible_test2" + } +}))); + INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserKQLTest, ::testing::Combine( ::testing::Values(std::make_shared()), diff --git a/src/Parsers/tests/gtest_format_hiliting.cpp b/src/Parsers/tests/gtest_format_hiliting.cpp index a4c3ed86182..e87b093db9d 100644 --- a/src/Parsers/tests/gtest_format_hiliting.cpp +++ b/src/Parsers/tests/gtest_format_hiliting.cpp @@ -5,9 +5,11 @@ #include #include #include -#include +#include +using namespace DB; + String hilite(const String & s, const char * hilite_type) { return hilite_type + s + DB::IAST::hilite_none; @@ -20,12 +22,12 @@ String keyword(const String & s) String identifier(const String & s) { - return hilite(s, DB::IAST::hilite_identifier); + return hilite(backQuoteIfNeed(s), DB::IAST::hilite_identifier); } String alias(const String & s) { - return hilite(s, DB::IAST::hilite_alias); + return hilite(backQuoteIfNeed(s), DB::IAST::hilite_alias); } String op(const String & s) @@ -61,65 +63,65 @@ void compare(const String & expected, const String & query) const std::vector> expected_and_query_pairs = { // Simple 
select { - keyword("SELECT ") + "* " + keyword("FROM ") + identifier("table"), - "select * from table" + keyword("SELECT") + " * " + keyword("FROM") + " " + identifier("table"), + "select * from `table`" }, // ASTWithElement { - keyword("WITH ") + alias("alias ") + keyword("AS ") - + "(" + keyword("SELECT ") + "* " + keyword("FROM ") + identifier("table") + ") " - + keyword("SELECT ") + "* " + keyword("FROM ") + identifier("table"), - "with alias as (select * from table) select * from table" + keyword("WITH ") + alias("alias ") + " " + keyword("AS") + + " (" + keyword("SELECT") + " * " + keyword("FROM") + " " + identifier("table") + ") " + + keyword("SELECT") + " * " + keyword("FROM") + " " + identifier("table"), + "with `alias ` as (select * from `table`) select * from `table`" }, // ASTWithAlias { - keyword("SELECT ") + identifier("a ") + op("+ ") + "1 " + keyword("AS ") + alias("b") + ", " + identifier("b"), + keyword("SELECT") + " " + identifier("a") + " " + op("+") + " 1 " + keyword("AS") + " " + alias("b") + ", " + identifier("b"), "select a + 1 as b, b" }, // ASTFunction { keyword("SELECT ") + "* " + keyword("FROM ") - + function("view(") + keyword("SELECT ") + "* " + keyword("FROM ") + identifier("table") + function(")"), - "select * from view(select * from table)" + + function("view(") + keyword("SELECT") + " * " + keyword("FROM ") + identifier("table") + function(")"), + "select * from view(select * from `table`)" }, // ASTDictionaryAttributeDeclaration { - keyword("CREATE DICTIONARY ") + "name " - + "(`Name` " + function("ClickHouseDataType ") - + keyword("DEFAULT ") + "'' " - + keyword("EXPRESSION ") + function("rand64() ") + keyword("CREATE DICTIONARY ") + identifier("name") + " " + + "(`Name` " + function("ClickHouseDataType") + + keyword(" DEFAULT") + " '' " + + keyword("EXPRESSION") + " " + function("rand64()") + " " + keyword("IS_OBJECT_ID") + ")", "CREATE DICTIONARY name (`Name` ClickHouseDataType DEFAULT '' EXPRESSION rand64() IS_OBJECT_ID)" }, // ASTDictionary, SOURCE keyword { - keyword("CREATE DICTIONARY ") + "name " - + "(`Name` " + function("ClickHouseDataType ") - + keyword("DEFAULT ") + "'' " - + keyword("EXPRESSION ") + function("rand64() ") + keyword("CREATE DICTIONARY ") + identifier("name") + " " + + "(`Name`" + " " + function("ClickHouseDataType ") + + keyword("DEFAULT") + " '' " + + keyword("EXPRESSION") + " " + function("rand64()") + " " + keyword("IS_OBJECT_ID") + ") " - + keyword("SOURCE") + "(" + keyword("FILE") + "(" + keyword("PATH ") + "'path'))", + + keyword("SOURCE") + "(" + keyword("FILE") + "(" + keyword("PATH") + " 'path'))", "CREATE DICTIONARY name (`Name` ClickHouseDataType DEFAULT '' EXPRESSION rand64() IS_OBJECT_ID) " "SOURCE(FILE(PATH 'path'))" }, // ASTKillQueryQuery { - keyword("KILL QUERY ON CLUSTER ") + "clustername " - + keyword("WHERE ") + identifier("user ") + op("= ") + "'username' " + keyword("KILL QUERY ON CLUSTER") + " clustername " + + keyword("WHERE") + " " + identifier("user") + op(" = ") + "'username' " + keyword("SYNC"), "KILL QUERY ON CLUSTER clustername WHERE user = 'username' SYNC" }, // ASTCreateQuery { - keyword("CREATE TABLE ") + "name " + keyword("AS (SELECT ") + "*" + keyword(") ") - + keyword("COMMENT ") + "'hello'", + keyword("CREATE TABLE ") + identifier("name") + " " + keyword("AS (SELECT") + " *" + keyword(")") + " " + + keyword("COMMENT") + " 'hello'", "CREATE TABLE name AS (SELECT *) COMMENT 'hello'" }, }; diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 
78a7c7074c3..385381f1355 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -29,34 +29,13 @@ namespace class CollectSourceColumnsVisitor : public InDepthQueryTreeVisitor { public: - explicit CollectSourceColumnsVisitor(PlannerContext & planner_context_) + explicit CollectSourceColumnsVisitor(PlannerContextPtr & planner_context_, bool keep_alias_columns_ = true) : planner_context(planner_context_) + , keep_alias_columns(keep_alias_columns_) {} void visitImpl(QueryTreeNodePtr & node) { - /// Special case for USING clause which contains references to ALIAS columns. - /// We can not modify such ColumnNode. - if (auto * join_node = node->as()) - { - if (!join_node->isUsingJoinExpression()) - return; - - auto & using_list = join_node->getJoinExpression()->as(); - for (auto & using_element : using_list) - { - auto & column_node = using_element->as(); - /// This list contains column nodes from left and right tables. - auto & columns_from_subtrees = column_node.getExpressionOrThrow()->as().getNodes(); - - /// Visit left table column node. - visitUsingColumn(columns_from_subtrees[0]); - /// Visit right table column node. - visitUsingColumn(columns_from_subtrees[1]); - } - return; - } - auto * column_node = node->as(); if (!column_node) return; @@ -72,22 +51,55 @@ public: /// JOIN using expression if (column_node->hasExpression() && column_source_node_type == QueryTreeNodeType::JOIN) - return; - - auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_source_node); - - if (column_node->hasExpression() && column_source_node_type != QueryTreeNodeType::ARRAY_JOIN) { - /// Replace ALIAS column with expression + auto & columns_from_subtrees = column_node->getExpression()->as().getNodes(); + if (columns_from_subtrees.size() != 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected two columns in JOIN using expression for column {}", column_node->dumpTree()); + + visit(columns_from_subtrees[0]); + visit(columns_from_subtrees[1]); + return; + } + + auto & table_expression_data = planner_context->getOrCreateTableExpressionData(column_source_node); + + if (isAliasColumn(node)) + { + /// Column is an ALIAS column with expression bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName()); if (!column_already_exists) { - auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); - table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier); + CollectSourceColumnsVisitor visitor_for_alias_column(planner_context); + /// While we are processing expression of ALIAS columns we should not add source columns to selected. + /// See also comment for `select_added_columns` + visitor_for_alias_column.select_added_columns = false; + visitor_for_alias_column.keep_alias_columns = keep_alias_columns; + visitor_for_alias_column.visit(column_node->getExpression()); + + if (!keep_alias_columns) + { + /// For PREWHERE we can just replace ALIAS column with it's expression, + /// because ActionsDAG for PREWHERE applied right on top of table expression + /// and cannot affect subqueries or other table expressions. 
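To make the `keep_alias_columns` logic in this hunk easier to follow, here is a small standalone sketch of the two code paths, assuming invented types (`AliasRegistry`, plain strings standing in for query tree nodes and ActionsDAGs): for a "local" DAG such as PREWHERE the ALIAS column is inlined, otherwise the column reference is kept and its expression is registered to be computed after reading.

```cpp
#include <iostream>
#include <map>
#include <string>

struct AliasRegistry
{
    // column name -> expression that has to be computed after reading from storage
    std::map<std::string, std::string> expressions_to_compute_later;
};

std::string resolveColumn(
    const std::string & column,
    const std::map<std::string, std::string> & alias_definitions,
    bool keep_alias_columns,
    AliasRegistry & registry)
{
    auto it = alias_definitions.find(column);
    if (it == alias_definitions.end())
        return column;  // ordinary physical column

    if (!keep_alias_columns)
        return it->second;  // inline the ALIAS expression right here (PREWHERE-style)

    registry.expressions_to_compute_later[column] = it->second;
    return column;  // keep the reference; a "Compute alias columns" step fills it in later
}

int main()
{
    const std::map<std::string, std::string> aliases = {{"b", "a + 1"}};
    AliasRegistry registry;

    std::cout << resolveColumn("b", aliases, /*keep_alias_columns=*/false, registry) << '\n';  // a + 1
    std::cout << resolveColumn("b", aliases, /*keep_alias_columns=*/true, registry) << '\n';   // b
    std::cout << "deferred: " << registry.expressions_to_compute_later["b"] << '\n';           // a + 1
}
```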
+ node = column_node->getExpression(); + return; + } + + auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); + + ActionsDAGPtr alias_column_actions_dag = std::make_shared(); + PlannerActionsVisitor actions_visitor(planner_context, false); + auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression()); + if (outputs.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected single output in actions dag for alias column {}. Actual {}", column_node->dumpTree(), outputs.size()); + const auto & column_name = column_node->getColumnName(); + const auto & alias_node = alias_column_actions_dag->addAlias(*outputs[0], column_name); + alias_column_actions_dag->addOrReplaceInOutputs(alias_node); + table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, alias_column_actions_dag, select_added_columns); } - node = column_node->getExpression(); - visitImpl(node); return; } @@ -102,45 +114,58 @@ public: bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName()); if (column_already_exists) + { + /// Column may be added when we collected data for ALIAS column + /// But now we see it directly in the query, so make sure it's marked as selected + if (select_added_columns) + table_expression_data.markSelectedColumn(column_node->getColumnName()); return; + } - auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); - table_expression_data.addColumn(column_node->getColumn(), column_identifier); + auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); + table_expression_data.addColumn(column_node->getColumn(), column_identifier, select_added_columns); } - static bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr & child_node) + static bool isAliasColumn(const QueryTreeNodePtr & node) + { + const auto * column_node = node->as(); + if (!column_node || !column_node->hasExpression()) + return false; + const auto & column_source = column_node->getColumnSourceOrNull(); + if (!column_source) + return false; + return column_source->getNodeType() != QueryTreeNodeType::JOIN && + column_source->getNodeType() != QueryTreeNodeType::ARRAY_JOIN; + } + + static bool needChildVisit(const QueryTreeNodePtr & parent_node, const QueryTreeNodePtr & child_node) { - if (auto * join_node = parent->as()) - { - if (join_node->getJoinExpression() == child_node && join_node->isUsingJoinExpression()) - return false; - } auto child_node_type = child_node->getNodeType(); - return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION); + return !(child_node_type == QueryTreeNodeType::QUERY || + child_node_type == QueryTreeNodeType::UNION || + isAliasColumn(parent_node)); + } + + void setKeepAliasColumns(bool keep_alias_columns_) + { + keep_alias_columns = keep_alias_columns_; } private: + PlannerContextPtr & planner_context; - void visitUsingColumn(QueryTreeNodePtr & node) - { - auto & column_node = node->as(); - if (column_node.hasExpression()) - { - auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_node.getColumnSource()); - bool column_already_exists = table_expression_data.hasColumn(column_node.getColumnName()); - if (column_already_exists) - return; + /// Replace ALIAS columns with their expressions or register them in table expression data. 
+ /// Usually we can replace them when we build some "local" actions DAG + /// (for example Row Policy or PREWHERE) that is applied on top of the table expression. + /// In other cases, we keep ALIAS columns as ColumnNode with an expression child node, + /// and handle them in the Planner by inserting ActionsDAG to compute them after reading from storage. + bool keep_alias_columns = true; - auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); - table_expression_data.addAliasColumnName(column_node.getColumnName(), column_identifier); - - visitImpl(column_node.getExpressionOrThrow()); - } - else - visitImpl(node); - } - - PlannerContext & planner_context; + /// Flag `select_added_columns` indicates if we should mark column as explicitly selected. + /// For example, for table with columns (a Int32, b ALIAS a+1) and query SELECT b FROM table + /// Column `b` is selected explicitly by user, but not `a` (that is also read though). + /// Distinguishing such columns is important for checking access rights for ALIAS columns. + bool select_added_columns = true; }; class CollectPrewhereTableExpressionVisitor : public ConstInDepthQueryTreeVisitor @@ -274,7 +299,7 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } } - CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context); + CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context); for (auto & node : query_node_typed.getChildren()) { if (!node || node == query_node_typed.getPrewhere()) @@ -300,21 +325,26 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } auto & table_expression_data = planner_context->getOrCreateTableExpressionData(prewhere_table_expression); - const auto & column_names = table_expression_data.getColumnNames(); - NameSet required_column_names_without_prewhere(column_names.begin(), column_names.end()); + const auto & read_column_names = table_expression_data.getColumnNames(); + NameSet required_column_names_without_prewhere(read_column_names.begin(), read_column_names.end()); + const auto & selected_column_names = table_expression_data.getSelectedColumnsNames(); + required_column_names_without_prewhere.insert(selected_column_names.begin(), selected_column_names.end()); + collect_source_columns_visitor.setKeepAliasColumns(false); collect_source_columns_visitor.visit(query_node_typed.getPrewhere()); auto prewhere_actions_dag = std::make_shared(); + QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); + PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = visitor.visit(prewhere_actions_dag, query_node_typed.getPrewhere()); + auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Invalid PREWHERE. Expected single boolean expression. 
In query {}", query_node->formatASTForErrorMessage()); - prewhere_actions_dag->getOutputs().push_back(expression_nodes[0]); + prewhere_actions_dag->getOutputs().push_back(expression_nodes.back()); for (const auto & prewhere_input_node : prewhere_actions_dag->getInputs()) if (required_column_names_without_prewhere.contains(prewhere_input_node->result_name)) @@ -324,9 +354,9 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } } -void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context) +void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context, bool keep_alias_columns) { - CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context); + CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context, keep_alias_columns); collect_source_columns_visitor.visit(expression_node); } diff --git a/src/Planner/CollectTableExpressionData.h b/src/Planner/CollectTableExpressionData.h index ed3f0ff7a47..b0cebc15682 100644 --- a/src/Planner/CollectTableExpressionData.h +++ b/src/Planner/CollectTableExpressionData.h @@ -19,6 +19,6 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr * * ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression. */ -void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context); +void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context, bool keep_alias_columns = true); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index ace6d500482..bc1fb30781d 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -135,6 +136,7 @@ void checkStoragesSupportTransactions(const PlannerContextPtr & planner_context) * * StorageDistributed skip unused shards optimization relies on this. * Parallel replicas estimation relies on this too. + * StorageMerge common header calculation relies on this too. * * To collect filters that will be applied to specific table in case we have JOINs requires * to run query plan optimization pipeline. @@ -145,16 +147,16 @@ void checkStoragesSupportTransactions(const PlannerContextPtr & planner_context) * 3. Optimize query plan. * 4. Extract filters from ReadFromDummy query plan steps from query plan leaf nodes. */ -void collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const PlannerContextPtr & planner_context) + +FiltersForTableExpressionMap collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const QueryTreeNodes & table_nodes, const ContextPtr & query_context) { bool collect_filters = false; - const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); bool parallel_replicas_estimation_enabled = query_context->canUseParallelReplicasOnInitiator() && settings.parallel_replicas_min_number_of_rows_per_replica > 0; - for (auto & [table_expression, table_expression_data] : planner_context->getTableExpressionNodeToData()) + for (const auto & table_expression : table_nodes) { auto * table_node = table_expression->as(); auto * table_function_node = table_expression->as(); @@ -162,7 +164,7 @@ void collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const Planne continue; const auto & storage = table_node ? 
table_node->getStorage() : table_function_node->getStorage(); - if (typeid_cast(storage.get()) + if (typeid_cast(storage.get()) || typeid_cast(storage.get()) || (parallel_replicas_estimation_enabled && std::dynamic_pointer_cast(storage))) { collect_filters = true; @@ -171,18 +173,18 @@ void collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const Planne } if (!collect_filters) - return; + return {}; ResultReplacementMap replacement_map; - auto updated_query_tree = replaceTableExpressionsWithDummyTables(query_tree, planner_context->getQueryContext(), &replacement_map); - std::unordered_map dummy_storage_to_table_expression_data; + auto updated_query_tree = replaceTableExpressionsWithDummyTables(query_tree, table_nodes, query_context, &replacement_map); + + std::unordered_map dummy_storage_to_table; for (auto & [from_table_expression, dummy_table_expression] : replacement_map) { auto * dummy_storage = dummy_table_expression->as().getStorage().get(); - auto * table_expression_data = &planner_context->getTableExpressionDataOrThrow(from_table_expression); - dummy_storage_to_table_expression_data.emplace(dummy_storage, table_expression_data); + dummy_storage_to_table.emplace(dummy_storage, from_table_expression); } SelectQueryOptions select_query_options; @@ -194,6 +196,8 @@ void collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const Planne auto optimization_settings = QueryPlanOptimizationSettings::fromContext(query_context); result_query_plan.optimize(optimization_settings); + FiltersForTableExpressionMap res; + std::vector nodes_to_process; nodes_to_process.push_back(result_query_plan.getRootNode()); @@ -207,10 +211,33 @@ void collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const Planne if (!read_from_dummy) continue; - auto filter_actions = ActionsDAG::buildFilterActionsDAG(read_from_dummy->getFilterNodes().nodes); - auto & table_expression_data = dummy_storage_to_table_expression_data.at(&read_from_dummy->getStorage()); - table_expression_data->setFilterActions(std::move(filter_actions)); + auto filter_actions = read_from_dummy->getFilterActionsDAG(); + const auto & table_node = dummy_storage_to_table.at(&read_from_dummy->getStorage()); + res[table_node] = FiltersForTableExpression{std::move(filter_actions), read_from_dummy->getPrewhereInfo()}; } + + return res; +} + +FiltersForTableExpressionMap collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree_node, SelectQueryOptions & select_query_options) +{ + if (select_query_options.only_analyze) + return {}; + + auto * query_node = query_tree_node->as(); + auto * union_node = query_tree_node->as(); + + if (!query_node && !union_node) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Expected QUERY or UNION node. Actual {}", + query_tree_node->formatASTForErrorMessage()); + + auto context = query_node ? 
query_node->getContext() : union_node->getContext(); + + auto table_expressions_nodes + = extractTableExpressions(query_tree_node, false /* add_array_join */, true /* recursive */); + + return collectFiltersForAnalysis(query_tree_node, table_expressions_nodes, context); } /// Extend lifetime of query context, storages, and table locks @@ -1058,7 +1085,7 @@ void addBuildSubqueriesForSetsStepIfNeeded( Planner subquery_planner( query_tree, subquery_options, - std::make_shared(nullptr, nullptr)); + std::make_shared(nullptr, nullptr, FiltersForTableExpressionMap{})); subquery_planner.buildQueryPlanIfNeeded(); subquery->setQueryPlan(std::make_unique(std::move(subquery_planner).extractQueryPlan())); @@ -1154,7 +1181,7 @@ PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, if (select_query_options.is_subquery) updateContextForSubqueryExecution(mutable_context); - return std::make_shared(mutable_context, std::move(global_planner_context)); + return std::make_shared(mutable_context, std::move(global_planner_context), select_query_options); } Planner::Planner(const QueryTreeNodePtr & query_tree_, @@ -1164,7 +1191,8 @@ Planner::Planner(const QueryTreeNodePtr & query_tree_, , planner_context(buildPlannerContext(query_tree, select_query_options, std::make_shared( findQueryForParallelReplicas(query_tree, select_query_options), - findTableForParallelReplicas(query_tree, select_query_options)))) + findTableForParallelReplicas(query_tree, select_query_options), + collectFiltersForAnalysis(query_tree, select_query_options)))) { } @@ -1345,9 +1373,9 @@ void Planner::buildPlanForQueryNode() const auto & settings = query_context->getSettingsRef(); if (query_context->canUseTaskBasedParallelReplicas()) { - if (planner_context->getPreparedSets().hasSubqueries()) + if (!settings.parallel_replicas_allow_in_with_subquery && planner_context->getPreparedSets().hasSubqueries()) { - if (settings.allow_experimental_parallel_reading_from_replicas == 2) + if (settings.allow_experimental_parallel_reading_from_replicas >= 2) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); auto & mutable_context = planner_context->getMutableQueryContext(); @@ -1359,8 +1387,20 @@ void Planner::buildPlanForQueryNode() collectTableExpressionData(query_tree, planner_context); checkStoragesSupportTransactions(planner_context); - if (!select_query_options.only_analyze) - collectFiltersForAnalysis(query_tree, planner_context); + const auto & table_filters = planner_context->getGlobalPlannerContext()->filters_for_table_expressions; + if (!select_query_options.only_analyze && !table_filters.empty()) // && top_level) + { + for (auto & [table_node, table_expression_data] : planner_context->getTableExpressionNodeToData()) + { + auto it = table_filters.find(table_node); + if (it != table_filters.end()) + { + const auto & filters = it->second; + table_expression_data.setFilterActions(filters.filter_actions); + table_expression_data.setPrewhereInfo(filters.prewhere_info); + } + } + } if (query_context->canUseTaskBasedParallelReplicas()) { @@ -1374,7 +1414,7 @@ void Planner::buildPlanForQueryNode() const auto & modifiers = table_node->getTableExpressionModifiers(); if (modifiers.has_value() && modifiers->hasFinal()) { - if (settings.allow_experimental_parallel_reading_from_replicas == 2) + if (settings.allow_experimental_parallel_reading_from_replicas >= 2) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "FINAL modifier is not supported with parallel replicas"); else { 
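The refactoring above turns filter collection into a two-step pattern: `collectFiltersForAnalysis` builds a map keyed by table expression once, and `buildPlanForQueryNode` later looks each table expression up in that map instead of redoing the analysis. A minimal standalone sketch of the lookup pattern, with invented `TableNode`/`FiltersForTable` types and a fake `collectFilters` in place of the real dummy-storage query plan:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct TableNode { std::string name; };
struct FiltersForTable { std::string filter_expression; };  // stands in for filter_actions / prewhere_info

using TableNodePtr = std::shared_ptr<TableNode>;
using FiltersForTableMap = std::map<TableNodePtr, FiltersForTable>;

FiltersForTableMap collectFilters(const std::vector<TableNodePtr> & tables)
{
    // In the real planner this runs a throw-away query plan over dummy storages;
    // here we just pretend every table got some pushed-down predicate.
    FiltersForTableMap res;
    for (const auto & table : tables)
        res[table] = FiltersForTable{"pushed_down_filter(" + table->name + ")"};
    return res;
}

int main()
{
    std::vector<TableNodePtr> tables = {std::make_shared<TableNode>(TableNode{"t1"}),
                                        std::make_shared<TableNode>(TableNode{"t2"})};
    const FiltersForTableMap filters = collectFilters(tables);

    for (const auto & table : tables)
    {
        auto it = filters.find(table);
        if (it != filters.end())
            std::cout << table->name << " reads with " << it->second.filter_expression << '\n';
    }
}
```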
@@ -1393,7 +1433,7 @@ void Planner::buildPlanForQueryNode() /// Check support for JOIN for parallel replicas with custom key if (planner_context->getTableExpressionNodeToData().size() > 1) { - if (settings.allow_experimental_parallel_reading_from_replicas == 2) + if (settings.allow_experimental_parallel_reading_from_replicas >= 2) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JOINs are not supported with parallel replicas"); else { diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 511e9396a35..af23e684f23 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -44,6 +44,27 @@ namespace ErrorCodes namespace { +/* Calculates Action node name for ConstantNode. + * + * If converting to AST will add a '_CAST' function call, + * the result action name will also include it. + */ +String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_node) +{ + WriteBufferFromOwnString buffer; + if (constant_node.requiresCastCall()) + buffer << "_CAST("; + + buffer << calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + + if (constant_node.requiresCastCall()) + { + buffer << ", '" << constant_node.getResultType()->getName() << "'_String)"; + } + + return buffer.str(); +} + class ActionNodeNameHelper { public: @@ -88,7 +109,49 @@ public: case QueryTreeNodeType::CONSTANT: { const auto & constant_node = node->as(); - result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + /* To ensure that headers match during distributed query we need to simulate action node naming on + * secondary servers. If we don't do that headers will mismatch due to constant folding. + * + * +--------+ + * -----------------| Server |---------------- + * / +--------+ \ + * / \ + * v v + * +-----------+ +-----------+ + * | Initiator | ------ | Secondary |------ + * +-----------+ / +-----------+ \ + * | / \ + * | / \ + * v / \ + * +---------------+ v v + * | Wrap in _CAST | +----------------------------+ +----------------------+ + * | if needed | | Constant folded from _CAST | | Constant folded from | + * +---------------+ +----------------------------+ | another expression | + * | +----------------------+ + * v | + * +----------------------------+ v + * | Name ConstantNode the same | +--------------------------+ + * | as on initiator server | | Generate action name for | + * | (wrap in _CAST if needed) | | original expression | + * +----------------------------+ +--------------------------+ + */ + if (planner_context.isASTLevelOptimizationAllowed()) + { + result = calculateActionNodeNameWithCastIfNeeded(constant_node); + } + else + { + // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed. 
+ if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) + { + if (constant_node.receivedFromInitiatorServer()) + result = calculateActionNodeNameWithCastIfNeeded(constant_node); + else + result = calculateActionNodeName(constant_node.getSourceExpression()); + } + else + result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + } break; } case QueryTreeNodeType::FUNCTION: @@ -451,6 +514,7 @@ private: std::unordered_map node_to_node_name; const PlannerContextPtr planner_context; ActionNodeNameHelper action_node_name_helper; + bool use_column_identifier_as_action_node_name; }; PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, @@ -458,6 +522,7 @@ PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, bool use_column_identifier_as_action_node_name_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) + , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) { actions_stack.emplace_back(std::move(actions_dag), nullptr); } @@ -503,7 +568,8 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi { auto column_node_name = action_node_name_helper.calculateActionNodeName(node); const auto & column_node = node->as(); - + if (column_node.hasExpression() && !use_column_identifier_as_action_node_name) + return visitImpl(column_node.getExpression()); Int64 actions_stack_size = static_cast(actions_stack.size() - 1); for (Int64 i = actions_stack_size; i >= 0; --i) { @@ -527,7 +593,52 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & constant_literal = constant_node.getValue(); const auto & constant_type = constant_node.getResultType(); - auto constant_node_name = calculateConstantActionNodeName(constant_literal, constant_type); + auto constant_node_name = [&]() + { + /* To ensure that headers match during distributed query we need to simulate action node naming on + * secondary servers. If we don't do that headers will mismatch due to constant folding. + * + * +--------+ + * -----------------| Server |---------------- + * / +--------+ \ + * / \ + * v v + * +-----------+ +-----------+ + * | Initiator | ------ | Secondary |------ + * +-----------+ / +-----------+ \ + * | / \ + * | / \ + * v / \ + * +---------------+ v v + * | Wrap in _CAST | +----------------------------+ +----------------------+ + * | if needed | | Constant folded from _CAST | | Constant folded from | + * +---------------+ +----------------------------+ | another expression | + * | +----------------------+ + * v | + * +----------------------------+ v + * | Name ConstantNode the same | +--------------------------+ + * | as on initiator server | | Generate action name for | + * | (wrap in _CAST if needed) | | original expression | + * +----------------------------+ +--------------------------+ + */ + if (planner_context->isASTLevelOptimizationAllowed()) + { + return calculateActionNodeNameWithCastIfNeeded(constant_node); + } + else + { + // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed. 
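The diagram above boils down to one naming rule, mirrored here as a standalone string-only sketch (the real `calculateConstantActionNodeName` and the type system are mocked): if turning the constant back into an AST would add a `_CAST` call, the action node name has to include that `_CAST` as well, so initiator and secondary servers agree on header column names despite constant folding.

```cpp
#include <iostream>
#include <string>

std::string constantActionNodeName(const std::string & value_literal, const std::string & type_name)
{
    // Stands in for calculateConstantActionNodeName(value, type).
    return value_literal + "_" + type_name;
}

std::string actionNodeNameWithCastIfNeeded(
    const std::string & value_literal, const std::string & type_name, bool requires_cast_call)
{
    std::string name;
    if (requires_cast_call)
        name += "_CAST(";

    name += constantActionNodeName(value_literal, type_name);

    if (requires_cast_call)
        name += ", '" + type_name + "'_String)";

    return name;
}

int main()
{
    std::cout << actionNodeNameWithCastIfNeeded("42", "UInt8", false) << '\n';
    std::cout << actionNodeNameWithCastIfNeeded("42", "Nullable(UInt8)", true) << '\n';
}
```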
+ if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) + { + if (constant_node.receivedFromInitiatorServer()) + return calculateActionNodeNameWithCastIfNeeded(constant_node); + else + return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); + } + else + return calculateConstantActionNodeName(constant_literal, constant_type); + } + }(); ColumnWithTypeAndName column; column.name = constant_node_name; diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 422c8c1d01f..f33255f0a44 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -41,9 +42,10 @@ bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_i return column_identifiers.contains(column_identifier); } -PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_) +PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_) : query_context(std::move(query_context_)) , global_planner_context(std::move(global_planner_context_)) + , is_ast_level_optimization_allowed(!(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options_.ignore_ast_optimizations)) {} TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index fe9eabc558b..4d9ba037cac 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -10,6 +10,7 @@ #include #include +#include namespace DB { @@ -22,12 +23,25 @@ namespace DB class QueryNode; class TableNode; +struct FiltersForTableExpression +{ + ActionsDAGPtr filter_actions; + PrewhereInfoPtr prewhere_info; +}; + +using FiltersForTableExpressionMap = std::map; + + class GlobalPlannerContext { public: - explicit GlobalPlannerContext(const QueryNode * parallel_replicas_node_, const TableNode * parallel_replicas_table_) + GlobalPlannerContext( + const QueryNode * parallel_replicas_node_, + const TableNode * parallel_replicas_table_, + FiltersForTableExpressionMap filters_for_table_expressions_) : parallel_replicas_node(parallel_replicas_node_) , parallel_replicas_table(parallel_replicas_table_) + , filters_for_table_expressions(std::move(filters_for_table_expressions_)) { } @@ -53,6 +67,8 @@ public: /// It is the left-most table of the query (in JOINs, UNIONs and subqueries). 
const TableNode * const parallel_replicas_table = nullptr; + const FiltersForTableExpressionMap filters_for_table_expressions; + private: std::unordered_set column_identifiers; }; @@ -63,7 +79,7 @@ class PlannerContext { public: /// Create planner context with query context and global planner context - PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_); + PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_); /// Get planner context query context ContextPtr getQueryContext() const @@ -150,6 +166,12 @@ public: static SetKey createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node); PreparedSets & getPreparedSets() { return prepared_sets; } + + /// Returns false if any of following conditions met: + /// 1. Query is executed on a follower node. + /// 2. ignore_ast_optimizations is set. + bool isASTLevelOptimizationAllowed() const { return is_ast_level_optimization_allowed; } + private: /// Query context ContextMutablePtr query_context; @@ -157,6 +179,8 @@ private: /// Global planner context GlobalPlannerContextPtr global_planner_context; + bool is_ast_level_optimization_allowed; + /// Column node to column identifier std::unordered_map column_node_to_column_identifier; diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 52001eb27c5..30d90a68072 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 227ac86d3a5..7b3fb0c5c91 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -86,7 +86,7 @@ namespace /// Check if current user has privileges to SELECT columns from table /// Throws an exception if access to any column from `column_names` is not granted /// If `column_names` is empty, check access to any columns and return names of accessible columns -NameSet checkAccessRights(const TableNode & table_node, Names & column_names, const ContextPtr & query_context) +NameSet checkAccessRights(const TableNode & table_node, const Names & column_names, const ContextPtr & query_context) { /// StorageDummy is created on preliminary stage, ignore access check for it. 
if (typeid_cast(table_node.getStorage().get())) @@ -353,9 +353,7 @@ void prepareBuildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expr NameSet columns_names_allowed_to_select; if (table_node) { - auto column_names_with_aliases = columns_names; - const auto & alias_columns_names = table_expression_data.getAliasColumnsNames(); - column_names_with_aliases.insert(column_names_with_aliases.end(), alias_columns_names.begin(), alias_columns_names.end()); + const auto & column_names_with_aliases = table_expression_data.getSelectedColumnsNames(); columns_names_allowed_to_select = checkAccessRights(*table_node, column_names_with_aliases, query_context); } @@ -624,6 +622,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions(); + table_expression_query_info.optimized_prewhere_info = table_expression_data.getPrewhereInfo(); table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; @@ -717,12 +716,16 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } /// Apply trivial_count optimization if possible - bool is_trivial_count_applied = !select_query_options.only_analyze && - is_single_table_expression && - (table_node || table_function_node) && - select_query_info.has_aggregates && - settings.additional_table_filters.value.empty() && - applyTrivialCountIfPossible(query_plan, table_expression_query_info, table_node, table_function_node, select_query_info.query_tree, planner_context->getMutableQueryContext(), table_expression_data.getColumnNames()); + bool is_trivial_count_applied = !select_query_options.only_analyze && is_single_table_expression + && (table_node || table_function_node) && select_query_info.has_aggregates && settings.additional_table_filters.value.empty() + && applyTrivialCountIfPossible( + query_plan, + table_expression_query_info, + table_node, + table_function_node, + select_query_info.query_tree, + planner_context->getMutableQueryContext(), + table_expression_data.getColumnNames()); if (is_trivial_count_applied) { @@ -736,11 +739,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (storage_merge_tree && query_context->canUseParallelReplicasOnInitiator() && settings.parallel_replicas_min_number_of_rows_per_replica > 0) { - ActionDAGNodes filter_nodes; - if (table_expression_query_info.filter_actions_dag) - filter_nodes.nodes = table_expression_query_info.filter_actions_dag->getOutputs(); - UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead( - query_context, storage_snapshot, table_expression_query_info, filter_nodes); + UInt64 rows_to_read + = storage_merge_tree->estimateNumberOfRowsToRead(query_context, storage_snapshot, table_expression_query_info); if (max_block_size_limited && (max_block_size_limited < rows_to_read)) rows_to_read = max_block_size_limited; @@ -766,15 +766,16 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } } + auto & prewhere_info = table_expression_query_info.prewhere_info; const auto & prewhere_actions = table_expression_data.getPrewhereFilterActions(); if (prewhere_actions) { - table_expression_query_info.prewhere_info = std::make_shared(); - 
table_expression_query_info.prewhere_info->prewhere_actions = prewhere_actions; - table_expression_query_info.prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; - table_expression_query_info.prewhere_info->remove_prewhere_column = true; - table_expression_query_info.prewhere_info->need_filter = true; + prewhere_info = std::make_shared(); + prewhere_info->prewhere_actions = prewhere_actions; + prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; + prewhere_info->remove_prewhere_column = true; + prewhere_info->need_filter = true; } updatePrewhereOutputsIfNeeded(table_expression_query_info, table_expression_data.getColumnNames(), storage_snapshot); @@ -787,28 +788,34 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (!filter_info.actions) return; - bool is_final = table_expression_query_info.table_expression_modifiers && - table_expression_query_info.table_expression_modifiers->hasFinal(); - bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); + bool is_final = table_expression_query_info.table_expression_modifiers + && table_expression_query_info.table_expression_modifiers->hasFinal(); + bool optimize_move_to_prewhere + = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); if (storage->supportsPrewhere() && optimize_move_to_prewhere) { - if (!table_expression_query_info.prewhere_info) - table_expression_query_info.prewhere_info = std::make_shared(); + if (!prewhere_info) + prewhere_info = std::make_shared(); - if (!table_expression_query_info.prewhere_info->prewhere_actions) + if (!prewhere_info->prewhere_actions) { - table_expression_query_info.prewhere_info->prewhere_actions = filter_info.actions; - table_expression_query_info.prewhere_info->prewhere_column_name = filter_info.column_name; - table_expression_query_info.prewhere_info->remove_prewhere_column = filter_info.do_remove_column; + prewhere_info->prewhere_actions = filter_info.actions; + prewhere_info->prewhere_column_name = filter_info.column_name; + prewhere_info->remove_prewhere_column = filter_info.do_remove_column; + prewhere_info->need_filter = true; + } + else if (!prewhere_info->row_level_filter) + { + prewhere_info->row_level_filter = filter_info.actions; + prewhere_info->row_level_column_name = filter_info.column_name; + prewhere_info->need_filter = true; } else { - table_expression_query_info.prewhere_info->row_level_filter = filter_info.actions; - table_expression_query_info.prewhere_info->row_level_column_name = filter_info.column_name; + where_filters.emplace_back(filter_info, std::move(description)); } - table_expression_query_info.prewhere_info->need_filter = true; } else { @@ -816,7 +823,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } }; - auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); + auto row_policy_filter_info + = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); add_filter(row_policy_filter_info, "Row-level security filter"); if (row_policy_filter_info.actions) table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions); @@ -825,25 +833,56 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { if (settings.parallel_replicas_count > 1) { - auto 
parallel_replicas_custom_key_filter_info = buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context); + auto parallel_replicas_custom_key_filter_info + = buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context); add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } - else + else if (auto * distributed = typeid_cast(storage.get()); + distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) { - if (auto * distributed = typeid_cast(storage.get()); - distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) - { - planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); - } + planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); } } const auto & table_expression_alias = table_expression->getOriginalAlias(); - auto additional_filters_info = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context); + auto additional_filters_info + = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context); add_filter(additional_filters_info, "additional filter"); - from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); - storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams); + from_stage = storage->getQueryProcessingStage( + query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + + storage->read( + query_plan, + columns_names, + storage_snapshot, + table_expression_query_info, + query_context, + from_stage, + max_block_size, + max_streams); + + const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns) + { + ActionsDAGPtr merged_alias_columns_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag->getInputs(); + + for (const auto & [column_name, alias_column_actions_dag] : alias_column_expressions) + { + const auto & current_outputs = alias_column_actions_dag->getOutputs(); + action_dag_outputs.insert(action_dag_outputs.end(), current_outputs.begin(), current_outputs.end()); + merged_alias_columns_actions_dag->mergeNodes(std::move(*alias_column_actions_dag)); + } + + for (const auto * output_node : action_dag_outputs) + merged_alias_columns_actions_dag->addOrReplaceInOutputs(*output_node); + merged_alias_columns_actions_dag->removeUnusedActions(false); + + auto alias_column_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(merged_alias_columns_actions_dag)); + alias_column_step->setStepDescription("Compute alias columns"); + query_plan.addStep(std::move(alias_column_step)); + } for (const auto & filter_info_and_description : where_filters) { @@ -888,7 +927,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres else { /// Create step which reads from empty source if storage has no data. 
- auto source_header = storage_snapshot->getSampleBlockForColumns(table_expression_data.getColumnNames()); + const auto & column_names = table_expression_data.getSelectedColumnsNames(); + auto source_header = storage_snapshot->getSampleBlockForColumns(column_names); Pipe pipe(std::make_shared(source_header)); auto read_from_pipe = std::make_unique(std::move(pipe)); read_from_pipe->setStepDescription("Read from NullSource"); @@ -1005,57 +1045,6 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP plan_to_add_cast.addStep(std::move(cast_join_columns_step)); } -/// Actions to calculate table columns that have a functional representation (ALIASes and subcolumns) -/// and used in USING clause of JOIN expression. -struct UsingAliasKeyActions -{ - UsingAliasKeyActions( - const ColumnsWithTypeAndName & left_plan_output_columns, - const ColumnsWithTypeAndName & right_plan_output_columns - ) - : left_alias_columns_keys(std::make_shared(left_plan_output_columns)) - , right_alias_columns_keys(std::make_shared(right_plan_output_columns)) - {} - - void addLeftColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) - { - addColumnImpl(left_alias_columns_keys, node, plan_output_columns, planner_context); - } - - void addRightColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) - { - addColumnImpl(right_alias_columns_keys, node, plan_output_columns, planner_context); - } - - ActionsDAGPtr getLeftActions() - { - left_alias_columns_keys->projectInput(); - return std::move(left_alias_columns_keys); - } - - ActionsDAGPtr getRightActions() - { - right_alias_columns_keys->projectInput(); - return std::move(right_alias_columns_keys); - } - -private: - void addColumnImpl(ActionsDAGPtr & alias_columns_keys, QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) - { - auto & column_node = node->as(); - if (column_node.hasExpression()) - { - auto dag = buildActionsDAGFromExpressionNode(column_node.getExpressionOrThrow(), plan_output_columns, planner_context); - const auto & left_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(node); - dag->addOrReplaceInOutputs(dag->addAlias(*dag->getOutputs().front(), left_inner_column_identifier)); - alias_columns_keys->mergeInplace(std::move(*dag)); - } - } - - ActionsDAGPtr left_alias_columns_keys; - ActionsDAGPtr right_alias_columns_keys; -}; - JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression, JoinTreeQueryPlan left_join_tree_query_plan, JoinTreeQueryPlan right_join_tree_query_plan, @@ -1124,8 +1113,6 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ if (join_node.isUsingJoinExpression()) { - UsingAliasKeyActions using_alias_key_actions{left_plan_output_columns, right_plan_output_columns}; - auto & join_node_using_columns_list = join_node.getJoinExpression()->as(); for (auto & join_node_using_node : join_node_using_columns_list.getNodes()) { @@ -1135,13 +1122,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ auto & left_inner_column_node = inner_columns_list.getNodes().at(0); auto & left_inner_column = left_inner_column_node->as(); - using_alias_key_actions.addLeftColumn(left_inner_column_node, left_plan_output_columns, planner_context); - auto & right_inner_column_node = 
inner_columns_list.getNodes().at(1); auto & right_inner_column = right_inner_column_node->as(); - using_alias_key_actions.addRightColumn(right_inner_column_node, right_plan_output_columns, planner_context); - const auto & join_node_using_column_node_type = join_node_using_column_node.getColumnType(); if (!left_inner_column.getColumnType()->equals(*join_node_using_column_node_type)) { @@ -1155,14 +1138,6 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ right_plan_column_name_to_cast_type.emplace(right_inner_column_identifier, join_node_using_column_node_type); } } - - auto left_alias_columns_keys_step = std::make_unique(left_plan.getCurrentDataStream(), using_alias_key_actions.getLeftActions()); - left_alias_columns_keys_step->setStepDescription("Actions for left table alias column keys"); - left_plan.addStep(std::move(left_alias_columns_keys_step)); - - auto right_alias_columns_keys_step = std::make_unique(right_plan.getCurrentDataStream(), using_alias_key_actions.getRightActions()); - right_alias_columns_keys_step->setStepDescription("Actions for right table alias column keys"); - right_plan.addStep(std::move(right_alias_columns_keys_step)); } auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map & plan_column_name_to_cast_type) diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index a21438d466f..9110b2bfef9 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -15,9 +15,9 @@ struct JoinTreeQueryPlan { QueryPlan query_plan; QueryProcessingStage::Enum from_stage; - std::set used_row_policies; - std::vector actions_dags; - std::unordered_map query_node_to_plan_step_mapping; + std::set used_row_policies{}; + std::vector actions_dags{}; + std::unordered_map query_node_to_plan_step_mapping{}; }; /// Build JOIN TREE query plan for query node diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index f6ef4017c98..9ab7a8e64fe 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -17,6 +17,9 @@ using ColumnIdentifier = std::string; using ColumnIdentifiers = std::vector; using ColumnIdentifierSet = std::unordered_set; +struct PrewhereInfo; +using PrewhereInfoPtr = std::shared_ptr; + /** Table expression data is created for each table expression that take part in query. * Table expression data has information about columns that participate in query, their name to identifier mapping, * and additional table expression properties. @@ -52,7 +55,7 @@ public: /// Return true if column with name exists, false otherwise bool hasColumn(const std::string & column_name) const { - return alias_columns_names.contains(column_name) || column_name_to_column.contains(column_name); + return column_name_to_column.contains(column_name); } /** Add column in table expression data. @@ -60,37 +63,40 @@ public: * * Logical error exception is thrown if column already exists. */ - void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, bool is_selected_column = true) { if (hasColumn(column.name)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Column with name {} already exists", column.name); - addColumnImpl(column, column_identifier); + column_names.push_back(column.name); + addColumnImpl(column, column_identifier, is_selected_column); } - /** Add column if it does not exists in table expression data. 
- * Column identifier must be created using global planner context. - */ - void addColumnIfNotExists(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + /// Add alias column + void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAGPtr actions_dag, bool is_selected_column = true) { - if (hasColumn(column.name)) - return; - - addColumnImpl(column, column_identifier); + alias_column_expressions.emplace(column.name, std::move(actions_dag)); + addColumnImpl(column, column_identifier, is_selected_column); } - /// Add alias column name - void addAliasColumnName(const std::string & column_name, const ColumnIdentifier & column_identifier) + /// Mark existing column as selected + void markSelectedColumn(const std::string & column_name) { - alias_columns_names.insert(column_name); - - column_name_to_column_identifier.emplace(column_name, column_identifier); + auto [_, inserted] = selected_column_names_set.emplace(column_name); + if (inserted) + selected_column_names.push_back(column_name); } - /// Get alias columns names - const NameSet & getAliasColumnsNames() const + /// Get columns that are requested from table expression, including ALIAS columns + const Names & getSelectedColumnsNames() const { - return alias_columns_names; + return selected_column_names; + } + + /// Get ALIAS columns names mapped to expressions + const std::unordered_map & getAliasColumnExpressions() const + { + return alias_column_expressions; } /// Get column name to column map @@ -99,7 +105,7 @@ public: return column_name_to_column; } - /// Get column names + /// Get column names that are read from table expression const Names & getColumnNames() const { return column_names; @@ -116,23 +122,6 @@ public: return result; } - ColumnIdentifiers getColumnIdentifiers() const - { - ColumnIdentifiers result; - result.reserve(column_identifier_to_column_name.size()); - - for (const auto & [column_identifier, _] : column_identifier_to_column_name) - result.push_back(column_identifier); - - return result; - } - - /// Get column name to column identifier map - const ColumnNameToColumnIdentifier & getColumnNameToIdentifier() const - { - return column_name_to_column_identifier; - } - /// Get column identifier to column name map const ColumnNameToColumnIdentifier & getColumnIdentifierToColumnName() const { @@ -156,18 +145,6 @@ public: return it->second; } - /** Get column for column name. - * Null is returned if there are no column for column name. - */ - const NameAndTypePair * getColumnOrNull(const std::string & column_name) const - { - auto it = column_name_to_column.find(column_name); - if (it == column_name_to_column.end()) - return nullptr; - - return &it->second; - } - /** Get column identifier for column name. * Exception is thrown if there are no column identifier for column name. */ @@ -197,24 +174,6 @@ public: return &it->second; } - /** Get column name for column identifier. - * Exception is thrown if there are no column name for column identifier. - */ - const std::string & getColumnNameOrThrow(const ColumnIdentifier & column_identifier) const - { - auto it = column_identifier_to_column_name.find(column_identifier); - if (it == column_identifier_to_column_name.end()) - { - auto column_identifiers = getColumnIdentifiers(); - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column name for column identifier {} does not exists. 
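The markSelectedColumn change above keeps `selected_column_names` in insertion order while `selected_column_names_set` only guards against duplicates. A tiny standalone sketch of this order-preserving de-duplication, with simplified names, is below.

// Standalone sketch of the vector + set de-duplication used for
// selected_column_names / selected_column_names_set.
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

struct SelectedColumns
{
    std::vector<std::string> names;            // keeps first-seen order
    std::unordered_set<std::string> names_set; // guards against duplicates

    void markSelected(const std::string & name)
    {
        // emplace returns {iterator, inserted}; push only on first insertion.
        if (names_set.emplace(name).second)
            names.push_back(name);
    }
};

int main()
{
    SelectedColumns columns;
    for (const auto * name : {"a", "x", "a", "y", "x"})
        columns.markSelected(name);

    for (const auto & name : columns.names)
        std::cout << name << ' ';   // prints: a x y
    std::cout << '\n';
}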
There are only column identifiers: {}", - column_identifier, - fmt::join(column_identifiers.begin(), column_identifiers.end(), ", ")); - } - - return it->second; - } - /** Get column name for column identifier. * Null is returned if there are no column name for column identifier. */ @@ -282,24 +241,47 @@ public: filter_actions = std::move(filter_actions_value); } -private: - void addColumnImpl(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + const PrewhereInfoPtr & getPrewhereInfo() const { - column_names.push_back(column.name); + return prewhere_info; + } + + void setPrewhereInfo(PrewhereInfoPtr prewhere_info_value) + { + prewhere_info = std::move(prewhere_info_value); + } + +private: + void addColumnImpl(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, bool add_to_selected_columns) + { + if (add_to_selected_columns) + markSelectedColumn(column.name); + column_name_to_column.emplace(column.name, column); column_name_to_column_identifier.emplace(column.name, column_identifier); column_identifier_to_column_name.emplace(column_identifier, column.name); } - /// Valid for table, table function, array join, query, union nodes + /// Set of columns that are physically read from table expression + /// In case of ALIAS columns it contains source column names that are used to calculate alias + /// This source column may be not used by user Names column_names; + /// Set of columns that are SELECTed from table expression + /// It may contain ALIAS columns. + /// Mainly it's used to determine access to which columns to check + /// For example user may have an access to column `a ALIAS x + y` but not to `x` and `y` + /// In that case we can read `x` and `y` and calculate `a`, but not return `x` and `y` to user + Names selected_column_names; + /// To deduplicate columns in `selected_column_names` + NameSet selected_column_names_set; + + /// Expression to calculate ALIAS columns + std::unordered_map alias_column_expressions; + /// Valid for table, table function, array join, query, union nodes ColumnNameToColumn column_name_to_column; - /// Valid only for table node - NameSet alias_columns_names; - /// Valid for table, table function, array join, query, union nodes ColumnNameToColumnIdentifier column_name_to_column_identifier; @@ -309,6 +291,9 @@ private: /// Valid for table, table function ActionsDAGPtr filter_actions; + /// Valid for table, table function + PrewhereInfoPtr prewhere_info; + /// Valid for table, table function ActionsDAGPtr prewhere_filter_actions; diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 50ffa83a272..bd0b831ee58 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -386,66 +386,37 @@ QueryTreeNodePtr mergeConditionNodes(const QueryTreeNodes & condition_nodes, con return function_node; } -QueryTreeNodePtr replaceTableExpressionsWithDummyTables(const QueryTreeNodePtr & query_node, +QueryTreeNodePtr replaceTableExpressionsWithDummyTables( + const QueryTreeNodePtr & query_node, + const QueryTreeNodes & table_nodes, const ContextPtr & context, - //PlannerContext & planner_context, ResultReplacementMap * result_replacement_map) { - auto & query_node_typed = query_node->as(); - auto table_expressions = extractTableExpressions(query_node_typed.getJoinTree()); std::unordered_map replacement_map; - size_t subquery_index = 0; - for (auto & table_expression : table_expressions) + for (const auto & table_expression : table_nodes) { auto * table_node = table_expression->as(); auto * table_function_node = 
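The rewritten replaceTableExpressionsWithDummyTables above collects a replacement map keyed by the original node pointer and applies it with a single cloneAndReplace call. A minimal standalone sketch of that "collect replacements, then clone once" pattern follows; Node and cloneAndReplace here are simplified stand-ins, not the query tree API.

// Standalone sketch of building a replacement map and cloning the tree once.
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

struct Node;
using NodePtr = std::shared_ptr<Node>;

struct Node
{
    std::string name;
    std::vector<NodePtr> children;

    NodePtr cloneAndReplace(const std::unordered_map<const Node *, NodePtr> & replacement_map) const
    {
        // If this node is scheduled for replacement, return the substitute as-is.
        if (auto it = replacement_map.find(this); it != replacement_map.end())
            return it->second;

        // Otherwise clone the node and recurse into its children.
        auto copy = std::make_shared<Node>(Node{name, {}});
        for (const auto & child : children)
            copy->children.push_back(child->cloneAndReplace(replacement_map));
        return copy;
    }
};

int main()
{
    auto table = std::make_shared<Node>(Node{"table: t1", {}});
    auto query = std::make_shared<Node>(Node{"query", {table}});

    // Replace the real table with a dummy table, as the planner does.
    std::unordered_map<const Node *, NodePtr> replacement_map;
    replacement_map.emplace(table.get(), std::make_shared<Node>(Node{"dummy table: t1", {}}));

    auto replaced = query->cloneAndReplace(replacement_map);
    std::cout << replaced->children.front()->name << '\n';  // dummy table: t1
}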
table_expression->as(); - auto * subquery_node = table_expression->as(); - auto * union_node = table_expression->as(); - - StoragePtr storage_dummy; if (table_node || table_function_node) { const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - storage_dummy - = std::make_shared(storage_snapshot->storage.getStorageID(), ColumnsDescription(storage_snapshot->getColumns(get_column_options))); + StoragePtr storage_dummy = std::make_shared( + storage_snapshot->storage.getStorageID(), + ColumnsDescription(storage_snapshot->getColumns(get_column_options)), + storage_snapshot); + + auto dummy_table_node = std::make_shared(std::move(storage_dummy), context); + + if (result_replacement_map) + result_replacement_map->emplace(table_expression, dummy_table_node); + + dummy_table_node->setAlias(table_expression->getAlias()); + replacement_map.emplace(table_expression.get(), std::move(dummy_table_node)); } - else if (subquery_node || union_node) - { - const auto & subquery_projection_columns - = subquery_node ? subquery_node->getProjectionColumns() : union_node->computeProjectionColumns(); - - NameSet unique_column_names; - NamesAndTypes storage_dummy_columns; - storage_dummy_columns.reserve(subquery_projection_columns.size()); - - for (const auto & projection_column : subquery_projection_columns) - { - auto [_, inserted] = unique_column_names.insert(projection_column.name); - if (inserted) - storage_dummy_columns.emplace_back(projection_column); - } - - storage_dummy = std::make_shared(StorageID{"dummy", "subquery_" + std::to_string(subquery_index)}, ColumnsDescription::fromNamesAndTypes(storage_dummy_columns)); - ++subquery_index; - } - - auto dummy_table_node = std::make_shared(std::move(storage_dummy), context); - - if (result_replacement_map) - result_replacement_map->emplace(table_expression, dummy_table_node); - - dummy_table_node->setAlias(table_expression->getAlias()); - - // auto & src_table_expression_data = planner_context.getOrCreateTableExpressionData(table_expression); - // auto & dst_table_expression_data = planner_context.getOrCreateTableExpressionData(dummy_table_node); - - // dst_table_expression_data = src_table_expression_data; - - replacement_map.emplace(table_expression.get(), std::move(dummy_table_node)); } return query_node->cloneAndReplace(replacement_map); @@ -498,12 +469,19 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, NameSet table_expression_required_names_without_filter) { const auto & query_context = planner_context->getQueryContext(); - auto filter_query_tree = buildQueryTree(filter_expression, query_context); QueryAnalysisPass query_analysis_pass(table_expression); query_analysis_pass.run(filter_query_tree, query_context); + return buildFilterInfo(std::move(filter_query_tree), table_expression, planner_context, std::move(table_expression_required_names_without_filter)); +} + +FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, + const QueryTreeNodePtr & table_expression, + PlannerContextPtr & planner_context, + NameSet table_expression_required_names_without_filter) +{ if (table_expression_required_names_without_filter.empty()) { auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); @@ -511,7 +489,7 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, 
table_expression_required_names_without_filter.insert(table_expression_names.begin(), table_expression_names.end()); } - collectSourceColumns(filter_query_tree, planner_context); + collectSourceColumns(filter_query_tree, planner_context, false /*keep_alias_columns*/); collectSets(filter_query_tree, *planner_context); auto filter_actions_dag = std::make_shared(); diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 8df26d598b1..bf45770552b 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -70,7 +70,9 @@ QueryTreeNodePtr mergeConditionNodes(const QueryTreeNodes & condition_nodes, con /// Replace table expressions from query JOIN TREE with dummy tables using ResultReplacementMap = std::unordered_map; -QueryTreeNodePtr replaceTableExpressionsWithDummyTables(const QueryTreeNodePtr & query_node, +QueryTreeNodePtr replaceTableExpressionsWithDummyTables( + const QueryTreeNodePtr & query_node, + const QueryTreeNodes & table_nodes, const ContextPtr & context, ResultReplacementMap * result_replacement_map = nullptr); @@ -87,6 +89,11 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, PlannerContextPtr & planner_context, NameSet table_expression_required_names_without_filter = {}); +FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, + const QueryTreeNodePtr & table_expression, + PlannerContextPtr & planner_context, + NameSet table_expression_required_names_without_filter = {}); + ASTPtr parseAdditionalResultFilter(const Settings & settings); } diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index 362f7109f47..ef640bcd42d 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -126,8 +127,10 @@ public: const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - auto storage_dummy - = std::make_shared(storage_snapshot->storage.getStorageID(), ColumnsDescription(storage_snapshot->getColumns(get_column_options))); + auto storage_dummy = std::make_shared( + storage_snapshot->storage.getStorageID(), + ColumnsDescription(storage_snapshot->getColumns(get_column_options)), + storage_snapshot); auto dummy_table_node = std::make_shared(std::move(storage_dummy), context); @@ -154,7 +157,8 @@ QueryTreeNodePtr replaceTablesWithDummyTables(const QueryTreeNodePtr & query, co /// Otherwise we can execute current query up to WithMergableStage only. 
const QueryNode * findQueryForParallelReplicas( std::stack stack, - const std::unordered_map & mapping) + const std::unordered_map & mapping, + const Settings & settings) { const QueryPlan::Node * prev_checked_node = nullptr; const QueryNode * res = nullptr; @@ -190,7 +194,11 @@ const QueryNode * findQueryForParallelReplicas( { const auto * expression = typeid_cast(step); const auto * filter = typeid_cast(step); - if (!expression && !filter) + + const auto * creating_sets = typeid_cast(step); + bool allowed_creating_sets = settings.parallel_replicas_allow_in_with_subquery && creating_sets; + + if (!expression && !filter && !allowed_creating_sets) can_distribute_full_node = false; next_node_to_check = children.front(); @@ -263,7 +271,7 @@ const QueryNode * findQueryForParallelReplicas(const QueryTreeNodePtr & query_tr auto updated_query_tree = replaceTablesWithDummyTables(query_tree_node, mutable_context); SelectQueryOptions options; - Planner planner(updated_query_tree, options, std::make_shared(nullptr, nullptr)); + Planner planner(updated_query_tree, options, std::make_shared(nullptr, nullptr, FiltersForTableExpressionMap{})); planner.buildQueryPlanIfNeeded(); /// This part is a bit clumsy. @@ -272,7 +280,7 @@ const QueryNode * findQueryForParallelReplicas(const QueryTreeNodePtr & query_tr /// So that we build a list of candidates again, and call findQueryForParallelReplicas for it. auto new_stack = getSupportingParallelReplicasQuery(updated_query_tree.get()); const auto & mapping = planner.getQueryNodeToPlanStepMapping(); - const auto * res = findQueryForParallelReplicas(new_stack, mapping); + const auto * res = findQueryForParallelReplicas(new_stack, mapping, context->getSettingsRef()); /// Now, return a query from initial stack. if (res) diff --git a/src/Processors/Executors/ExecutingGraph.h b/src/Processors/Executors/ExecutingGraph.h index 1a7659d3e08..e6d41321edd 100644 --- a/src/Processors/Executors/ExecutingGraph.h +++ b/src/Processors/Executors/ExecutingGraph.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -117,7 +118,11 @@ public: } }; - using Queue = std::queue; + /// This queue can grow a lot and lead to OOM. That is why we use non-default + /// allocator for container which throws exceptions in operator new + using DequeWithMemoryTracker = std::deque>; + using Queue = std::queue; + using NodePtr = std::unique_ptr; using Nodes = std::vector; Nodes nodes; diff --git a/src/Processors/Executors/ExecutorTasks.h b/src/Processors/Executors/ExecutorTasks.h index d35f8de94d1..202ca253c6c 100644 --- a/src/Processors/Executors/ExecutorTasks.h +++ b/src/Processors/Executors/ExecutorTasks.h @@ -47,7 +47,10 @@ class ExecutorTasks public: using Stack = std::stack; - using Queue = std::queue; + /// This queue can grow a lot and lead to OOM. 
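The hunk above extends the step whitelist in findQueryForParallelReplicas: Expression and Filter steps are always allowed, and CreatingSets is allowed only when parallel_replicas_allow_in_with_subquery is set. A standalone sketch of that check is below; the step classes are simplified stand-ins for the real plan steps.

// Standalone sketch: only a whitelist of step types keeps the node fully
// distributable, one of them gated by a setting.
#include <iostream>
#include <memory>
#include <vector>

struct IStep { virtual ~IStep() = default; };
struct ExpressionStep : IStep {};
struct FilterStep : IStep {};
struct CreatingSetsStep : IStep {};
struct AggregatingStep : IStep {};

bool canDistributeFully(const std::vector<std::unique_ptr<IStep>> & steps,
                        bool allow_in_with_subquery)
{
    for (const auto & step : steps)
    {
        const bool expression = dynamic_cast<ExpressionStep *>(step.get()) != nullptr;
        const bool filter = dynamic_cast<FilterStep *>(step.get()) != nullptr;
        const bool creating_sets
            = allow_in_with_subquery && dynamic_cast<CreatingSetsStep *>(step.get()) != nullptr;

        if (!expression && !filter && !creating_sets)
            return false;   // an unsupported step breaks full distribution
    }
    return true;
}

int main()
{
    std::vector<std::unique_ptr<IStep>> steps;
    steps.push_back(std::make_unique<FilterStep>());
    steps.push_back(std::make_unique<CreatingSetsStep>());

    std::cout << canDistributeFully(steps, /*allow_in_with_subquery=*/false) << '\n'; // 0
    std::cout << canDistributeFully(steps, /*allow_in_with_subquery=*/true) << '\n';  // 1
}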
That is why we use non-default + /// allocator for container which throws exceptions in operator new + using DequeWithMemoryTracker = std::deque>; + using Queue = std::queue; void finish(); bool isFinished() const { return finished; } diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 580aaa2b259..5b5880759e6 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -138,8 +138,8 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) initializeExecution(1, true); // Acquire slot until we are done - single_thread_slot = slots->tryAcquire(); - chassert(single_thread_slot && "Unable to allocate slot for the first thread, but we just allocated at least one slot"); + single_thread_cpu_slot = cpu_slots->tryAcquire(); + chassert(single_thread_cpu_slot && "Unable to allocate cpu slot for the first thread, but we just allocated at least one slot"); if (yield_flag && *yield_flag) return true; @@ -155,7 +155,7 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) if (node->exception) std::rethrow_exception(node->exception); - single_thread_slot.reset(); + single_thread_cpu_slot.reset(); finalizeExecution(); return false; @@ -333,8 +333,8 @@ void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_ /// Allocate CPU slots from concurrency control size_t min_threads = concurrency_control ? 1uz : num_threads; - slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); - use_threads = slots->grantedCount(); + cpu_slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); + use_threads = cpu_slots->grantedCount(); Queue queue; graph->initializeExecution(queue); @@ -348,7 +348,7 @@ void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_ void PipelineExecutor::spawnThreads() { - while (auto slot = slots->tryAcquire()) + while (auto slot = cpu_slots->tryAcquire()) { size_t thread_num = threads.fetch_add(1); @@ -391,7 +391,9 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) SCOPE_EXIT_SAFE( if (!finished_flag) { - finish(); + /// If finished_flag is not set, there was an exception. + /// Cancel execution in this case. + cancel(); if (pool) pool->wait(); } @@ -405,7 +407,7 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) } else { - auto slot = slots->tryAcquire(); + auto slot = cpu_slots->tryAcquire(); executeSingleThread(0); } diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 862a460f0ed..03f0f7f1a0a 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -5,7 +5,9 @@ #include #include #include +#include +#include #include #include #include @@ -68,8 +70,8 @@ private: ExecutorTasks tasks; /// Concurrency control related - ConcurrencyControl::AllocationPtr slots; - ConcurrencyControl::SlotPtr single_thread_slot; // slot for single-thread mode to work using executeStep() + SlotAllocationPtr cpu_slots; + AcquiredSlotPtr single_thread_cpu_slot; // cpu slot for single-thread mode to work using executeStep() std::unique_ptr pool; std::atomic_size_t threads = 0; @@ -90,7 +92,10 @@ private: ReadProgressCallbackPtr read_progress_callback; - using Queue = std::queue; + /// This queue can grow a lot and lead to OOM. 
That is why we use non-default + /// allocator for container which throws exceptions in operator new + using DequeWithMemoryTracker = std::deque>; + using Queue = std::queue; void initializeExecution(size_t num_threads, bool concurrency_control); /// Initialize executor contexts and task_queue. void finalizeExecution(); /// Check all processors are finished. diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 345bec395b2..d27002197d2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -23,7 +23,6 @@ struct PullingAsyncPipelineExecutor::Data std::atomic_bool is_finished = false; std::atomic_bool has_exception = false; ThreadFromGlobalPool thread; - Poco::Event finish_event; ~Data() { @@ -89,12 +88,10 @@ static void threadFunction( data.has_exception = true; /// Finish lazy format in case of exception. Otherwise thread.join() may hung. - if (data.lazy_format) - data.lazy_format->finalize(); + data.lazy_format->finalize(); } data.is_finished = true; - data.finish_event.set(); } @@ -129,20 +126,8 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) return false; } - if (lazy_format) - { - chunk = lazy_format->getChunk(milliseconds); - data->rethrowExceptionIfHas(); - return true; - } - - chunk.clear(); - - if (milliseconds) - data->finish_event.tryWait(milliseconds); - else - data->finish_event.wait(); - + chunk = lazy_format->getChunk(milliseconds); + data->rethrowExceptionIfHas(); return true; } @@ -230,14 +215,12 @@ void PullingAsyncPipelineExecutor::cancelWithExceptionHandling(CancelFunc && can Chunk PullingAsyncPipelineExecutor::getTotals() { - return lazy_format ? lazy_format->getTotals() - : Chunk(); + return lazy_format->getTotals(); } Chunk PullingAsyncPipelineExecutor::getExtremes() { - return lazy_format ? lazy_format->getExtremes() - : Chunk(); + return lazy_format->getExtremes(); } Block PullingAsyncPipelineExecutor::getTotalsBlock() @@ -264,15 +247,7 @@ Block PullingAsyncPipelineExecutor::getExtremesBlock() ProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() { - if (lazy_format) - return lazy_format->getProfileInfo(); - - static ProfileInfo profile_info; - static std::once_flag flag; - /// Calculate rows before limit here to avoid race. 
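The executor queues above switch to std::deque with a non-default allocator so that queue growth is accounted and an exception is thrown instead of the process being OOM-killed. Below is a minimal standalone sketch of such an allocator; the real AllocatorWithMemoryTracking charges ClickHouse's MemoryTracker, while this sketch uses a crude global byte counter and limit chosen only for illustration.

// Standalone sketch: a deque-compatible allocator that accounts bytes against
// a limit and throws std::bad_alloc instead of letting the queue grow unbounded.
#include <atomic>
#include <cstddef>
#include <deque>
#include <iostream>
#include <new>
#include <queue>

inline std::atomic<size_t> tracked_bytes{0};
constexpr size_t memory_limit = 64 * 1024 * 1024;   // 64 MiB, arbitrary for the sketch

template <typename T>
struct TrackingAllocator
{
    using value_type = T;

    TrackingAllocator() = default;
    template <typename U>
    TrackingAllocator(const TrackingAllocator<U> &) {}

    T * allocate(size_t n)
    {
        const size_t bytes = n * sizeof(T);
        if (tracked_bytes.fetch_add(bytes) + bytes > memory_limit)
        {
            tracked_bytes.fetch_sub(bytes);
            throw std::bad_alloc();   // fail loudly instead of being OOM-killed
        }
        return static_cast<T *>(::operator new(bytes));
    }

    void deallocate(T * ptr, size_t n) noexcept
    {
        tracked_bytes.fetch_sub(n * sizeof(T));
        ::operator delete(ptr);
    }
};

template <typename T, typename U>
bool operator==(const TrackingAllocator<T> &, const TrackingAllocator<U> &) { return true; }
template <typename T, typename U>
bool operator!=(const TrackingAllocator<T> &, const TrackingAllocator<U> &) { return false; }

// Same shape as the executor queues: std::queue on top of a tracked std::deque.
using TrackedDeque = std::deque<int, TrackingAllocator<int>>;
using TaskQueue = std::queue<int, TrackedDeque>;

int main()
{
    TaskQueue queue;
    for (int i = 0; i < 1000; ++i)
        queue.push(i);
    std::cout << "tracked bytes: " << tracked_bytes.load() << '\n';
}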
- std::call_once(flag, []() { profile_info.getRowsBeforeLimit(); }); - - return profile_info; + return lazy_format->getProfileInfo(); } } diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.h b/src/Processors/Executors/PushingAsyncPipelineExecutor.h index 4b4b83a90b5..f976cd4c339 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.h +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.h @@ -1,6 +1,5 @@ #pragma once #include -#include #include namespace DB diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 79b7ca17a5a..45523700a5d 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -81,7 +81,7 @@ IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & fo { } -void IIRowSchemaReader::setContext(ContextPtr & context) +void IIRowSchemaReader::setContext(const ContextPtr & context) { ColumnsDescription columns; if (tryParseColumnsListFromString(hints_str, columns, context, hints_parsing_error)) diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 94df71a88b4..23c6606a6bd 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -34,7 +34,7 @@ public: virtual bool hasStrictOrderOfColumns() const { return true; } virtual bool needContext() const { return false; } - virtual void setContext(ContextPtr &) {} + virtual void setContext(const ContextPtr &) {} virtual void setMaxRowsAndBytesToRead(size_t, size_t) {} virtual size_t getNumRowsRead() const { return 0; } @@ -56,7 +56,7 @@ public: IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_ = nullptr); bool needContext() const override { return !hints_str.empty(); } - void setContext(ContextPtr & context) override; + void setContext(const ContextPtr & context) override; protected: void setMaxRowsAndBytesToRead(size_t max_rows, size_t max_bytes) override diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 8dc8fa516dc..2ed55cca30c 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -212,7 +212,7 @@ static AvroDeserializer::DeserializeFn createDecimalDeserializeFn(const avro::No }; } -static std::string nodeToJson(avro::NodePtr root_node) +static std::string nodeToJSON(avro::NodePtr root_node) { std::ostringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM ss.exceptions(std::ios::failbit); @@ -641,7 +641,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not compatible with Avro {}:\n{}", - target_type->getName(), avro::toString(root_node->type()), nodeToJson(root_node)); + target_type->getName(), avro::toString(root_node->type()), nodeToJSON(root_node)); } AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(const avro::NodePtr & root_node) @@ -1016,7 +1016,7 @@ private: http_basic_credentials.authenticate(request); } - auto session = makePooledHTTPSession(url, timeouts, 1); + auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, url, timeouts); session->sendRequest(request); Poco::Net::HTTPResponse response; @@ -1025,8 +1025,6 @@ private: Poco::JSON::Parser parser; auto json_body = parser.parse(*response_body).extract(); - /// Response was fully read. 
- markSessionForReuse(session); auto schema = json_body->getValue("schema"); LOG_TRACE((getLogger("AvroConfluentRowInputFormat")), "Successfully fetched schema id = {}\n{}", id, schema); diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index bf584b759f8..f91f7cf536b 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -28,7 +27,6 @@ #include #include #include -#include namespace DB diff --git a/src/Processors/Formats/Impl/DWARFBlockInputFormat.h b/src/Processors/Formats/Impl/DWARFBlockInputFormat.h index 0345a264d47..d8f5fc3d896 100644 --- a/src/Processors/Formats/Impl/DWARFBlockInputFormat.h +++ b/src/Processors/Formats/Impl/DWARFBlockInputFormat.h @@ -124,7 +124,7 @@ private: class DWARFSchemaReader : public ISchemaReader { public: - DWARFSchemaReader(ReadBuffer & in_); + explicit DWARFSchemaReader(ReadBuffer & in_); NamesAndTypesList readSchema() override; }; diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h index e90dae31afd..16112325a97 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h @@ -69,7 +69,7 @@ public: class JSONAsObjectExternalSchemaReader : public IExternalSchemaReader { public: - JSONAsObjectExternalSchemaReader(const FormatSettings & settings); + explicit JSONAsObjectExternalSchemaReader(const FormatSettings & settings); NamesAndTypesList readSchema() override { diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h index f8b8a80731e..eb3020036e2 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h @@ -15,7 +15,7 @@ namespace DB class JSONColumnsReader : public JSONColumnsReaderBase { public: - JSONColumnsReader(ReadBuffer & in_); + explicit JSONColumnsReader(ReadBuffer & in_); void readChunkStart() override; std::optional readColumnStart() override; diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index 53cb5a77898..62d33d36206 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -215,7 +215,7 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase( { } -void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx) +void JSONColumnsSchemaReaderBase::setContext(const ContextPtr & ctx) { ColumnsDescription columns; if (tryParseColumnsListFromString(hints_str, columns, ctx, hints_parsing_error)) diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index fe80d77cd87..6d2532b350a 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -16,7 +16,7 @@ class ReadBuffer; class JSONColumnsReaderBase { public: - JSONColumnsReaderBase(ReadBuffer & in_); + explicit JSONColumnsReaderBase(ReadBuffer & in_); virtual ~JSONColumnsReaderBase() = default; @@ -84,7 +84,7 @@ public: void transformTypesFromDifferentFilesIfNeeded(DataTypePtr & type, DataTypePtr & 
new_type) override; bool needContext() const override { return !hints_str.empty(); } - void setContext(ContextPtr & ctx) override; + void setContext(const ContextPtr & ctx) override; void setMaxRowsAndBytesToRead(size_t max_rows, size_t max_bytes) override { diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockInputFormat.h index 265f76a74c1..e9db0b58223 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockInputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockInputFormat.h @@ -22,7 +22,7 @@ private: class JSONColumnsWithMetadataSchemaReader : public ISchemaReader { public: - JSONColumnsWithMetadataSchemaReader(ReadBuffer & in_); + explicit JSONColumnsWithMetadataSchemaReader(ReadBuffer & in_); NamesAndTypesList readSchema() override; }; diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h index 7f23e127ab4..7ec28995482 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h @@ -15,7 +15,7 @@ namespace DB class JSONCompactColumnsReader : public JSONColumnsReaderBase { public: - JSONCompactColumnsReader(ReadBuffer & in_); + explicit JSONCompactColumnsReader(ReadBuffer & in_); void readChunkStart() override; std::optional readColumnStart() override; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 0ef19a9c14f..6fa94356cd3 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -179,7 +179,7 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) else if (column_index == NESTED_FIELD) readNestedData(name_ref.toString(), columns); else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: illegal value of column_index"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal value of column_index"); } else { diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp index 23faa057715..67652a2cb0d 100644 --- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp @@ -71,27 +71,36 @@ void JSONRowInputFormat::resetReadBuffer() JSONEachRowRowInputFormat::resetReadBuffer(); } -JSONRowSchemaReader::JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) - : JSONRowSchemaReader(std::make_unique(in_), format_settings_) +JSONRowSchemaReader::JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, bool fallback_to_json_each_row_) + : JSONRowSchemaReader(std::make_unique(in_), format_settings_, fallback_to_json_each_row_) { } -JSONRowSchemaReader::JSONRowSchemaReader(std::unique_ptr buf, const DB::FormatSettings & format_settings_) - : JSONEachRowSchemaReader(*buf, format_settings_), peekable_buf(std::move(buf)) +JSONRowSchemaReader::JSONRowSchemaReader(std::unique_ptr buf, const DB::FormatSettings & format_settings_, bool fallback_to_json_each_row_) + : JSONEachRowSchemaReader(*buf, format_settings_), peekable_buf(std::move(buf)), fallback_to_json_each_row(fallback_to_json_each_row_) { } NamesAndTypesList JSONRowSchemaReader::readSchema() { skipBOMIfExists(*peekable_buf); - PeekableReadBufferCheckpoint checkpoint(*peekable_buf); - /// Try to parse 
metadata, if failed, try to parse data as JSONEachRow format - NamesAndTypesList names_and_types; - if (JSONUtils::checkAndSkipObjectStart(*peekable_buf) && JSONUtils::tryReadMetadata(*peekable_buf, names_and_types)) - return names_and_types; - peekable_buf->rollbackToCheckpoint(true); - return JSONEachRowSchemaReader::readSchema(); + if (fallback_to_json_each_row) + { + PeekableReadBufferCheckpoint checkpoint(*peekable_buf); + /// Try to parse metadata, if failed, try to parse data as JSONEachRow format + NamesAndTypesList names_and_types; + if (JSONUtils::checkAndSkipObjectStart(*peekable_buf) && JSONUtils::tryReadMetadata(*peekable_buf, names_and_types)) + return names_and_types; + + peekable_buf->rollbackToCheckpoint(true); + return JSONEachRowSchemaReader::readSchema(); + } + else + { + JSONUtils::skipObjectStart(*peekable_buf); + return JSONUtils::readMetadata(*peekable_buf); + } } void registerInputFormatJSON(FormatFactory & factory) @@ -110,19 +119,19 @@ void registerInputFormatJSON(FormatFactory & factory) void registerJSONSchemaReader(FormatFactory & factory) { - auto register_schema_reader = [&](const String & format) + auto register_schema_reader = [&](const String & format, bool fallback_to_json_each_row) { factory.registerSchemaReader( - format, [](ReadBuffer & buf, const FormatSettings & format_settings) { return std::make_unique(buf, format_settings); }); + format, [fallback_to_json_each_row](ReadBuffer & buf, const FormatSettings & format_settings) { return std::make_unique(buf, format_settings, fallback_to_json_each_row); }); factory.registerAdditionalInfoForSchemaCacheGetter(format, [](const FormatSettings & settings) { return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON); }); }; - register_schema_reader("JSON"); + register_schema_reader("JSON", true); /// JSONCompact has the same suffix with metadata. 
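The readSchema change above only keeps the checkpoint-and-rollback path when fallback to JSONEachRow is wanted; otherwise the metadata object must be present. A standalone sketch of the checkpoint/rollback idea is below, using plain stream positions in place of PeekableReadBuffer checkpoints; tryReadMetadata and the "meta:" header are hypothetical.

// Standalone sketch: try to parse metadata, otherwise roll back and infer the
// schema from the data itself.
#include <iostream>
#include <optional>
#include <sstream>
#include <string>

std::optional<std::string> tryReadMetadata(std::istream & in)
{
    std::string token;
    in >> token;
    if (token != "meta:")
        return std::nullopt;        // not a metadata header
    std::string schema;
    std::getline(in >> std::ws, schema);
    return schema;
}

std::string readSchema(std::istream & in, bool fallback_to_each_row)
{
    if (fallback_to_each_row)
    {
        const auto checkpoint = in.tellg();          // remember where we started
        if (auto metadata = tryReadMetadata(in))
            return "from metadata: " + *metadata;

        in.clear();
        in.seekg(checkpoint);                        // roll back and re-parse as plain rows
        std::string first_row;
        std::getline(in, first_row);
        return "inferred from row: " + first_row;
    }

    // No fallback requested: metadata must be present.
    auto metadata = tryReadMetadata(in);
    return metadata ? "from metadata: " + *metadata : "error: metadata expected";
}

int main()
{
    std::istringstream with_meta("meta: (a UInt64, b String)");
    std::istringstream rows_only("{\"a\": 1, \"b\": \"x\"}");

    std::cout << readSchema(with_meta, /*fallback_to_each_row=*/true) << '\n';
    std::cout << readSchema(rows_only, /*fallback_to_each_row=*/true) << '\n';
}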
- register_schema_reader("JSONCompact"); + register_schema_reader("JSONCompact", false); } } diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.h b/src/Processors/Formats/Impl/JSONRowInputFormat.h index b2e1d8a3d6d..6db5cee380a 100644 --- a/src/Processors/Formats/Impl/JSONRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowInputFormat.h @@ -45,16 +45,17 @@ private: class JSONRowSchemaReader : public JSONEachRowSchemaReader { public: - JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); + JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, bool fallback_to_json_each_row_); NamesAndTypesList readSchema() override; bool hasStrictOrderOfColumns() const override { return false; } private: - JSONRowSchemaReader(std::unique_ptr buf, const FormatSettings & format_settings_); + JSONRowSchemaReader(std::unique_ptr buf, const FormatSettings & format_settings_, bool fallback_to_json_each_row_); std::unique_ptr peekable_buf; + bool fallback_to_json_each_row; }; } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index ae2ce386665..76c468b3983 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -19,7 +19,7 @@ class ReadBuffer; class MsgPackVisitor : public msgpack::null_visitor { public: - MsgPackVisitor(bool null_as_default_) : null_as_default(null_as_default_) {} + explicit MsgPackVisitor(bool null_as_default_) : null_as_default(null_as_default_) {} struct Info { diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 8b6969bbfcc..447adb1ed48 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -224,7 +224,9 @@ Chunk ParallelParsingInputFormat::read() /// skipped all rows. For example, it can happen while using settings /// input_format_allow_errors_num/input_format_allow_errors_ratio /// and this segment contained only rows with errors. - /// Process the next unit. + /// Return this empty unit back to segmentator and process the next unit. + unit->status = READY_TO_INSERT; + segmentator_condvar.notify_all(); ++reader_ticket_number; unit = &processing_units[reader_ticket_number % processing_units.size()]; } diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index c4736ceea3a..963ccd88def 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -34,7 +34,7 @@ class Context; /** * ORDER-PRESERVING parallel parsing of data formats. * It splits original data into chunks. Then each chunk is parsed by different thread. - * The number of chunks equals to the number or parser threads. + * The number of chunks equals to the number of parser threads. * The size of chunk is equal to min_chunk_bytes_for_parallel_parsing setting. 
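The ParallelParsingInputFormat fix above returns a unit that produced no rows back to the segmentator (status READY_TO_INSERT plus a notify) instead of leaving it stuck. A heavily simplified standalone sketch of that unit ring is below; the real format has more states and threads, and all names here are stand-ins.

// Standalone sketch: hand a processing unit back to the producer
// ("segmentator") as soon as the consumer finds it empty.
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

enum class UnitStatus { ReadyToInsert, ReadyToRead };

struct ProcessingUnit
{
    UnitStatus status = UnitStatus::ReadyToInsert;
    int rows = 0;               // 0 models "all rows were skipped because of errors"
};

std::mutex mutex;
std::condition_variable segmentator_condvar;
std::vector<ProcessingUnit> units(4);

int main()
{
    // Producer: waits for a free unit, fills it, marks it ready to read.
    std::thread producer([]
    {
        for (int ticket = 0; ticket < 8; ++ticket)
        {
            std::unique_lock lock(mutex);
            auto & unit = units[ticket % units.size()];
            segmentator_condvar.wait(lock, [&] { return unit.status == UnitStatus::ReadyToInsert; });
            unit.rows = (ticket % 2 == 0) ? 0 : 10;   // every other unit is "empty"
            unit.status = UnitStatus::ReadyToRead;
            segmentator_condvar.notify_all();
        }
    });

    // Consumer: empty units are returned to the producer immediately.
    int total_rows = 0;
    for (int ticket = 0; ticket < 8; ++ticket)
    {
        std::unique_lock lock(mutex);
        auto & unit = units[ticket % units.size()];
        segmentator_condvar.wait(lock, [&] { return unit.status == UnitStatus::ReadyToRead; });
        total_rows += unit.rows;
        unit.status = UnitStatus::ReadyToInsert;      // hand the unit back, even if empty
        segmentator_condvar.notify_all();
    }

    producer.join();
    std::cout << "total rows: " << total_rows << '\n';  // 40
}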
* * Parsers @@ -119,25 +119,25 @@ public: finishAndWait(); } - void resetParser() override final + void resetParser() final { throw Exception(ErrorCodes::LOGICAL_ERROR, "resetParser() is not allowed for {}", getName()); } - const BlockMissingValues & getMissingValues() const override final + const BlockMissingValues & getMissingValues() const final { return last_block_missing_values; } size_t getApproxBytesReadForChunk() const override { return last_approx_bytes_read_for_chunk; } - String getName() const override final { return "ParallelParsingBlockInputFormat"; } + String getName() const final { return "ParallelParsingBlockInputFormat"; } private: - Chunk read() override final; + Chunk read() final; - void onCancel() override final + void onCancel() final { /* * The format parsers themselves are not being cancelled here, so we'll diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index 02ca2734ff8..4d71e0102d8 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -409,7 +409,7 @@ PODArray & compress(PODArray & source, PODArray & scratch, Com #pragma clang diagnostic pop if (max_dest_size > std::numeric_limits::max()) - throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", ReadableSize(source.size())); scratch.resize(max_dest_size); diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index aededc39dc4..3db90f4299b 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -79,7 +79,7 @@ private: MemoryToken mem; - ColumnChunk(ParquetBlockOutputFormat * p) : mem(p) {} + explicit ColumnChunk(ParquetBlockOutputFormat * p) : mem(p) {} }; struct RowGroupState diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h index 1aa2d99ca76..ff63d78fa44 100644 --- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.h @@ -83,7 +83,7 @@ private: class ParquetMetadataSchemaReader : public ISchemaReader { public: - ParquetMetadataSchemaReader(ReadBuffer & in_); + explicit ParquetMetadataSchemaReader(ReadBuffer & in_); NamesAndTypesList readSchema() override; }; diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index eee0b24b5ba..c5db8f2e30a 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -15,6 +16,7 @@ PrettyBlockOutputFormat::PrettyBlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, bool color_) : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()), color(color_), mono_block(mono_block_) { + readable_number_tip = header_.getColumns().size() == 1 && WhichDataType(header_.getDataTypes()[0]->getTypeId()).isNumber(); } @@ -305,6 +307,7 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind } writeCString(grid_symbols.bar, out); + writeReadableNumberTip(chunk); 
writeCString("\n", out); } @@ -410,6 +413,24 @@ void PrettyBlockOutputFormat::writeSuffix() } } +void PrettyBlockOutputFormat::writeReadableNumberTip(const Chunk & chunk) +{ + auto columns = chunk.getColumns(); + auto is_single_number = readable_number_tip && chunk.getNumRows() == 1 && chunk.getNumColumns() == 1; + if (!is_single_number) + return; + auto value = columns[0]->getFloat64(0); + auto threshold = format_settings.pretty.output_format_pretty_single_large_number_tip_threshold; + if (threshold == 0 || value <= threshold) + return; + if (color) + writeCString("\033[90m", out); + writeCString(" -- ", out); + formatReadableQuantity(value, out, 2); + if (color) + writeCString("\033[0m", out); +} + void registerOutputFormatPretty(FormatFactory & factory) { registerPrettyFormatWithNoEscapesAndMonoBlock(factory, "Pretty"); diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index 253a6a958cc..071bfa92e35 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -38,6 +38,7 @@ protected: virtual void writeChunk(const Chunk & chunk, PortKind port_kind); void writeMonoChunkIfNeeded(); void writeSuffix() override; + void writeReadableNumberTip(const Chunk & chunk); void onRowsReadBeforeUpdate() override { total_rows = getRowsReadBefore(); } @@ -57,6 +58,7 @@ protected: bool color; private: + bool readable_number_tip = false; bool mono_block; /// For mono_block == true only Chunk mono_chunk; diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index b547ce9358a..bda51770838 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -137,7 +138,7 @@ void PrettyCompactBlockOutputFormat::writeBottom(const Widths & max_widths) void PrettyCompactBlockOutputFormat::writeRow( size_t row_num, const Block & header, - const Columns & columns, + const Chunk & chunk, const WidthsPerColumn & widths, const Widths & max_widths) { @@ -157,6 +158,7 @@ void PrettyCompactBlockOutputFormat::writeRow( ascii_grid_symbols; size_t num_columns = max_widths.size(); + const auto & columns = chunk.getColumns(); writeCString(grid_symbols.bar, out); @@ -171,6 +173,7 @@ void PrettyCompactBlockOutputFormat::writeRow( } writeCString(grid_symbols.bar, out); + writeReadableNumberTip(chunk); writeCString("\n", out); } @@ -180,7 +183,6 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po size_t num_rows = chunk.getNumRows(); const auto & header = getPort(port_kind).getHeader(); - const auto & columns = chunk.getColumns(); WidthsPerColumn widths; Widths max_widths; @@ -190,7 +192,8 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po writeHeader(header, max_widths, name_widths); for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i) - writeRow(i, header, columns, widths, max_widths); + writeRow(i, header, chunk, widths, max_widths); + writeBottom(max_widths); diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h index 20cb931f282..911fc2e950c 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h @@ -22,7 +22,7 @@ 
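writeReadableNumberTip above appends a dim "-- 1.23 billion" style hint when the result is a single numeric value above a threshold. A standalone sketch of the formatting and the threshold/colour gating follows; formatReadableQuantity here is a simplified reimplementation for illustration, not the ClickHouse helper.

// Standalone sketch of the single-large-number tip: append a grey
// human-readable quantity after the value when it exceeds a threshold.
#include <array>
#include <cstdio>
#include <iostream>
#include <string>

std::string formatReadableQuantity(double value)
{
    static constexpr std::array<const char *, 5> suffixes{"", " thousand", " million", " billion", " trillion"};
    size_t i = 0;
    while (value >= 1000.0 && i + 1 < suffixes.size())
    {
        value /= 1000.0;
        ++i;
    }
    char buf[64];
    std::snprintf(buf, sizeof(buf), "%.2f%s", value, suffixes[i]);
    return buf;
}

void writeReadableNumberTip(std::ostream & out, double value, double threshold, bool color)
{
    if (threshold == 0 || value <= threshold)
        return;                                 // tip disabled or value small enough
    if (color)
        out << "\033[90m";                      // dim grey, like the Pretty formats
    out << " -- " << formatReadableQuantity(value);
    if (color)
        out << "\033[0m";
}

int main()
{
    std::cout << 1234567890;
    writeReadableNumberTip(std::cout, 1234567890, /*threshold=*/1'000'000, /*color=*/false);
    std::cout << '\n';   // 1234567890 -- 1.23 billion
}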
private: void writeRow( size_t row_num, const Block & header, - const Columns & columns, + const Chunk & chunk, const WidthsPerColumn & widths, const Widths & max_widths); diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index f8e2ede869f..e92863c93fb 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -1,8 +1,8 @@ -#include +#include #include #include -#include #include +#include namespace DB @@ -30,9 +30,7 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port calculateWidths(header, chunk, widths, max_widths, name_widths); if (format_settings.pretty.output_format_pretty_row_numbers) - { writeString(String(row_number_width, ' '), out); - } /// Names for (size_t i = 0; i < num_columns; ++i) { @@ -75,9 +73,7 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port // Write row number; auto row_num_string = std::to_string(row + 1 + total_rows) + ". "; for (size_t i = 0; i < row_number_width - row_num_string.size(); ++i) - { writeCString(" ", out); - } writeString(row_num_string, out); } for (size_t column = 0; column < num_columns; ++column) @@ -87,10 +83,11 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port const auto & type = *header.getByPosition(column).type; auto & cur_width = widths[column].empty() ? max_widths[column] : widths[column][row]; - writeValueWithPadding(*columns[column], *serializations[column], - row, cur_width, max_widths[column], type.shouldAlignRightInPrettyFormats()); + writeValueWithPadding( + *columns[column], *serializations[column], row, cur_width, max_widths[column], type.shouldAlignRightInPrettyFormats()); } + writeReadableNumberTip(chunk); writeChar('\n', out); } diff --git a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp index c2ee5923c01..3578401a0f8 100644 --- a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp @@ -12,6 +12,7 @@ #include #include +#include "DataTypes/IDataType.h" #include #include @@ -35,9 +36,12 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + constexpr auto FORMAT_NAME = "Prometheus"; -static bool isDataTypeMapString(const DataTypePtr & type) +bool isDataTypeMapString(const DataTypePtr & type) { if (!isMap(type)) return false; @@ -45,8 +49,8 @@ static bool isDataTypeMapString(const DataTypePtr & type) return isStringOrFixedString(type_map->getKeyType()) && isStringOrFixedString(type_map->getValueType()); } -template -static void getColumnPos(const Block & header, const String & col_name, Pred pred, ResType & res) +template +void getColumnPos(const Block & header, const String & col_name, bool (*pred)(const DataTypePtr &), ResType & res) { static_assert(std::is_same_v || std::is_same_v>, "Illegal ResType"); @@ -71,7 +75,7 @@ static void getColumnPos(const Block & header, const String & col_name, Pred pre } } -static Float64 tryParseFloat(const String & s) +Float64 tryParseFloat(const String & s) { Float64 t = 0; ReadBufferFromString buf(s); @@ -79,6 +83,8 @@ static Float64 tryParseFloat(const String & s) return t; } +} + PrometheusTextOutputFormat::PrometheusTextOutputFormat( WriteBuffer & out_, const Block & header_, @@ -89,12 +95,12 @@ PrometheusTextOutputFormat::PrometheusTextOutputFormat( { const Block & 
header = getPort(PortKind::Main).getHeader(); - getColumnPos(header, "name", isStringOrFixedString, pos.name); - getColumnPos(header, "value", isNumber, pos.value); + getColumnPos(header, "name", isStringOrFixedString, pos.name); + getColumnPos(header, "value", isNumber, pos.value); - getColumnPos(header, "help", isStringOrFixedString, pos.help); - getColumnPos(header, "type", isStringOrFixedString, pos.type); - getColumnPos(header, "timestamp", isNumber, pos.timestamp); + getColumnPos(header, "help", isStringOrFixedString, pos.help); + getColumnPos(header, "type", isStringOrFixedString, pos.type); + getColumnPos(header, "timestamp", isNumber, pos.timestamp); getColumnPos(header, "labels", isDataTypeMapString, pos.labels); } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index a6e4600d83b..f5edfb7c9d4 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -609,7 +609,9 @@ void registerTemplateSchemaReader(FormatFactory & factory) { size_t index = 0; auto idx_getter = [&](const String &) -> std::optional { return index++; }; - auto row_format = fillRowFormat(settings, idx_getter, false); + ParsedTemplateFormatString row_format; + if (!settings.template_settings.row_format.empty()) + row_format = fillRowFormat(settings, idx_getter, false); std::unordered_set visited_escaping_rules; String result = fmt::format("row_format={}, resultset_format={}, row_between_delimiter={}", settings.template_settings.row_format, diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index bf2765bfd1e..f82a8c8ab64 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -37,7 +37,7 @@ public: void resetReadBuffer() override; /// TODO: remove context somehow. 
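The getColumnPos change above drops the deduced Pred template parameter in favour of a plain function pointer, so the function is instantiated only per result type and every predicate must match one signature. A minimal standalone sketch of the same refactoring, with simplified types, is below.

// Standalone sketch: a plain function-pointer predicate instead of a deduced
// template parameter.
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct Column { std::string name; std::string type; };

bool isString(const Column & c) { return c.type == "String"; }
bool isNumber(const Column & c) { return c.type == "UInt64" || c.type == "Float64"; }

template <typename ResType>
void getColumnPos(const std::vector<Column> & header, const std::string & name,
                  bool (*pred)(const Column &), ResType & res)
{
    for (size_t i = 0; i < header.size(); ++i)
    {
        if (header[i].name == name && pred(header[i]))
        {
            res = static_cast<ResType>(i);
            return;
        }
    }
    throw std::runtime_error("column " + name + " not found or has unexpected type");
}

int main()
{
    std::vector<Column> header = {{"name", "String"}, {"value", "UInt64"}};

    size_t name_pos = 0;
    size_t value_pos = 0;
    getColumnPos(header, "name", isString, name_pos);
    getColumnPos(header, "value", isNumber, value_pos);
    std::cout << name_pos << ' ' << value_pos << '\n';   // 0 1
}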
- void setContext(ContextPtr & context_) { context = Context::createCopy(context_); } + void setContext(const ContextPtr & context_) { context = Context::createCopy(context_); } const BlockMissingValues & getMissingValues() const override { return block_missing_values; } diff --git a/src/Processors/Formats/InputFormatErrorsLogger.h b/src/Processors/Formats/InputFormatErrorsLogger.h index b30246ff705..2230490f66a 100644 --- a/src/Processors/Formats/InputFormatErrorsLogger.h +++ b/src/Processors/Formats/InputFormatErrorsLogger.h @@ -18,7 +18,7 @@ public: String raw_data; }; - InputFormatErrorsLogger(const ContextPtr & context); + explicit InputFormatErrorsLogger(const ContextPtr & context); virtual ~InputFormatErrorsLogger(); @@ -45,7 +45,7 @@ using InputFormatErrorsLoggerPtr = std::shared_ptr; class ParallelInputFormatErrorsLogger : public InputFormatErrorsLogger { public: - ParallelInputFormatErrorsLogger(const ContextPtr & context) : InputFormatErrorsLogger(context) { } + explicit ParallelInputFormatErrorsLogger(const ContextPtr & context) : InputFormatErrorsLogger(context) { } ~ParallelInputFormatErrorsLogger() override; diff --git a/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp b/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp index a56c24a740a..fcf338577f8 100644 --- a/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp +++ b/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp @@ -136,7 +136,7 @@ bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(co auto * curr_position = in->position(); if (curr_position < prev_position) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: parsing is non-deterministic."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Parsing is non-deterministic."); if (isNativeNumber(type) || isDate(type) || isDateTime(type) || isDateTime64(type)) { diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 845cf561968..28160b18269 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index f374a7b7b10..a76bacdd97b 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 0c632c346c7..d010a3327a6 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -1,10 +1,11 @@ +#include +#include +#include +#include #include #include #include -#include #include -#include -#include namespace DB { diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index df5c13a7f3b..ac5ea259d2e 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -64,6 +64,9 @@ public: using DataStreams = std::vector; +class QueryPlan; +using QueryPlanRawPtrs = std::list; + /// Single step of query plan. class IQueryPlanStep { @@ -109,6 +112,9 @@ public: /// Get description of processors added in current step. Should be called after updatePipeline(). 
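Many of the hunks in this patch only add explicit to single-argument constructors (DWARFSchemaReader, JSONColumnsReader, InputFormatErrorsLogger and friends), which blocks accidental implicit conversions at call sites. A tiny standalone illustration is below; the types are made up.

// Standalone illustration of why the single-argument constructors are marked
// explicit: it forbids silent implicit conversions.
#include <iostream>

struct Buffer { int size = 0; };

struct ImplicitReader
{
    ImplicitReader(const Buffer & buf) : size(buf.size) {}   // allows ImplicitReader r = buf;
    int size;
};

struct ExplicitReader
{
    explicit ExplicitReader(const Buffer & buf) : size(buf.size) {}
    int size;
};

int main()
{
    Buffer buf{42};

    ImplicitReader implicit_reader = buf;   // compiles: silent conversion from Buffer
    // ExplicitReader bad = buf;            // would not compile: conversion must be explicit
    ExplicitReader explicit_reader(buf);    // the intent is now spelled out

    std::cout << implicit_reader.size << ' ' << explicit_reader.size << '\n';
}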
virtual void describePipeline(FormatSettings & /*settings*/) const {} + /// Get child plans contained inside some steps (e.g ReadFromMerge) so that they are visible when doing EXPLAIN. + virtual QueryPlanRawPtrs getChildPlans() { return {}; } + /// Append extra processors for this step. void appendExtraProcessors(const Processors & extra_processors); diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 55b8cb5ed53..81ea1707717 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -22,6 +22,8 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.filter_push_down = from.query_plan_enable_optimizations && from.query_plan_filter_push_down; + settings.optimize_prewhere = from.query_plan_enable_optimizations && from.query_plan_optimize_prewhere; + settings.execute_functions_after_sorting = from.query_plan_enable_optimizations && from.query_plan_execute_functions_after_sorting; settings.reuse_storage_ordering_for_window_functions = from.query_plan_enable_optimizations && from.query_plan_reuse_storage_ordering_for_window_functions; diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 791b8e57475..68c2efc8b0e 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -61,6 +61,8 @@ struct QueryPlanOptimizationSettings /// If remove-redundant-distinct-steps optimization is enabled. bool remove_redundant_distinct = true; + bool optimize_prewhere = true; + /// If reading from projection can be applied bool optimize_projection = false; bool force_use_projection = false; diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 49e1a49f131..8c5839a9803 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -1,8 +1,9 @@ #include #include #include -#include +#include #include +#include #include #include #include @@ -38,46 +39,35 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) * * 1. SomeNode * 2. FilterNode - * 3. ReadFromMergeTreeNode + * 3. SourceStepWithFilterNode */ - auto * read_from_merge_tree = typeid_cast(frame.node->step.get()); - if (!read_from_merge_tree) + auto * source_step_with_filter = dynamic_cast(frame.node->step.get()); + if (!source_step_with_filter) return; - const auto & storage_prewhere_info = read_from_merge_tree->getPrewhereInfo(); + const auto & storage_snapshot = source_step_with_filter->getStorageSnapshot(); + const auto & storage = storage_snapshot->storage; + if (!storage.canMoveConditionsToPrewhere()) + return; + + const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); if (storage_prewhere_info && storage_prewhere_info->prewhere_actions) return; + /// TODO: We can also check for UnionStep, such as StorageBuffer and local distributed plans. 
QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node; const auto * filter_step = typeid_cast(filter_node->step.get()); if (!filter_step) return; - const auto & context = read_from_merge_tree->getContext(); + const auto & context = source_step_with_filter->getContext(); const auto & settings = context->getSettingsRef(); - if (!settings.allow_experimental_analyzer) - return; - - bool is_final = read_from_merge_tree->isQueryWithFinal(); + bool is_final = source_step_with_filter->isQueryWithFinal(); bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); if (!optimize_move_to_prewhere) return; - const auto & storage_snapshot = read_from_merge_tree->getStorageSnapshot(); - - ColumnsWithTypeAndName required_columns_after_filter; - if (read_from_merge_tree->isQueryWithSampling()) - { - const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey(); - const auto & sampling_source_columns = sampling_key.expression->getRequiredColumnsWithTypes(); - for (const auto & column : sampling_source_columns) - required_columns_after_filter.push_back(ColumnWithTypeAndName(column.type, column.name)); - const auto & sampling_result_columns = sampling_key.sample_block.getColumnsWithTypeAndName(); - required_columns_after_filter.insert(required_columns_after_filter.end(), sampling_result_columns.begin(), sampling_result_columns.end()); - } - - const auto & storage = storage_snapshot->storage; const auto & storage_metadata = storage_snapshot->metadata; auto column_sizes = storage.getColumnSizes(); if (column_sizes.empty()) @@ -88,19 +78,19 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) for (const auto & [name, sizes] : column_sizes) column_compressed_sizes[name] = sizes.data_compressed; - Names queried_columns = read_from_merge_tree->getRealColumnNames(); + Names queried_columns = source_step_with_filter->requiredSourceColumns(); MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), storage_metadata, - storage.getConditionEstimatorByPredicate(read_from_merge_tree->getQueryInfo(), storage_snapshot, context), + storage.getConditionEstimatorByPredicate(source_step_with_filter->getQueryInfo(), storage_snapshot, context), queried_columns, storage.supportedPrewhereColumns(), getLogger("QueryPlanOptimizePrewhere")}; auto optimize_result = where_optimizer.optimize(filter_step->getExpression(), filter_step->getFilterColumnName(), - read_from_merge_tree->getContext(), + source_step_with_filter->getContext(), is_final); if (optimize_result.prewhere_nodes.empty()) @@ -113,11 +103,12 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info = std::make_shared(); prewhere_info->need_filter = true; + prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn(); auto filter_expression = filter_step->getExpression(); const auto & filter_column_name = filter_step->getFilterColumnName(); - if (optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn()) + if (prewhere_info->remove_prewhere_column) { removeFromOutput(*filter_expression, filter_column_name); auto & outputs = filter_expression->getOutputs(); @@ -142,7 +133,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) /// /// So, here we restore removed inputs for PREWHERE actions { - std::unordered_set first_outputs(split_result.first->getOutputs().begin(), split_result.first->getOutputs().end()); + std::unordered_set first_outputs( + 
split_result.first->getOutputs().begin(), split_result.first->getOutputs().end()); for (const auto * input : split_result.first->getInputs()) { if (!first_outputs.contains(input)) @@ -157,7 +149,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) ActionsDAG::NodeRawConstPtrs conditions; conditions.reserve(split_result.split_nodes_mapping.size()); - for (const auto * condition : optimize_result.prewhere_nodes) + for (const auto * condition : optimize_result.prewhere_nodes_list) conditions.push_back(split_result.split_nodes_mapping.at(condition)); prewhere_info->prewhere_actions = std::move(split_result.first); @@ -166,7 +158,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) if (conditions.size() == 1) { prewhere_info->prewhere_column_name = conditions.front()->result_name; - prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front()); + if (prewhere_info->remove_prewhere_column) + prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front()); } else { @@ -178,20 +171,21 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info->prewhere_actions->getOutputs().push_back(node); } - read_from_merge_tree->updatePrewhereInfo(prewhere_info); + source_step_with_filter->updatePrewhereInfo(prewhere_info); if (!optimize_result.fully_moved_to_prewhere) { filter_node->step = std::make_unique( - read_from_merge_tree->getOutputStream(), + source_step_with_filter->getOutputStream(), std::move(split_result.second), filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); } else { + /// Have to keep this expression to change column names to column identifiers filter_node->step = std::make_unique( - read_from_merge_tree->getOutputStream(), + source_step_with_filter->getOutputStream(), std::move(split_result.second)); } } diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp index e98386a6ee9..dbcaf5f00a7 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp @@ -1,8 +1,8 @@ #include #include #include +#include #include -#include namespace DB::QueryPlanOptimizations { @@ -15,6 +15,14 @@ void optimizePrimaryKeyCondition(const Stack & stack) if (!source_step_with_filter) return; + const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); + if (storage_prewhere_info) + { + source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions, storage_prewhere_info->prewhere_column_name); + if (storage_prewhere_info->row_level_filter) + source_step_with_filter->addFilter(storage_prewhere_info->row_level_filter, storage_prewhere_info->row_level_column_name); + } + for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) @@ -28,6 +36,8 @@ void optimizePrimaryKeyCondition(const Stack & stack) else break; } + + source_step_with_filter->applyFilters(); } } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index fafd6d1dc00..915e664ea8f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -114,10 +114,13 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s while (!stack.empty()) { - /// NOTE: optimizePrewhere can modify the stack. 
- optimizePrewhere(stack, nodes); optimizePrimaryKeyCondition(stack); + /// NOTE: optimizePrewhere can modify the stack. + /// Prewhere optimization relies on PK optimization (getConditionEstimatorByPredicate) + if (optimization_settings.optimize_prewhere) + optimizePrewhere(stack, nodes); + auto & frame = stack.back(); if (frame.next_child == 0) @@ -223,11 +226,6 @@ void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan:: continue; } - if (auto * source_step_with_filter = dynamic_cast(frame.node->step.get())) - { - source_step_with_filter->applyFilters(); - } - addPlansForSets(plan, *frame.node, nodes); stack.pop_back(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 534716cc60e..b40fea47b3c 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -431,7 +432,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( { const auto & keys = aggregating.getParams().keys; const auto & aggregates = aggregating.getParams().aggregates; - Block key_virtual_columns = reading.getMergeTreeData().getSampleBlockWithVirtualColumns(); + Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter(); AggregateProjectionCandidates candidates; @@ -464,6 +465,9 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( // LOG_TRACE(getLogger("optimizeUseProjections"), "Query DAG: {}", dag.dag->dumpDAG()); candidates.has_filter = dag.filter_node; + /// We can't use minmax projection if filter has non-deterministic functions. 
+ if (dag.filter_node && !VirtualColumnUtils::isDeterministicInScopeOfQuery(dag.filter_node)) + can_use_minmax_projection = false; if (can_use_minmax_projection) { @@ -605,9 +609,6 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & for (auto & candidate : candidates.real) { auto required_column_names = candidate.dag->getRequiredColumnsNames(); - ActionDAGNodes added_filter_nodes; - if (candidates.has_filter) - added_filter_nodes.nodes.push_back(candidate.dag->getOutputs().front()); bool analyzed = analyzeProjectionCandidate( candidate, @@ -618,7 +619,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & query_info, context, max_added_blocks, - added_filter_nodes); + candidate.dag); if (!analyzed) continue; @@ -669,15 +670,16 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & auto proj_snapshot = std::make_shared(storage_snapshot->storage, storage_snapshot->metadata); proj_snapshot->addProjection(best_candidate->projection); - auto query_info_copy = query_info; - query_info_copy.prewhere_info = nullptr; + auto projection_query_info = query_info; + projection_query_info.prewhere_info = nullptr; + projection_query_info.filter_actions_dag = nullptr; projection_reading = reader.readFromParts( /* parts = */ {}, /* alter_conversions = */ {}, best_candidate->dag->getRequiredColumnsNames(), proj_snapshot, - query_info_copy, + projection_query_info, context, reading->getMaxBlockSize(), reading->getNumStreams(), diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 4e2fa5b2389..cac172a856f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -135,7 +135,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) std::list candidates; NormalProjectionCandidate * best_candidate = nullptr; - const Names & required_columns = reading->getRealColumnNames(); + const Names & required_columns = reading->getAllColumnNames(); const auto & parts = reading->getParts(); const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); @@ -163,10 +163,6 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) auto & candidate = candidates.emplace_back(); candidate.projection = projection; - ActionDAGNodes added_filter_nodes; - if (query.filter_node) - added_filter_nodes.nodes.push_back(query.filter_node); - bool analyzed = analyzeProjectionCandidate( candidate, *reading, @@ -176,7 +172,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) query_info, context, max_added_blocks, - added_filter_nodes); + query.filter_node ? 
query.dag : nullptr); if (!analyzed) continue; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index a183f50dee5..8333f5e857b 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -214,7 +214,7 @@ bool analyzeProjectionCandidate( const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, - const ActionDAGNodes & added_filter_nodes) + const ActionsDAGPtr & dag) { MergeTreeData::DataPartsVector projection_parts; MergeTreeData::DataPartsVector normal_parts; @@ -223,7 +223,7 @@ bool analyzeProjectionCandidate( { const auto & created_projections = part_with_ranges.data_part->getProjectionParts(); auto it = created_projections.find(candidate.projection->name); - if (it != created_projections.end() && !it->second->is_broken) + if (it != created_projections.end()) { projection_parts.push_back(it->second); } @@ -237,13 +237,15 @@ bool analyzeProjectionCandidate( if (projection_parts.empty()) return false; + auto projection_query_info = query_info; + projection_query_info.prewhere_info = nullptr; + projection_query_info.filter_actions_dag = dag; + auto projection_result_ptr = reader.estimateNumMarksToRead( std::move(projection_parts), - nullptr, required_column_names, candidate.projection->metadata, - query_info, /// How it is actually used? I hope that for index we need only added_filter_nodes - added_filter_nodes, + projection_query_info, context, context->getSettingsRef().max_threads, max_added_blocks); diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index cc833a86925..e1e106b988e 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -60,6 +60,6 @@ bool analyzeProjectionCandidate( const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, - const ActionDAGNodes & added_filter_nodes); + const ActionsDAGPtr & dag); } diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 363fdca22c5..5235785907c 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -8,6 +8,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -30,11 +34,76 @@ std::string toString(const Values & value) return fmt::format("({})", fmt::join(value, ", ")); } +bool isSafePrimaryDataKeyType(const IDataType & data_type) +{ + auto type_id = data_type.getTypeId(); + switch (type_id) + { + case TypeIndex::Float32: + case TypeIndex::Float64: + case TypeIndex::Nullable: + case TypeIndex::Object: + return false; + case TypeIndex::Array: + { + const auto & data_type_array = static_cast(data_type); + return isSafePrimaryDataKeyType(*data_type_array.getNestedType()); + } + case TypeIndex::Tuple: + { + const auto & data_type_tuple = static_cast(data_type); + const auto & data_type_tuple_elements = data_type_tuple.getElements(); + for (const auto & data_type_tuple_element : data_type_tuple_elements) + if (!isSafePrimaryDataKeyType(*data_type_tuple_element)) + return false; + + return true; + } + case TypeIndex::LowCardinality: + { + const auto & data_type_low_cardinality = static_cast(data_type); + return 
isSafePrimaryDataKeyType(*data_type_low_cardinality.getDictionaryType()); + } + case TypeIndex::Map: + { + const auto & data_type_map = static_cast(data_type); + return isSafePrimaryDataKeyType(*data_type_map.getKeyType()) && isSafePrimaryDataKeyType(*data_type_map.getValueType()); + } + case TypeIndex::Variant: + { + const auto & data_type_variant = static_cast(data_type); + const auto & data_type_variant_elements = data_type_variant.getVariants(); + for (const auto & data_type_variant_element : data_type_variant_elements) + if (!isSafePrimaryDataKeyType(*data_type_variant_element)) + return false; + + return false; + } + default: + { + break; + } + } + + return true; +} + +bool isSafePrimaryKey(const KeyDescription & primary_key) +{ + for (const auto & type : primary_key.data_types) + { + if (!isSafePrimaryDataKeyType(*type)) + return false; + } + + return true; +} + int compareValues(const Values & lhs, const Values & rhs) { - chassert(lhs.size() == rhs.size()); + size_t size = std::min(lhs.size(), rhs.size()); - for (size_t i = 0; i < lhs.size(); ++i) + for (size_t i = 0; i < size; ++i) { if (applyVisitor(FieldVisitorAccurateLess(), lhs[i], rhs[i])) return -1; @@ -54,9 +123,10 @@ public: Values getValue(size_t part_idx, size_t mark) const { - const auto & index = parts[part_idx].data_part->index; - Values values(index.size()); - for (size_t i = 0; i < values.size(); ++i) + const auto & index = parts[part_idx].data_part->getIndex(); + size_t size = index.size(); + Values values(size); + for (size_t i = 0; i < size; ++i) { index[i]->get(mark, values[i]); if (values[i].isNull()) @@ -117,8 +187,10 @@ public: return findLeftmostMarkGreaterThanValueInRange(part_index, value, mark_range.begin, mark_range.end); } - size_t getMarkRows(size_t part_idx, size_t mark) const { return parts[part_idx].data_part->index_granularity.getMarkRows(mark); } - + size_t getMarkRows(size_t part_idx, size_t mark) const + { + return parts[part_idx].data_part->index_granularity.getMarkRows(mark); + } private: const RangesInDataParts & parts; }; @@ -174,23 +246,20 @@ struct PartsRangesIterator else if (compare_result == 1) return false; - if (part_index == other.part_index) - { - /// Within the same part we should process events in order of mark numbers, - /// because they already ordered by value and range ends have greater mark numbers than the beginnings. - /// Otherwise we could get invalid ranges with the right bound that is less than the left bound. - const auto ev_mark = event == EventType::RangeStart ? range.begin : range.end; - const auto other_ev_mark = other.event == EventType::RangeStart ? other.range.begin : other.range.end; - - // Start event always before end event - if (ev_mark == other_ev_mark) - return event < other.event; - - return ev_mark < other_ev_mark; - } - if (event == other.event) + { + if (part_index == other.part_index) + { + /// Within the same part we should process events in order of mark numbers, + /// because they already ordered by value and range ends have greater mark numbers than the beginnings. + /// Otherwise we could get invalid ranges with the right bound that is less than the left bound. + const auto ev_mark = event == EventType::RangeStart ? range.begin : range.end; + const auto other_ev_mark = other.event == EventType::RangeStart ? 
other.range.begin : other.range.end; + return ev_mark < other_ev_mark; + } + return part_index < other.part_index; + } // Start event always before end event return event < other.event; @@ -216,18 +285,82 @@ struct PartsRangesIterator return true; } + void dump(WriteBuffer & buffer) const + { + buffer << "Part index " << part_index; + buffer << " event " << (event == PartsRangesIterator::EventType::RangeStart ? "Range Start" : "Range End"); + buffer << " range begin " << range.begin; + buffer << " end " << range.end; + buffer << " value " << ::toString(value) << '\n'; + } + + [[maybe_unused]] String toString() const + { + WriteBufferFromOwnString buffer; + dump(buffer); + return buffer.str(); + } + Values value; MarkRange range; size_t part_index; EventType event; }; +struct PartRangeIndex +{ + explicit PartRangeIndex(PartsRangesIterator & ranges_iterator) + : part_index(ranges_iterator.part_index) + , range(ranges_iterator.range) + {} + + bool operator==(const PartRangeIndex & other) const + { + return part_index == other.part_index && range.begin == other.range.begin && range.end == other.range.end; + } + + bool operator<(const PartRangeIndex & other) const + { + return part_index < other.part_index && range.begin < other.range.begin && range.end < other.range.end; + } + + size_t part_index; + MarkRange range; +}; + +struct PartRangeIndexHash +{ + size_t operator()(const PartRangeIndex & part_range_index) const noexcept + { + size_t result = 0; + + boost::hash_combine(result, part_range_index.part_index); + boost::hash_combine(result, part_range_index.range.begin); + boost::hash_combine(result, part_range_index.range.end); + + return result; + } +}; + struct SplitPartsRangesResult { RangesInDataParts non_intersecting_parts_ranges; RangesInDataParts intersecting_parts_ranges; }; +void dump(const std::vector & ranges_iterators, WriteBuffer & buffer) +{ + for (const auto & range_iterator : ranges_iterators) + range_iterator.dump(buffer); +} + +String toString(const std::vector & ranges_iterators) +{ + WriteBufferFromOwnString buffer; + dump(ranges_iterators, buffer); + return buffer.str(); +} + SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, const LoggerPtr & logger) { /** Split ranges in data parts into intersecting ranges in data parts and non intersecting ranges in data parts. 
@@ -307,7 +440,11 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, } } - std::sort(parts_ranges.begin(), parts_ranges.end()); + LOG_TEST(logger, "Parts ranges before sort {}", toString(parts_ranges)); + + ::sort(parts_ranges.begin(), parts_ranges.end()); + + LOG_TEST(logger, "Parts ranges after sort {}", toString(parts_ranges)); RangesInDataPartsBuilder intersecting_ranges_in_data_parts_builder(ranges_in_data_parts); RangesInDataPartsBuilder non_intersecting_ranges_in_data_parts_builder(ranges_in_data_parts); @@ -324,24 +461,27 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, intersecting_ranges_in_data_parts_builder.addRange(part_index, mark_range); }; - std::unordered_map part_index_start_to_range; + std::unordered_map part_index_start_to_range; chassert(!parts_ranges.empty()); chassert(parts_ranges[0].event == PartsRangesIterator::EventType::RangeStart); - part_index_start_to_range[parts_ranges[0].part_index] = parts_ranges[0].range; + part_index_start_to_range[PartRangeIndex(parts_ranges[0])] = parts_ranges[0].range; size_t parts_ranges_size = parts_ranges.size(); for (size_t i = 1; i < parts_ranges_size; ++i) { auto & previous_part_range = parts_ranges[i - 1]; + PartRangeIndex previous_part_range_index(previous_part_range); auto & current_part_range = parts_ranges[i]; + PartRangeIndex current_part_range_index(current_part_range); size_t intersecting_parts = part_index_start_to_range.size(); bool range_start = current_part_range.event == PartsRangesIterator::EventType::RangeStart; if (range_start) { - auto [it, inserted] = part_index_start_to_range.emplace(current_part_range.part_index, current_part_range.range); - chassert(inserted); + auto [it, inserted] = part_index_start_to_range.emplace(current_part_range_index, current_part_range.range); + if (!inserted) + throw Exception(ErrorCodes::LOGICAL_ERROR, "PartsSplitter expected unique range"); if (intersecting_parts != 1) continue; @@ -365,7 +505,7 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, if (end - begin >= min_number_of_marks_for_non_intersecting_range) { - part_index_start_to_range[previous_part_range.part_index].begin = end; + part_index_start_to_range[previous_part_range_index].begin = end; add_non_intersecting_range(previous_part_range.part_index, MarkRange{begin, end}); } @@ -379,8 +519,10 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, break; } - chassert(other_interval_it != part_index_start_to_range.end()); - size_t other_interval_part_index = other_interval_it->first; + if (!(other_interval_it != part_index_start_to_range.end() && other_interval_it != it)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "PartsSplitter expected single other interval"); + + size_t other_interval_part_index = other_interval_it->first.part_index; MarkRange other_interval_range = other_interval_it->second; /// If part level is 0, we must process whole other intersecting part because it can contain duplicate primary keys @@ -413,6 +555,7 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, } chassert(current_part_range.event == PartsRangesIterator::EventType::RangeEnd); + chassert(part_index_start_to_range.contains(current_part_range_index)); /** If there are more than 1 part ranges that we are currently processing * that means that this part range is intersecting with other range. 
@@ -421,8 +564,8 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, */ if (intersecting_parts != 1 || ranges_in_data_parts[current_part_range.part_index].data_part->info.level == 0) { - add_intersecting_range(current_part_range.part_index, part_index_start_to_range[current_part_range.part_index]); - part_index_start_to_range.erase(current_part_range.part_index); + add_intersecting_range(current_part_range.part_index, part_index_start_to_range[current_part_range_index]); + part_index_start_to_range.erase(current_part_range_index); continue; } @@ -432,13 +575,12 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, chassert(current_part_range.range == previous_part_range.range); /// Case 3 Range End after Range Start - non_intersecting_ranges_in_data_parts_builder.addRange(current_part_range.part_index, current_part_range.range); - part_index_start_to_range.erase(current_part_range.part_index); + add_non_intersecting_range(current_part_range.part_index, current_part_range.range); + part_index_start_to_range.erase(current_part_range_index); continue; } chassert(previous_part_range.event == PartsRangesIterator::EventType::RangeEnd); - chassert(previous_part_range.part_index != current_part_range.part_index); /// Case 4 Range End after Range End std::optional begin_optional = index_access.findLeftmostMarkGreaterThanValueInRange(current_part_range.part_index, @@ -449,25 +591,25 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, if (begin_optional && end - *begin_optional >= min_number_of_marks_for_non_intersecting_range) { size_t begin = *begin_optional; - add_intersecting_range(current_part_range.part_index, MarkRange{part_index_start_to_range[current_part_range.part_index].begin, begin}); + add_intersecting_range(current_part_range.part_index, MarkRange{part_index_start_to_range[current_part_range_index].begin, begin}); add_non_intersecting_range(current_part_range.part_index, MarkRange{begin, end}); } else { - add_intersecting_range(current_part_range.part_index, MarkRange{part_index_start_to_range[current_part_range.part_index].begin, end}); + add_intersecting_range(current_part_range.part_index, MarkRange{part_index_start_to_range[current_part_range_index].begin, end}); } - part_index_start_to_range.erase(current_part_range.part_index); + part_index_start_to_range.erase(current_part_range_index); } /// Process parts ranges with undefined value at end mark bool is_intersecting = part_index_start_to_range.size() > 1; - for (const auto & [part_index, mark_range] : part_index_start_to_range) + for (const auto & [part_range_index, mark_range] : part_index_start_to_range) { if (is_intersecting) - add_intersecting_range(part_index, mark_range); + add_intersecting_range(part_range_index.part_index, mark_range); else - add_non_intersecting_range(part_index, mark_range); + add_non_intersecting_range(part_range_index.part_index, mark_range); } auto && non_intersecting_ranges_in_data_parts = std::move(non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); @@ -493,35 +635,42 @@ std::pair, std::vector> splitIntersecting size_t max_layers, const LoggerPtr & logger) { - // We will advance the iterator pointing to the mark with the smallest PK value until - // there will be not less than rows_per_layer rows in the current layer (roughly speaking). 
- // Then we choose the last observed value as the new border, so the current layer will consists - // of granules with values greater than the previous mark and less or equal than the new border. - + /** We will advance the iterator pointing to the mark with the smallest PK value until + * there will be not less than rows_per_layer rows in the current layer (roughly speaking). + * Then we choose the last observed value as the new border, so the current layer will consists + * of granules with values greater than the previous mark and less or equal than the new border. + * + * We use PartRangeIndex to track currently processing ranges, because after sort, RangeStart event is always placed + * before Range End event and it is possible to encounter overlapping Range Start events for the same part. + */ IndexAccess index_access(intersecting_ranges_in_data_parts); - std::priority_queue, std::greater<>> parts_ranges_queue; + + using PartsRangesIteratorWithIndex = std::pair; + std::priority_queue, std::greater<>> parts_ranges_queue; for (size_t part_index = 0; part_index < intersecting_ranges_in_data_parts.size(); ++part_index) { for (const auto & range : intersecting_ranges_in_data_parts[part_index].ranges) { const auto & index_granularity = intersecting_ranges_in_data_parts[part_index].data_part->index_granularity; - parts_ranges_queue.push( - {index_access.getValue(part_index, range.begin), range, part_index, PartsRangesIterator::EventType::RangeStart}); + PartsRangesIterator parts_range_start{index_access.getValue(part_index, range.begin), range, part_index, PartsRangesIterator::EventType::RangeStart}; + PartRangeIndex parts_range_start_index(parts_range_start); + parts_ranges_queue.push({std::move(parts_range_start), std::move(parts_range_start_index)}); const bool value_is_defined_at_end_mark = range.end < index_granularity.getMarksCount(); if (!value_is_defined_at_end_mark) continue; - parts_ranges_queue.push( - {index_access.getValue(part_index, range.end), range, part_index, PartsRangesIterator::EventType::RangeEnd}); + PartsRangesIterator parts_range_end{index_access.getValue(part_index, range.end), range, part_index, PartsRangesIterator::EventType::RangeEnd}; + PartRangeIndex parts_range_end_index(parts_range_end); + parts_ranges_queue.push({std::move(parts_range_end), std::move(parts_range_end_index)}); } } /// The beginning of currently started (but not yet finished) range of marks of a part in the current layer. - std::unordered_map current_part_range_begin; + std::unordered_map current_part_range_begin; /// The current ending of a range of marks of a part in the current layer. - std::unordered_map current_part_range_end; + std::unordered_map current_part_range_end; /// Determine borders between layers. std::vector borders; @@ -551,17 +700,19 @@ std::pair, std::vector> splitIntersecting { // We're advancing iterators until a new value showed up. 
Values last_value; - while (!parts_ranges_queue.empty() && (last_value.empty() || last_value == parts_ranges_queue.top().value)) + while (!parts_ranges_queue.empty() && (last_value.empty() || last_value == parts_ranges_queue.top().first.value)) { - auto current = parts_ranges_queue.top(); + auto [current, current_range_index] = parts_ranges_queue.top(); + PartRangeIndex current_part_range_index(current); parts_ranges_queue.pop(); + const auto part_index = current.part_index; if (current.event == PartsRangesIterator::EventType::RangeEnd) { - current_layer_builder.addRange(part_index, MarkRange{current_part_range_begin[part_index], current.range.end}); - current_part_range_begin.erase(part_index); - current_part_range_end.erase(part_index); + current_layer_builder.addRange(part_index, MarkRange{current_part_range_begin[current_range_index], current.range.end}); + current_part_range_begin.erase(current_range_index); + current_part_range_end.erase(current_range_index); continue; } @@ -569,14 +720,14 @@ std::pair, std::vector> splitIntersecting rows_in_current_layer += index_access.getMarkRows(part_index, current.range.begin); ++marks_in_current_layer; - current_part_range_begin.try_emplace(part_index, current.range.begin); - current_part_range_end[part_index] = current.range.begin; + current_part_range_begin.try_emplace(current_range_index, current.range.begin); + current_part_range_end[current_range_index] = current.range.begin; if (current.range.begin + 1 < current.range.end) { ++current.range.begin; current.value = index_access.getValue(part_index, current.range.begin); - parts_ranges_queue.push(std::move(current)); + parts_ranges_queue.push({std::move(current), current_range_index}); } } @@ -587,10 +738,10 @@ std::pair, std::vector> splitIntersecting borders.push_back(last_value); } - for (const auto & [part_index, last_mark] : current_part_range_end) + for (const auto & [current_range_index, last_mark] : current_part_range_end) { - current_layer_builder.addRange(part_index, MarkRange{current_part_range_begin[part_index], last_mark + 1}); - current_part_range_begin[part_index] = current_part_range_end[part_index]; + current_layer_builder.addRange(current_range_index.part_index, MarkRange{current_part_range_begin[current_range_index], last_mark + 1}); + current_part_range_begin[current_range_index] = current_part_range_end[current_range_index]; } result_layers.back() = std::move(current_layer_builder.getCurrentRangesInDataParts()); @@ -658,10 +809,10 @@ ASTs buildFilters(const KeyDescription & primary_key, const std::vector { ASTPtr component_ast = std::make_shared(values[i]); auto decayed_type = removeNullable(removeLowCardinality(primary_key.data_types.at(i))); + // Values of some types (e.g. Date, DateTime) are stored in columns as numbers and we get them as just numbers from the index. // So we need an explicit Cast for them. 
- if (isColumnedAsNumber(decayed_type->getTypeId()) && !isNumber(decayed_type->getTypeId())) - component_ast = makeASTFunction("cast", std::move(component_ast), std::make_shared(decayed_type->getName())); + component_ast = makeASTFunction("cast", std::move(component_ast), std::make_shared(decayed_type->getName())); values_ast.push_back(std::move(component_ast)); } @@ -730,15 +881,18 @@ SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey( bool split_parts_ranges_into_intersecting_and_non_intersecting_final, bool split_intersecting_parts_ranges_into_layers) { - if (max_layers <= 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "max_layer should be greater than 1"); - auto logger = getLogger("PartsSplitter"); SplitPartsWithRangesByPrimaryKeyResult result; RangesInDataParts intersecting_parts_ranges = std::move(parts); + if (!isSafePrimaryKey(primary_key)) + { + result.merging_pipes.emplace_back(in_order_reading_step_getter(intersecting_parts_ranges)); + return result; + } + if (split_parts_ranges_into_intersecting_and_non_intersecting_final) { SplitPartsRangesResult split_result = splitPartsRanges(intersecting_parts_ranges, logger); @@ -752,6 +906,9 @@ SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey( return result; } + if (max_layers <= 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "max_layer should be greater than 1"); + auto && [layers, borders] = splitIntersectingPartsRangesIntoLayers(intersecting_parts_ranges, max_layers, logger); auto filters = buildFilters(primary_key, borders); result.merging_pipes.resize(layers.size()); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 2d2dc66a8c9..0fae7e8df4d 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -275,6 +275,14 @@ JSONBuilder::ItemPtr QueryPlan::explainPlan(const ExplainPlanOptions & options) } else { + auto child_plans = frame.node->step->getChildPlans(); + + if (!frame.children_array && !child_plans.empty()) + frame.children_array = std::make_unique(); + + for (const auto & child_plan : child_plans) + frame.children_array->add(child_plan->explainPlan(options)); + if (frame.children_array) frame.node_map->add("Plans", std::move(frame.children_array)); @@ -360,7 +368,7 @@ std::string debugExplainStep(const IQueryPlanStep & step) return out.str(); } -void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & options) +void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & options, size_t indent) { checkInitialized(); @@ -382,7 +390,7 @@ void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & opt if (!frame.is_description_printed) { - settings.offset = (stack.size() - 1) * settings.indent; + settings.offset = (indent + stack.size() - 1) * settings.indent; explainStep(*frame.node->step, settings, options); frame.is_description_printed = true; } @@ -393,7 +401,14 @@ void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & opt ++frame.next_child; } else + { + auto child_plans = frame.node->step->getChildPlans(); + + for (const auto & child_plan : child_plans) + child_plan->explainPlan(buffer, options, indent + stack.size()); + stack.pop(); + } } } diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index 62d658ddccd..bf135ba3cd6 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -82,7 +82,7 @@ public: }; JSONBuilder::ItemPtr explainPlan(const 
ExplainPlanOptions & options); - void explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & options); + void explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & options, size_t indent = 0); void explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptions & options); void explainEstimate(MutableColumns & columns); diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 188956b34fc..22ad53a39e0 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -95,17 +95,24 @@ private: InitializerFunc initializer_func; }; -ReadFromMemoryStorageStep::ReadFromMemoryStorageStep(const Names & columns_to_read_, - StoragePtr storage_, - const StorageSnapshotPtr & storage_snapshot_, - const size_t num_streams_, - const bool delay_read_for_global_sub_queries_) : - SourceStepWithFilter(DataStream{.header=storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}), - columns_to_read(columns_to_read_), - storage(std::move(storage_)), - storage_snapshot(storage_snapshot_), - num_streams(num_streams_), - delay_read_for_global_sub_queries(delay_read_for_global_sub_queries_) +ReadFromMemoryStorageStep::ReadFromMemoryStorageStep( + const Names & columns_to_read_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + StoragePtr storage_, + const size_t num_streams_, + const bool delay_read_for_global_sub_queries_) + : SourceStepWithFilter( + DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}, + columns_to_read_, + query_info_, + storage_snapshot_, + context_) + , columns_to_read(columns_to_read_) + , storage(std::move(storage_)) + , num_streams(num_streams_) + , delay_read_for_global_sub_queries(delay_read_for_global_sub_queries_) { } diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h index 1122bfbb2a5..238c1a3aad0 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h @@ -15,11 +15,14 @@ class QueryPipelineBuilder; class ReadFromMemoryStorageStep final : public SourceStepWithFilter { public: - ReadFromMemoryStorageStep(const Names & columns_to_read_, - StoragePtr storage_, - const StorageSnapshotPtr & storage_snapshot_, - size_t num_streams_, - bool delay_read_for_global_sub_queries_); + ReadFromMemoryStorageStep( + const Names & columns_to_read_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + StoragePtr storage_, + size_t num_streams_, + bool delay_read_for_global_sub_queries_); ReadFromMemoryStorageStep() = delete; ReadFromMemoryStorageStep(const ReadFromMemoryStorageStep &) = delete; @@ -37,7 +40,6 @@ private: Names columns_to_read; StoragePtr storage; - StorageSnapshotPtr storage_snapshot; size_t num_streams; bool delay_read_for_global_sub_queries; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 25e58588bb7..21e3cfcceab 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -40,18 +40,14 @@ #include #include #include -#include "Processors/QueryPlan/IQueryPlanStep.h" +#include #include #include +#include #include -#include #include -#include #include -#include -#include -#include #include using namespace DB; 
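
For reference on the PartRangeIndex/PartRangeIndexHash helpers introduced in PartsSplitter.cpp above: containers are keyed on the triple of part index plus mark-range begin/end, and the hasher folds the three fields together with boost::hash_combine so that several ranges of the same part remain distinct. Below is a minimal standalone sketch of that hashing pattern; it is not part of the patch, and the type and field names (RangeKey, RangeKeyHash) are illustrative stand-ins rather than the identifiers used in the code.

    #include <boost/container_hash/hash.hpp>
    #include <cstddef>
    #include <unordered_map>

    // Illustrative stand-in for PartRangeIndex: a part index plus a mark range.
    struct RangeKey
    {
        std::size_t part_index;
        std::size_t begin;
        std::size_t end;

        bool operator==(const RangeKey & other) const
        {
            return part_index == other.part_index && begin == other.begin && end == other.end;
        }
    };

    // Same idea as PartRangeIndexHash: fold every field into one running hash value.
    struct RangeKeyHash
    {
        std::size_t operator()(const RangeKey & key) const noexcept
        {
            std::size_t result = 0;
            boost::hash_combine(result, key.part_index);
            boost::hash_combine(result, key.begin);
            boost::hash_combine(result, key.end);
            return result;
        }
    };

    // Keys that share a part index but cover different mark ranges stay distinct,
    // which is what lets the splitter track several ranges of one part at once.
    using RangeToBeginMark = std::unordered_map<RangeKey, std::size_t, RangeKeyHash>;
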
@@ -265,55 +261,37 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c ReadFromMergeTree::ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, - Names real_column_names_, - Names virt_column_names_, + Names all_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, - StorageSnapshotPtr storage_snapshot_, - ContextPtr context_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, size_t max_block_size_, size_t num_streams_, - bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading) : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader( - storage_snapshot_->getSampleBlockForColumns(real_column_names_), - query_info_.prewhere_info, - data_.getPartitionValueType(), - virt_column_names_)}) + storage_snapshot_->getSampleBlockForColumns(all_column_names_), + query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_) , reader_settings(getMergeTreeReaderSettings(context_, query_info_)) , prepared_parts(std::move(parts_)) , alter_conversions_for_parts(std::move(alter_conversions_)) - , real_column_names(std::move(real_column_names_)) - , virt_column_names(std::move(virt_column_names_)) + , all_column_names(std::move(all_column_names_)) , data(data_) - , query_info(query_info_) - , prewhere_info(query_info_.prewhere_info) , actions_settings(ExpressionActionsSettings::fromContext(context_)) - , storage_snapshot(std::move(storage_snapshot_)) , metadata_for_reading(storage_snapshot->getMetadataForQuery()) - , context(std::move(context_)) , block_size{ .max_block_size_rows = max_block_size_, .preferred_block_size_bytes = context->getSettingsRef().preferred_block_size_bytes, .preferred_max_column_in_block_size_bytes = context->getSettingsRef().preferred_max_column_in_block_size_bytes} , requested_num_streams(num_streams_) - , sample_factor_column_queried(sample_factor_column_queried_) , max_block_numbers_to_read(std::move(max_block_numbers_to_read_)) , log(std::move(log_)) , analyzed_result_ptr(analyzed_result_ptr_) , is_parallel_reading_from_replicas(enable_parallel_reading) { - if (sample_factor_column_queried) - { - /// Only _sample_factor virtual column is added by ReadFromMergeTree - /// Other virtual columns are added by MergeTreeSelectProcessor. 
- auto type = std::make_shared(); - output_stream->header.insert({type->createColumn(), type, "_sample_factor"}); - } - if (is_parallel_reading_from_replicas) { all_ranges_callback = context->getMergeTreeAllRangesCallback(); @@ -375,12 +353,12 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto pool = std::make_shared( std::move(extension), std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); @@ -394,8 +372,8 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, - actions_settings, block_size_copy, reader_settings, virt_column_names); + pool, std::move(algorithm), storage_snapshot, prewhere_info, + actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); pipes.emplace_back(std::move(source)); @@ -456,12 +434,12 @@ Pipe ReadFromMergeTree::readFromPool( { pool = std::make_shared( std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -469,12 +447,12 @@ Pipe ReadFromMergeTree::readFromPool( { pool = std::make_shared( std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -493,8 +471,8 @@ Pipe ReadFromMergeTree::readFromPool( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, - actions_settings, block_size_copy, reader_settings, virt_column_names); + pool, std::move(algorithm), storage_snapshot, prewhere_info, + actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -545,12 +523,12 @@ Pipe ReadFromMergeTree::readInOrder( std::move(extension), mode, parts_with_ranges, + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -560,12 +538,12 @@ Pipe ReadFromMergeTree::readInOrder( has_limit_below_one_block, read_type, parts_with_ranges, + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -599,8 +577,8 @@ Pipe ReadFromMergeTree::readInOrder( algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, - actions_settings, block_size, reader_settings, virt_column_names); + pool, std::move(algorithm), storage_snapshot, prewhere_info, + actions_settings, block_size, reader_settings); processor->addPartLevelToChunk(isQueryWithFinal()); @@ -1303,61 +1281,17 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( return selectRangesToRead( std::move(parts), std::move(alter_conversions), - prewhere_info, - filter_nodes, metadata_for_reading, query_info, context, requested_num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + all_column_names, log, indexes); } -static ActionsDAGPtr buildFilterDAG( - const ContextPtr & context, - const PrewhereInfoPtr & prewhere_info, - const ActionDAGNodes & added_filter_nodes, - const SelectQueryInfo & query_info) -{ - const auto 
& settings = context->getSettingsRef(); - ActionsDAG::NodeRawConstPtrs nodes; - - if (prewhere_info) - { - { - const auto & node = prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name); - nodes.push_back(&node); - } - - if (prewhere_info->row_level_filter) - { - const auto & node = prewhere_info->row_level_filter->findInOutputs(prewhere_info->row_level_column_name); - nodes.push_back(&node); - } - } - - for (const auto & node : added_filter_nodes.nodes) - nodes.push_back(node); - - std::unordered_map node_name_to_input_node_column; - - if (settings.allow_experimental_analyzer && query_info.planner_context) - { - const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); - for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) - { - const auto & column = table_expression_data.getColumnOrThrow(column_name); - node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); - } - } - - return ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_node_column); -} - static void buildIndexes( std::optional & indexes, ActionsDAGPtr filter_actions_dag, @@ -1391,7 +1325,6 @@ static void buildIndexes( indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); } - /// TODO Support row_policy_filter and additional_filters indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context); MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(indexes->part_offset_condition, filter_actions_dag, context); @@ -1404,19 +1337,6 @@ static void buildIndexes( if (!indexes->use_skip_indexes) return; - std::optional info_copy; - auto get_query_info = [&]() -> const SelectQueryInfo & - { - if (settings.allow_experimental_analyzer) - { - info_copy.emplace(query_info); - info_copy->filter_actions_dag = filter_actions_dag; - return *info_copy; - } - - return query_info; - }; - std::unordered_set ignored_index_names; if (settings.ignore_data_skipping_indices.changed) @@ -1456,7 +1376,7 @@ static void buildIndexes( if (inserted) { skip_indexes.merged_indices.emplace_back(); - skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(get_query_info(), metadata_snapshot); + skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); } skip_indexes.merged_indices[it->second].addIndex(index_helper); @@ -1468,11 +1388,11 @@ static void buildIndexes( { #ifdef ENABLE_ANNOY if (const auto * annoy = typeid_cast(index_helper.get())) - condition = annoy->createIndexCondition(get_query_info(), context); + condition = annoy->createIndexCondition(query_info, context); #endif #ifdef ENABLE_USEARCH if (const auto * usearch = typeid_cast(index_helper.get())) - condition = usearch->createIndexCondition(get_query_info(), context); + condition = usearch->createIndexCondition(query_info, context); #endif if (!condition) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name); @@ -1486,45 +1406,87 @@ static void buildIndexes( } } + // move minmax indices to first positions, so they will be applied first as cheapest ones + std::stable_sort(begin(skip_indexes.useful_indices), end(skip_indexes.useful_indices), [](const auto & l, const auto & r) + { + const bool l_min_max = (typeid_cast(l.index.get())); + 
const bool r_min_max = (typeid_cast(r.index.get())); + if (l_min_max == r_min_max) + return false; + + if (l_min_max) + return true; // left is min max but right is not + + return false; // right is min max but left is not + }); + indexes->skip_indexes = std::move(skip_indexes); } -void ReadFromMergeTree::applyFilters() +void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info); - buildIndexes(indexes, filter_actions_dag, data, prepared_parts, context, query_info, metadata_for_reading); + if (!indexes) + { + /// Analyzer generates unique ColumnIdentifiers like __table1.__partition_id in filter nodes, + /// while key analysis still requires unqualified column names. + std::unordered_map node_name_to_input_node_column; + if (query_info.planner_context) + { + const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); + const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) + { + /// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG, + /// so they should not be added to the input nodes. + if (alias_column_expressions.contains(column_name)) + continue; + const auto & column = table_expression_data.getColumnOrThrow(column_name); + node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); + } + } + + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, node_name_to_input_node_column); + + /// NOTE: Currently we store two DAGs for analysis: + /// (1) SourceStepWithFilter::filter_nodes, (2) query_info.filter_actions_dag. Make sure there are consistent. + /// TODO: Get rid of filter_actions_dag in query_info after we move analysis of + /// parallel replicas and unused shards into optimization, similar to projection analysis. 
+ query_info.filter_actions_dag = filter_actions_dag; + + buildIndexes( + indexes, + filter_actions_dag, + data, + prepared_parts, + context, + query_info, + metadata_for_reading); + } } ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, - const PrewhereInfoPtr & prewhere_info, - const ActionDAGNodes & added_filter_nodes, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, - ContextPtr context, + const SelectQueryInfo & query_info_, + ContextPtr context_, size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes) { - auto updated_query_info_with_filter_dag = query_info; - updated_query_info_with_filter_dag.filter_actions_dag = buildFilterDAG(context, prewhere_info, added_filter_nodes, query_info); - return selectRangesToReadImpl( std::move(parts), std::move(alter_conversions), metadata_snapshot, - updated_query_info_with_filter_dag, - context, + query_info_, + context_, num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + all_column_names, log, indexes); } @@ -1533,22 +1495,21 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, - ContextPtr context, + const SelectQueryInfo & query_info_, + ContextPtr context_, size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes) { AnalysisResult result; - const auto & settings = context->getSettingsRef(); + const auto & settings = context_->getSettingsRef(); size_t total_parts = parts.size(); - result.column_names_to_read = real_column_names; + result.column_names_to_read = all_column_names; /// If there are only virtual columns in the query, you must request at least one non-virtual one. 
if (result.column_names_to_read.empty()) @@ -1562,7 +1523,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( const Names & primary_key_column_names = primary_key.column_names; if (!indexes) - buildIndexes(indexes, query_info.filter_actions_dag, data, parts, context, query_info, metadata_snapshot); + buildIndexes(indexes, query_info_.filter_actions_dag, data, parts, context_, query_info_, metadata_snapshot); if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); @@ -1594,20 +1555,19 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( indexes->part_values, metadata_snapshot, data, - context, + context_, max_block_numbers_to_read.get(), log, result.index_stats); result.sampling = MergeTreeDataSelectExecutor::getSampling( - query_info, + query_info_, metadata_snapshot->getColumns().getAllPhysical(), parts, indexes->key_condition, data, metadata_snapshot, - context, - sample_factor_column_queried, + context_, log); if (result.sampling.read_nothing) @@ -1617,12 +1577,12 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( total_marks_pk += part->index_granularity.getMarksCountWithoutFinal(); parts_before_pk = parts.size(); - auto reader_settings = getMergeTreeReaderSettings(context, query_info); + auto reader_settings = getMergeTreeReaderSettings(context_, query_info_); result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( std::move(parts), std::move(alter_conversions), metadata_snapshot, - context, + context_, indexes->key_condition, indexes->part_offset_condition, indexes->skip_indexes, @@ -1658,8 +1618,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( result.total_marks_pk = total_marks_pk; result.selected_rows = sum_rows; - if (query_info.input_order_info) - result.read_type = (query_info.input_order_info->direction > 0) + if (query_info_.input_order_info) + result.read_type = (query_info_.input_order_info->direction > 0) ? ReadType::InOrder : ReadType::InReverseOrder; @@ -1724,10 +1684,8 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info prewhere_info = prewhere_info_value; output_stream = DataStream{.header = MergeTreeSelectProcessor::transformHeader( - storage_snapshot->getSampleBlockForColumns(real_column_names), - prewhere_info_value, - data.getPartitionValueType(), - virt_column_names)}; + storage_snapshot->getSampleBlockForColumns(all_column_names), + prewhere_info_value)}; updateSortDescriptionForOutputStream( *output_stream, @@ -1808,11 +1766,6 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const return *result_ptr; } -bool ReadFromMergeTree::isQueryWithFinal() const -{ - return query_info.isFinal(); -} - bool ReadFromMergeTree::isQueryWithSampling() const { if (context->getSettingsRef().parallel_replicas_count > 1 && data.supportsSampling()) @@ -1920,7 +1873,13 @@ Pipe ReadFromMergeTree::groupStreamsByPartition(AnalysisResult & result, Actions void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { auto result = getAnalysisResult(); + + /// Do not keep data parts in snapshot. + /// They are stored separately, and some could be released after PK analysis. 
+ storage_snapshot->data = std::make_unique(); + result.checkLimits(context->getSettingsRef(), query_info); + shared_virtual_fields.emplace("_sample_factor", result.sampling.used_sample_factor); LOG_DEBUG( log, @@ -2005,18 +1964,6 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions)); }; - /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. - if (sample_factor_column_queried) - { - ColumnWithTypeAndName column; - column.name = "_sample_factor"; - column.type = std::make_shared(); - column.column = column.type->createColumnConst(0, Field(result.sampling.used_sample_factor)); - - auto adding_column = ActionsDAG::makeAddingColumnActions(std::move(column)); - append_actions(std::move(adding_column)); - } - if (result_projection) cur_header = result_projection->updateHeader(cur_header); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index fdeaff57279..5ed742a9bfd 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -110,15 +110,13 @@ public: ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, - Names real_column_names_, - Names virt_column_names_, + Names all_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, - StorageSnapshotPtr storage_snapshot, - ContextPtr context_, + const StorageSnapshotPtr & storage_snapshot, + const ContextPtr & context_, size_t max_block_size_, size_t num_streams_, - bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, @@ -135,11 +133,9 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeIndexes(JSONBuilder::JSONMap & map) const override; - const Names & getRealColumnNames() const { return real_column_names; } - const Names & getVirtualColumnNames() const { return virt_column_names; } + const Names & getAllColumnNames() const { return all_column_names; } StorageID getStorageID() const { return data.getStorageID(); } - const StorageSnapshotPtr & getStorageSnapshot() const { return storage_snapshot; } UInt64 getSelectedParts() const { return selected_parts; } UInt64 getSelectedRows() const { return selected_rows; } UInt64 getSelectedMarks() const { return selected_marks; } @@ -158,16 +154,13 @@ public: static AnalysisResultPtr selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, - const PrewhereInfoPtr & prewhere_info, - const ActionDAGNodes & added_filter_nodes, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes); @@ -175,17 +168,13 @@ public: MergeTreeData::DataPartsVector parts, std::vector alter_conversions) const; - ContextPtr getContext() const { return context; } - const SelectQueryInfo & getQueryInfo() const { return query_info; } StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; } - const PrewhereInfoPtr & getPrewhereInfo() const { return prewhere_info; } /// Returns `false` if requested reading cannot be performed. 
bool requestReadingInOrder(size_t prefix_size, int direction, size_t limit); bool readsInOrder() const; - void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value); - bool isQueryWithFinal() const; + void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) override; bool isQueryWithSampling() const; /// Returns true if the optimization is applicable (and applies it then). @@ -203,7 +192,7 @@ public: size_t getNumStreams() const { return requested_num_streams; } bool isParallelReadingEnabled() const { return read_task_callback != std::nullopt; } - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; private: static AnalysisResultPtr selectRangesToReadImpl( @@ -215,8 +204,7 @@ private: size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes); @@ -233,23 +221,17 @@ private: MergeTreeData::DataPartsVector prepared_parts; std::vector alter_conversions_for_parts; - Names real_column_names; - Names virt_column_names; + Names all_column_names; const MergeTreeData & data; - SelectQueryInfo query_info; - PrewhereInfoPtr prewhere_info; ExpressionActionsSettings actions_settings; - StorageSnapshotPtr storage_snapshot; StorageMetadataPtr metadata_for_reading; - ContextPtr context; const MergeTreeReadTask::BlockSizeParams block_size; size_t requested_num_streams; size_t output_streams_limit = 0; - const bool sample_factor_column_queried; /// Used for aggregation optimization (see DB::QueryPlanOptimizations::tryAggregateEachPartitionIndependently). bool output_each_partition_through_separate_port = false; @@ -290,7 +272,9 @@ private: RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); ReadFromMergeTree::AnalysisResult getAnalysisResult() const; + AnalysisResultPtr analyzed_result_ptr; + VirtualFields shared_virtual_fields; bool is_parallel_reading_from_replicas; std::optional all_ranges_callback; diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index d8b3f4fbb8e..a294683c640 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -26,9 +26,11 @@ namespace class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_) + NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional limit_, UInt64 step_) : ISource(createHeader()), block_size(block_size_), next(offset_), step(step_) { + if (limit_.has_value()) + end = limit_.value() + offset_; } String getName() const override { return "Numbers"; } @@ -38,24 +40,32 @@ public: protected: Chunk generate() override { - auto column = ColumnUInt64::create(block_size); + UInt64 real_block_size = block_size; + if (end.has_value()) + { + if (end.value() <= next) + return {}; + real_block_size = std::min(block_size, end.value() - next); + } + auto column = ColumnUInt64::create(real_block_size); ColumnUInt64::Container & vec = column->getData(); UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. UInt64 * pos = vec.data(); /// This also accelerates the code. 
- UInt64 * end = &vec[block_size]; - iota(pos, static_cast(end - pos), curr); + UInt64 * end_ = &vec[real_block_size]; + iota(pos, static_cast(end_ - pos), curr); next += step; progress(column->size(), column->byteSize()); - return {Columns{std::move(column)}, block_size}; + return {Columns{std::move(column)}, real_block_size}; } private: UInt64 block_size; UInt64 next; + std::optional end; /// not included UInt64 step; }; @@ -321,21 +331,24 @@ void shrinkRanges(Ranges & ranges, size_t size) ReadFromSystemNumbersStep::ReadFromSystemNumbersStep( const Names & column_names_, - StoragePtr storage_, + const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, - SelectQueryInfo & query_info, - ContextPtr context_, + const ContextPtr & context_, + StoragePtr storage_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter{DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}} + : SourceStepWithFilter( + DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , column_names{column_names_} , storage{std::move(storage_)} - , storage_snapshot{storage_snapshot_} - , context{std::move(context_)} , key_expression{KeyDescription::parse(column_names[0], storage_snapshot->metadata->columns, context).expression} , max_block_size{max_block_size_} , num_streams{num_streams_} - , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) + , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) , should_pushdown_limit(shouldPushdownLimit(query_info, limit_length_and_offset.first)) , limit(query_info.limit) , storage_limits(query_info.storage_limits) @@ -375,7 +388,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() num_streams = 1; /// Build rpn of query filters - KeyCondition condition(buildFilterDAG(), context, column_names, key_expression); + KeyCondition condition(filter_actions_dag, context, column_names, key_expression); Pipe pipe; Ranges ranges; @@ -475,7 +488,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() for (size_t i = 0; i < num_streams; ++i) { auto source - = std::make_shared(max_block_size, numbers_storage.offset + i * max_block_size, num_streams * max_block_size); + = std::make_shared(max_block_size, numbers_storage.offset + i * max_block_size, numbers_storage.limit, num_streams * max_block_size); if (numbers_storage.limit && i == 0) { @@ -504,12 +517,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() return pipe; } -ActionsDAGPtr ReadFromSystemNumbersStep::buildFilterDAG() -{ - std::unordered_map node_name_to_input_node_column; - return ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, node_name_to_input_node_column); -} - void ReadFromSystemNumbersStep::checkLimits(size_t rows) { const auto & settings = context->getSettingsRef(); diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h index 0a8c62b30af..cab0686474b 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h @@ -16,10 +16,10 @@ class ReadFromSystemNumbersStep final : public SourceStepWithFilter public: ReadFromSystemNumbersStep( const Names & column_names_, - StoragePtr storage_, + const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, - SelectQueryInfo & query_info, - ContextPtr context_, + const ContextPtr & context_, + 
StoragePtr storage_, size_t max_block_size_, size_t num_streams_); @@ -32,12 +32,9 @@ private: void checkLimits(size_t rows); Pipe makePipe(); - ActionsDAGPtr buildFilterDAG(); const Names column_names; StoragePtr storage; - StorageSnapshotPtr storage_snapshot; - ContextPtr context; ExpressionActionsPtr key_expression; size_t max_block_size; size_t num_streams; diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 641b9036d4c..d0491cb4b82 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp new file mode 100644 index 00000000000..5893c2aeb4f --- /dev/null +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -0,0 +1,160 @@ +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; +} + +Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info) +{ + if (prewhere_info) + { + if (prewhere_info->row_level_filter) + { + block = prewhere_info->row_level_filter->updateHeader(std::move(block)); + auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); + if (!row_level_column.type->canBeUsedInBooleanContext()) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid type for filter in PREWHERE: {}", + row_level_column.type->getName()); + } + + block.erase(prewhere_info->row_level_column_name); + } + + if (prewhere_info->prewhere_actions) + { + block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); + + auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); + if (!prewhere_column.type->canBeUsedInBooleanContext()) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid type for filter in PREWHERE: {}", + prewhere_column.type->getName()); + } + + if (prewhere_info->remove_prewhere_column) + { + block.erase(prewhere_info->prewhere_column_name); + } + else if (prewhere_info->need_filter) + { + if (const auto * type = typeid_cast(prewhere_column.type.get()); type && type->onlyNull()) + { + prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), Null()); + } + else + { + WhichDataType which(removeNullable(recursiveRemoveLowCardinality(prewhere_column.type))); + + if (which.isNativeInt() || which.isNativeUInt()) + prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst(); + else if (which.isFloat()) + prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); + else + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Illegal type {} of column for filter", + prewhere_column.type->getName()); + } + } + } + } + + return block; +} + +void SourceStepWithFilter::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); +} + +void SourceStepWithFilter::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) +{ + query_info.prewhere_info = prewhere_info_value; + prewhere_info = prewhere_info_value; + output_stream = DataStream{.header = applyPrewhereActions(output_stream->header, prewhere_info)}; +} + +void 
SourceStepWithFilter::describeActions(FormatSettings & format_settings) const +{ + std::string prefix(format_settings.offset, format_settings.indent_char); + + if (prewhere_info) + { + format_settings.out << prefix << "Prewhere info" << '\n'; + format_settings.out << prefix << "Need filter: " << prewhere_info->need_filter << '\n'; + + prefix.push_back(format_settings.indent_char); + prefix.push_back(format_settings.indent_char); + + if (prewhere_info->prewhere_actions) + { + format_settings.out << prefix << "Prewhere filter" << '\n'; + format_settings.out << prefix << "Prewhere filter column: " << prewhere_info->prewhere_column_name; + if (prewhere_info->remove_prewhere_column) + format_settings.out << " (removed)"; + format_settings.out << '\n'; + + auto expression = std::make_shared(prewhere_info->prewhere_actions); + expression->describeActions(format_settings.out, prefix); + } + + if (prewhere_info->row_level_filter) + { + format_settings.out << prefix << "Row level filter" << '\n'; + format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; + + auto expression = std::make_shared(prewhere_info->row_level_filter); + expression->describeActions(format_settings.out, prefix); + } + } +} + +void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const +{ + if (prewhere_info) + { + std::unique_ptr prewhere_info_map = std::make_unique(); + prewhere_info_map->add("Need filter", prewhere_info->need_filter); + + if (prewhere_info->prewhere_actions) + { + std::unique_ptr prewhere_filter_map = std::make_unique(); + prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); + prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); + auto expression = std::make_shared(prewhere_info->prewhere_actions); + prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); + + prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); + } + + if (prewhere_info->row_level_filter) + { + std::unique_ptr row_level_filter_map = std::make_unique(); + row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); + auto expression = std::make_shared(prewhere_info->row_level_filter); + row_level_filter_map->add("Row level filter expression", expression->toTree()); + + prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); + } + + map.add("Prewhere info", std::move(prewhere_info_map)); + } +} + +} diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index dbaff06fde6..0971b99d828 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -1,7 +1,9 @@ #pragma once -#include #include +#include +#include +#include namespace DB { @@ -15,15 +17,31 @@ public: using Base = ISourceStep; using Base::Base; - const std::vector & getFilters() const + SourceStepWithFilter( + DataStream output_stream_, + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_) + : ISourceStep(std::move(output_stream_)) + , required_source_columns(column_names_) + , query_info(query_info_) + , prewhere_info(query_info.prewhere_info) + , storage_snapshot(storage_snapshot_) + , context(context_) { - return filter_dags; } - const ActionDAGNodes & getFilterNodes() const - { - return filter_nodes; - } + const ActionsDAGPtr & 
getFilterActionsDAG() const { return filter_actions_dag; } + + const SelectQueryInfo & getQueryInfo() const { return query_info; } + const PrewhereInfoPtr & getPrewhereInfo() const { return prewhere_info; } + ContextPtr getContext() const { return context; } + const StorageSnapshotPtr & getStorageSnapshot() const { return storage_snapshot; } + + bool isQueryWithFinal() const { return query_info.isFinal(); } + + const Names & requiredSourceColumns() const { return required_source_columns; } void addFilter(ActionsDAGPtr filter_dag, std::string column_name) { @@ -31,18 +49,41 @@ public: filter_dags.push_back(std::move(filter_dag)); } - void addFilter(ActionsDAGPtr filter_dag, const ActionsDAG::Node * filter_node) + void addFilterFromParentStep(const ActionsDAG::Node * filter_node) { filter_nodes.nodes.push_back(filter_node); - filter_dags.push_back(std::move(filter_dag)); } /// Apply filters that can optimize reading from storage. - virtual void applyFilters() {} + void applyFilters() + { + applyFilters(std::move(filter_nodes)); + filter_dags = {}; + } + + virtual void applyFilters(ActionDAGNodes added_filter_nodes); + + virtual void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value); + + void describeActions(FormatSettings & format_settings) const override; + + void describeActions(JSONBuilder::JSONMap & map) const override; + + static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); protected: - std::vector filter_dags; + Names required_source_columns; + SelectQueryInfo query_info; + PrewhereInfoPtr prewhere_info; + StorageSnapshotPtr storage_snapshot; + ContextPtr context; + + ActionsDAGPtr filter_actions_dag; + +private: + /// Will be cleared after applyFilters() is called. ActionDAGNodes filter_nodes; + std::vector filter_dags; }; } diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h index 3538adf1d75..ee155d6f78c 100644 --- a/src/Processors/SourceWithKeyCondition.h +++ b/src/Processors/SourceWithKeyCondition.h @@ -16,13 +16,8 @@ protected: /// Represents pushed down filters in source std::shared_ptr key_condition; - void setKeyConditionImpl(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context, const Block & keys) + void setKeyConditionImpl(const ActionsDAGPtr & filter_actions_dag, ContextPtr context, const Block & keys) { - std::unordered_map node_name_to_input_column; - for (const auto & column : keys.getColumnsWithTypeAndName()) - node_name_to_input_column.insert({column.name, column}); - - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column); key_condition = std::make_shared( filter_actions_dag, context, @@ -37,7 +32,7 @@ public: /// Set key_condition directly. It is used for filter push down in source. virtual void setKeyCondition(const std::shared_ptr & key_condition_) { key_condition = key_condition_; } - /// Set key_condition created by nodes and context. - virtual void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & /*nodes*/, ContextPtr /*context*/) { } + /// Set key_condition created by filter_actions_dag and context. 
+ virtual void setKeyCondition(const ActionsDAGPtr & /*filter_actions_dag*/, ContextPtr /*context*/) { } }; } diff --git a/src/Processors/Sources/MySQLSource.h b/src/Processors/Sources/MySQLSource.h index fc26ffa3645..4ae5af22dab 100644 --- a/src/Processors/Sources/MySQLSource.h +++ b/src/Processors/Sources/MySQLSource.h @@ -20,7 +20,7 @@ struct StreamSettings bool fetch_by_name; size_t default_num_tries_on_connection_loss; - StreamSettings(const Settings & settings, bool auto_close_ = false, bool fetch_by_name_ = false, size_t max_retry_ = 5); + explicit StreamSettings(const Settings & settings, bool auto_close_ = false, bool fetch_by_name_ = false, size_t max_retry_ = 5); }; diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index dfa311a7079..3d7dd3f76b8 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -72,19 +71,36 @@ ISource::Status RemoteSource::prepare() if (is_async_state) return Status::Async; + if (executor_finished) + return Status::Finished; + Status status = ISource::prepare(); /// To avoid resetting the connection (because of "unfinished" query) in the /// RemoteQueryExecutor it should be finished explicitly. if (status == Status::Finished) { - query_executor->finish(); is_async_state = false; - return status; + need_drain = true; + return Status::Ready; } return status; } +void RemoteSource::work() +{ + /// Connection drain is a heavy operation that may take a long time. + /// Therefore we move connection drain from prepare() to work(), and drain multiple connections in parallel. + /// See issue: https://github.com/ClickHouse/ClickHouse/issues/60844 + if (need_drain) + { + query_executor->finish(); + executor_finished = true; + return; + } + ISource::work(); +} + std::optional RemoteSource::tryGenerate() { /// onCancel() will do the cancel if the query was sent. 
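The RemoteSource.cpp hunk above moves the heavy query_executor->finish() call (connection drain) out of prepare() and into work(): prepare() only flags that a drain is pending and returns Ready, so the pipeline executor can run the drains of several remote sources on different threads. Below is a minimal, self-contained sketch of that two-phase pattern; it uses stand-in types (Status, FakeExecutor, DrainingSource) rather than ClickHouse's ISource/RemoteQueryExecutor, so treat it as an illustration of the scheduling idea, not the actual implementation.

#include <iostream>

// Hypothetical stand-ins for ISource::Status and RemoteQueryExecutor::finish().
enum class Status { Ready, Finished };

struct FakeExecutor
{
    void finish() { std::cout << "draining connection (slow)\n"; }
};

class DrainingSource
{
public:
    // prepare() must stay cheap: when the upstream data is exhausted we only
    // remember that a drain is pending and ask the executor to schedule work().
    Status prepare()
    {
        if (executor_finished)
            return Status::Finished;

        if (upstream_exhausted)
            need_drain = true;

        return Status::Ready;
    }

    // work() is allowed to block, so the heavy drain happens here; several
    // sources can be drained by different executor threads at the same time.
    void work()
    {
        if (need_drain)
        {
            executor.finish();
            executor_finished = true;
            return;
        }
        // ... regular chunk generation would go here ...
        upstream_exhausted = true;
    }

private:
    FakeExecutor executor;
    bool upstream_exhausted = false;
    bool need_drain = false;
    bool executor_finished = false;
};

int main()
{
    DrainingSource source;
    while (source.prepare() != Status::Finished)
        source.work();
}

The need_drain/executor_finished flags make the drain happen exactly once before the source reports Finished, which mirrors the new prepare()/work() split in the hunk.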
diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index dbfa0156331..052567bc261 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB { @@ -23,6 +22,7 @@ public: ~RemoteSource() override; Status prepare() override; + void work() override; String getName() const override { return "Remote"; } void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit.swap(counter); } @@ -40,6 +40,8 @@ protected: private: bool was_query_sent = false; + bool need_drain = false; + bool executor_finished = false; bool add_aggregation_info = false; RemoteQueryExecutorPtr query_executor; RowsBeforeLimitCounterPtr rows_before_limit; diff --git a/src/Processors/Sources/WaitForAsyncInsertSource.h b/src/Processors/Sources/WaitForAsyncInsertSource.h index 1029c164941..78af6294202 100644 --- a/src/Processors/Sources/WaitForAsyncInsertSource.h +++ b/src/Processors/Sources/WaitForAsyncInsertSource.h @@ -33,7 +33,7 @@ protected: { auto status = insert_future.wait_for(std::chrono::milliseconds(timeout_ms)); if (status == std::future_status::deferred) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: got future in deferred state"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got future in deferred state"); if (status == std::future_status::timeout) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout ({} ms) exceeded)", timeout_ms); diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index cc0b5926e66..eeb8f4a6060 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -163,7 +163,7 @@ void CreatingSetsTransform::startSubquery() done_with_table = !external_table; if ((done_with_set && !set_from_cache) && done_with_table) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: nothing to do with subquery"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Nothing to do with subquery"); if (table_out.initialized()) { diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index aaa98e96803..c659a2578f3 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -53,13 +53,13 @@ Block FillingTransform::transformHeader(Block header, const SortDescription & so template static FillColumnDescription::StepFunction getStepFunction( - IntervalKind kind, Int64 step, const DateLUTImpl & date_lut, UInt16 scale = DataTypeDateTime64::default_scale) + IntervalKind::Kind kind, Int64 step, const DateLUTImpl & date_lut, UInt16 scale = DataTypeDateTime64::default_scale) { static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); switch (kind) // NOLINT(bugprone-switch-missing-default-case) { #define DECLARE_CASE(NAME) \ - case IntervalKind::NAME: \ + case IntervalKind::Kind::NAME: \ return [step, scale, &date_lut](Field & field) { \ field = Add##NAME##sImpl::execute(static_cast(\ field.get()), static_cast(step), date_lut, utc_time_zone, scale); }; @@ -161,7 +161,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & switch (*descr.step_kind) // NOLINT(bugprone-switch-missing-default-case) { #define DECLARE_CASE(NAME) \ - case IntervalKind::NAME: \ + case IntervalKind::Kind::NAME: \ descr.step_func = [step, &time_zone = 
date_time64->getTimeZone()](Field & field) \ { \ auto field_decimal = field.get>(); \ diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index ea4dcff4808..b3be9246f43 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -265,6 +265,7 @@ void FilterTransform::doTransform(Chunk & chunk) { size_t num_rows_before_filtration = chunk.getNumRows(); auto columns = chunk.detachColumns(); + DataTypes types; auto select_final_indices_info = getSelectByFinalIndices(chunk); { @@ -275,6 +276,7 @@ void FilterTransform::doTransform(Chunk & chunk) expression->execute(block, num_rows_before_filtration); columns = block.getColumns(); + types = block.getDataTypes(); } if (constant_filter_description.always_true || on_totals) @@ -323,14 +325,21 @@ void FilterTransform::doTransform(Chunk & chunk) * or calculate number of set bytes in the filter. */ size_t first_non_constant_column = num_columns; + size_t min_size_in_memory = std::numeric_limits::max(); for (size_t i = 0; i < num_columns; ++i) { - if (i != filter_column_position && !isColumnConst(*columns[i])) + DataTypePtr type_not_null = removeNullableOrLowCardinalityNullable(types[i]); + if (i != filter_column_position && !isColumnConst(*columns[i]) && type_not_null->isValueRepresentedByNumber()) { - first_non_constant_column = i; - break; + size_t size_in_memory = type_not_null->getSizeOfValueInMemory() + (isNullableOrLowCardinalityNullable(types[i]) ? 1 : 0); + if (size_in_memory < min_size_in_memory) + { + min_size_in_memory = size_in_memory; + first_non_constant_column = i; + } } } + (void)min_size_in_memory; /// Suppress error of clang-analyzer-deadcode.DeadStores size_t num_filtered_rows = 0; if (first_non_constant_column != num_columns) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 91bbf04f327..aa5a1c0cc1a 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -66,7 +66,6 @@ struct ViewsData StorageID source_storage_id; StorageMetadataPtr source_metadata_snapshot; StoragePtr source_storage; - /// This value is actually only for logs. size_t max_threads = 1; /// In case of exception happened while inserting into main table, it is pushed to pipeline. 
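The FilterTransform.cpp hunk above no longer grabs the first non-constant column to count the rows that survive the filter; it scans for the non-constant, fixed-size column whose values are cheapest to keep in memory (a Nullable or LowCardinality(Nullable) wrapper costs one extra byte for the null map). The following is a small sketch of that selection heuristic over hypothetical per-column descriptors, not the real IColumn/IDataType interfaces.

#include <cstddef>
#include <limits>
#include <vector>

// Hypothetical per-column description; the real code queries IDataType/IColumn instead.
struct ColumnDesc
{
    bool is_const = false;
    bool fixed_size_value = true;   // roughly isValueRepresentedByNumber()
    size_t value_size = 0;          // roughly getSizeOfValueInMemory()
    bool nullable = false;          // adds one byte per row for the null map
};

// Returns the index of the cheapest column to filter for row counting,
// or columns.size() if every candidate is constant or variable-sized.
size_t pickCheapestColumn(const std::vector<ColumnDesc> & columns, size_t filter_column_position)
{
    size_t best = columns.size();
    size_t best_size = std::numeric_limits<size_t>::max();

    for (size_t i = 0; i < columns.size(); ++i)
    {
        const auto & col = columns[i];
        if (i == filter_column_position || col.is_const || !col.fixed_size_value)
            continue;

        size_t size_in_memory = col.value_size + (col.nullable ? 1 : 0);
        if (size_in_memory < best_size)
        {
            best_size = size_in_memory;
            best = i;
        }
    }
    return best;
}

Filtering the smallest such column keeps the byte volume touched by the row count as low as possible; when no suitable column exists, the surrounding comment in the hunk indicates the transform instead counts the set bytes in the filter directly.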
@@ -188,6 +187,244 @@ private: std::exception_ptr any_exception; }; +/// Generates one chain part for every view in buildPushingToViewsChain +std::optional generateViewChain( + ContextPtr context, + const StorageID & view_id, + ThreadGroupPtr running_group, + Chain & result_chain, + ViewsDataPtr views_data, + ThreadStatusesHolderPtr thread_status_holder, + bool async_insert, + const Block & storage_header, + bool disable_deduplication_for_children) +{ + auto view = DatabaseCatalog::instance().tryGetTable(view_id, context); + if (view == nullptr) + { + LOG_WARNING( + getLogger("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + return std::nullopt; + } + + auto view_metadata_snapshot = view->getInMemoryMetadataPtr(); + auto select_context = view_metadata_snapshot->getSQLSecurityOverriddenContext(context); + select_context->setQueryAccessInfo(context->getQueryAccessInfoPtr()); + + auto insert_context = Context::createCopy(select_context); + + const auto & insert_settings = insert_context->getSettingsRef(); + + // Do not deduplicate insertions into MV if the main insertion is Ok + if (disable_deduplication_for_children) + { + insert_context->setSetting("insert_deduplicate", Field{false}); + } + else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && + !insert_settings.insert_deduplication_token.value.empty()) + { + /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle + * deduplication in complex INSERT flows. + * + * Example: + * + * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + * | | + * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ + * + * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will + * be inserted into `ds_2_1`. + * + * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables + * are involved. 
+ * + * Example: + * + * landing -┬--> mv_1_1 --┬-> ds_1_1 + * | | + * └--> mv_1_2 --┘ + * + */ + auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; + + if (view_id.hasUUID()) + insert_deduplication_token += "_" + toString(view_id.uuid); + else + insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); + + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); + } + + // Processing of blocks for MVs is done block by block, and there will + // be no parallel reading after (plus it is not a costless operation) + select_context->setSetting("parallelize_output_from_storages", Field{false}); + + // Separate min_insert_block_size_rows/min_insert_block_size_bytes for children + if (insert_settings.min_insert_block_size_rows_for_materialized_views) + insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value); + if (insert_settings.min_insert_block_size_bytes_for_materialized_views) + insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value); + + ASTPtr query; + Chain out; + + /// We are creating a ThreadStatus per view to store its metrics individually + /// Since calling ThreadStatus() changes current_thread we save it and restore it after the calls + /// Later on, before doing any task related to a view, we'll switch to its ThreadStatus, do the work, + /// and switch back to the original thread_status. + auto * original_thread = current_thread; + SCOPE_EXIT({ current_thread = original_thread; }); + current_thread = nullptr; + std::unique_ptr view_thread_status_ptr = std::make_unique(/*check_current_thread_on_destruction=*/ false); + /// Copy of a ThreadStatus should be internal. + view_thread_status_ptr->setInternalThread(); + view_thread_status_ptr->attachToGroup(running_group); + + auto * view_thread_status = view_thread_status_ptr.get(); + views_data->thread_status_holder->thread_statuses.push_front(std::move(view_thread_status_ptr)); + + auto runtime_stats = std::make_unique(); + runtime_stats->target_name = view_id.getFullTableName(); + runtime_stats->thread_status = view_thread_status; + runtime_stats->event_time = std::chrono::system_clock::now(); + runtime_stats->event_status = QueryViewsLogElement::ViewStatus::EXCEPTION_BEFORE_START; + + auto & type = runtime_stats->type; + auto & target_name = runtime_stats->target_name; + auto * view_counter_ms = &runtime_stats->elapsed_ms; + + if (auto * materialized_view = dynamic_cast(view.get())) + { + auto lock = materialized_view->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); + + if (lock == nullptr) + { + // In case the materialized view is dropped/detached at this point, we register a warning and ignore it + assert(materialized_view->is_dropped || materialized_view->is_detached); + LOG_WARNING( + getLogger("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + return std::nullopt; + } + + type = QueryViewsLogElement::ViewType::MATERIALIZED; + result_chain.addTableLock(lock); + + StoragePtr inner_table = materialized_view->tryGetTargetTable(); + /// If target table was dropped, ignore this materialized view. + if (!inner_table) + { + if (context->getSettingsRef().ignore_materialized_views_with_dropped_target_table) + return std::nullopt; + + throw Exception( + ErrorCodes::UNKNOWN_TABLE, + "Target table '{}' of view '{}' doesn't exists. 
To ignore this view use setting " + "ignore_materialized_views_with_dropped_target_table", + materialized_view->getTargetTableId().getFullTableName(), + view_id.getFullTableName()); + } + + auto inner_table_id = inner_table->getStorageID(); + auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr(); + + const auto & select_query = view_metadata_snapshot->getSelectQuery(); + if (select_query.select_table_id != views_data->source_storage_id) + { + /// It may happen if materialize view query was changed and it doesn't depend on this source table anymore. + /// See setting `allow_experimental_alter_materialized_view_structure` + LOG_DEBUG( + getLogger("PushingToViews"), "Table '{}' is not a source for view '{}' anymore, current source is '{}'", + select_query.select_table_id.getFullTableName(), view_id.getFullTableName(), views_data->source_storage_id); + return std::nullopt; + } + + query = select_query.inner_query; + + target_name = inner_table_id.getFullTableName(); + + Block header; + + /// Get list of columns we get from select query. + if (select_context->getSettingsRef().allow_experimental_analyzer) + header = InterpreterSelectQueryAnalyzer::getSampleBlock(query, select_context); + else + header = InterpreterSelectQuery(query, select_context, SelectQueryOptions()).getSampleBlock(); + + /// Insert only columns returned by select. + Names insert_columns; + const auto & inner_table_columns = inner_metadata_snapshot->getColumns(); + for (const auto & column : header) + { + /// But skip columns which storage doesn't have. + if (inner_table_columns.hasNotAlias(column.name)) + insert_columns.emplace_back(column.name); + } + + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); + out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, !materialized_view->hasInnerTable()); + + if (interpreter.shouldAddSquashingFroStorage(inner_table)) + { + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); + + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); + } + + auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); + counting->setProcessListElement(insert_context->getProcessListElement()); + counting->setProgressCallback(insert_context->getProgressCallback()); + out.addSource(std::move(counting)); + + out.addStorageHolder(view); + out.addStorageHolder(inner_table); + } + else if (auto * live_view = dynamic_cast(view.get())) + { + runtime_stats->type = QueryViewsLogElement::ViewType::LIVE; + query = live_view->getInnerQuery(); + out = buildPushingToViewsChain( + view, view_metadata_snapshot, insert_context, ASTPtr(), + /* no_destination= */ true, + thread_status_holder, running_group, view_counter_ms, async_insert, storage_header); + } + else if (auto * window_view = dynamic_cast(view.get())) + { + runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW; + query = window_view->getMergeableQuery(); + out = buildPushingToViewsChain( + view, view_metadata_snapshot, insert_context, ASTPtr(), + /* no_destination= */ true, + thread_status_holder, running_group, view_counter_ms, async_insert); + } + else + out = buildPushingToViewsChain( + view, view_metadata_snapshot, insert_context, ASTPtr(), + /* no_destination= */ false, + thread_status_holder, running_group, view_counter_ms, async_insert); + + views_data->views.emplace_back(ViewRuntimeData{ + std::move(query), + out.getInputHeader(), + view_id, + nullptr, + std::move(runtime_stats)}); + + if (type == QueryViewsLogElement::ViewType::MATERIALIZED) + { + auto executing_inner_query = std::make_shared( + storage_header, views_data->views.back(), views_data); + executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); + + out.addSource(std::move(executing_inner_query)); + } + + return out; +} + Chain buildPushingToViewsChain( const StoragePtr & storage, @@ -214,7 +451,7 @@ Chain buildPushingToViewsChain( /// If we don't write directly to the destination /// then expect that we're inserting with precalculated virtual columns - auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals()) + auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtualsList()) : metadata_snapshot->getSampleBlock(); /** TODO This is a very important line. At any insertion into the table one of chains should own lock. 
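The deduplication handling in generateViewChain above (see the comment with the landing/mv_* diagram) appends the view's UUID, or its fully qualified name when no UUID is available, to insert_deduplication_token, so blocks reaching the same target table through different materialized views are deduplicated independently. Here is a minimal sketch of that derivation with a hypothetical ViewId type standing in for ClickHouse's StorageID.

#include <optional>
#include <string>

// Hypothetical view identifier; ClickHouse's StorageID carries a UUID and a qualified name.
struct ViewId
{
    std::optional<std::string> uuid;
    std::string full_name;
};

// Derive the per-view token the same way the chain above does: prefer the UUID,
// fall back to the fully qualified name, and keep the user-supplied token as a prefix.
std::string deriveDeduplicationToken(std::string token, const ViewId & view)
{
    if (view.uuid)
        token += "_" + *view.uuid;
    else
        token += "_" + view.full_name;
    return token;
}

// Example: two sibling views feeding the same target table get distinct tokens,
// so their blocks are deduplicated independently:
//   deriveDeduplicationToken("user-token", {std::nullopt, "db.mv_2_1"}) -> "user-token_db.mv_2_1"
//   deriveDeduplicationToken("user-token", {std::nullopt, "db.mv_2_2"}) -> "user-token_db.mv_2_2"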
@@ -232,259 +469,45 @@ Chain buildPushingToViewsChain( auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); - /// We need special context for materialized views insertions - ContextMutablePtr select_context; - ContextMutablePtr insert_context; ViewsDataPtr views_data; if (!views.empty()) { - select_context = Context::createCopy(context); - insert_context = Context::createCopy(context); - - const auto & insert_settings = insert_context->getSettingsRef(); - - // Do not deduplicate insertions into MV if the main insertion is Ok - if (disable_deduplication_for_children) - { - insert_context->setSetting("insert_deduplicate", Field{false}); - } - - // Processing of blocks for MVs is done block by block, and there will - // be no parallel reading after (plus it is not a costless operation) - select_context->setSetting("parallelize_output_from_storages", Field{false}); - - // Separate min_insert_block_size_rows/min_insert_block_size_bytes for children - if (insert_settings.min_insert_block_size_rows_for_materialized_views) - insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value); - if (insert_settings.min_insert_block_size_bytes_for_materialized_views) - insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value); - - views_data = std::make_shared(thread_status_holder, select_context, table_id, metadata_snapshot, storage); + auto process_context = Context::createCopy(context); /// This context will be used in `process` function + views_data = std::make_shared(thread_status_holder, process_context, table_id, metadata_snapshot, storage); } std::vector chains; for (const auto & view_id : views) { - auto view = DatabaseCatalog::instance().tryGetTable(view_id, context); - if (view == nullptr) + try { - LOG_WARNING( - getLogger("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); - continue; - } + auto out = generateViewChain( + context, view_id, running_group, result_chain, + views_data, thread_status_holder, async_insert, storage_header, disable_deduplication_for_children); - auto view_metadata_snapshot = view->getInMemoryMetadataPtr(); - - ASTPtr query; - Chain out; - - /// We are creating a ThreadStatus per view to store its metrics individually - /// Since calling ThreadStatus() changes current_thread we save it and restore it after the calls - /// Later on, before doing any task related to a view, we'll switch to its ThreadStatus, do the work, - /// and switch back to the original thread_status. - auto * original_thread = current_thread; - SCOPE_EXIT({ current_thread = original_thread; }); - current_thread = nullptr; - std::unique_ptr view_thread_status_ptr = std::make_unique(/*check_current_thread_on_destruction=*/ false); - /// Copy of a ThreadStatus should be internal. 
- view_thread_status_ptr->setInternalThread(); - view_thread_status_ptr->attachToGroup(running_group); - - auto * view_thread_status = view_thread_status_ptr.get(); - views_data->thread_status_holder->thread_statuses.push_front(std::move(view_thread_status_ptr)); - - auto runtime_stats = std::make_unique(); - runtime_stats->target_name = view_id.getFullTableName(); - runtime_stats->thread_status = view_thread_status; - runtime_stats->event_time = std::chrono::system_clock::now(); - runtime_stats->event_status = QueryViewsLogElement::ViewStatus::EXCEPTION_BEFORE_START; - - auto & type = runtime_stats->type; - auto & target_name = runtime_stats->target_name; - auto * view_counter_ms = &runtime_stats->elapsed_ms; - - const auto & insert_settings = insert_context->getSettingsRef(); - ContextMutablePtr view_insert_context = insert_context; - - if (!disable_deduplication_for_children && - insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && - !insert_settings.insert_deduplication_token.value.empty()) - { - /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle - * deduplication in complex INSERT flows. - * - * Example: - * - * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 - * | | - * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ - * - * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will - * be inserted into `ds_2_1`. - * - * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables - * are involved. - * - * Example: - * - * landing -┬--> mv_1_1 --┬-> ds_1_1 - * | | - * └--> mv_1_2 --┘ - * - */ - auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; - - if (view_id.hasUUID()) - insert_deduplication_token += "_" + toString(view_id.uuid); - else - insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); - - view_insert_context = Context::createCopy(insert_context); - view_insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); - } - - if (auto * materialized_view = dynamic_cast(view.get())) - { - auto lock = materialized_view->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); - - if (lock == nullptr) - { - // In case the materialized view is dropped/detached at this point, we register a warning and ignore it - assert(materialized_view->is_dropped || materialized_view->is_detached); - LOG_WARNING( - getLogger("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + if (!out.has_value()) continue; - } - type = QueryViewsLogElement::ViewType::MATERIALIZED; - result_chain.addTableLock(lock); + chains.emplace_back(std::move(*out)); - StoragePtr inner_table = materialized_view->tryGetTargetTable(); - /// If target table was dropped, ignore this materialized view. 
- if (!inner_table) + /// Add the view to the query access info so it can appear in system.query_log + /// hasQueryContext - for materialized tables with background replication process query context is not added + if (!no_destination && context->hasQueryContext()) { - if (context->getSettingsRef().ignore_materialized_views_with_dropped_target_table) - continue; + context->getQueryContext()->addQueryAccessInfo( + backQuoteIfNeed(view_id.getDatabaseName()), + views_data->views.back().runtime_stats->target_name, + /*column_names=*/ {}); - throw Exception( - ErrorCodes::UNKNOWN_TABLE, - "Target table '{}' of view '{}' doesn't exists. To ignore this view use setting " - "ignore_materialized_views_with_dropped_target_table", - materialized_view->getTargetTableId().getFullTableName(), - view_id.getFullTableName()); + context->getQueryContext()->addViewAccessInfo(view_id.getFullTableName()); } - - auto inner_table_id = inner_table->getStorageID(); - auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr(); - - const auto & select_query = view_metadata_snapshot->getSelectQuery(); - if (select_query.select_table_id != table_id) - { - /// It may happen if materialize view query was changed and it doesn't depend on this source table anymore. - /// See setting `allow_experimental_alter_materialized_view_structure` - LOG_DEBUG( - getLogger("PushingToViews"), "Table '{}' is not a source for view '{}' anymore, current source is '{}'", - select_query.select_table_id.getFullTableName(), view_id.getFullTableName(), table_id); - continue; - } - - query = select_query.inner_query; - - target_name = inner_table_id.getFullTableName(); - - Block header; - - /// Get list of columns we get from select query. - if (select_context->getSettingsRef().allow_experimental_analyzer) - header = InterpreterSelectQueryAnalyzer::getSampleBlock(query, select_context); - else - header = InterpreterSelectQuery(query, select_context, SelectQueryOptions()).getSampleBlock(); - - /// Insert only columns returned by select. - Names insert_columns; - const auto & inner_table_columns = inner_metadata_snapshot->getColumns(); - for (const auto & column : header) - { - /// But skip columns which storage doesn't have. - if (inner_table_columns.hasNotAlias(column.name)) - insert_columns.emplace_back(column.name); - } - - InterpreterInsertQuery interpreter(nullptr, view_insert_context, false, false, false); - out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms); - - if (interpreter.shouldAddSquashingFroStorage(inner_table)) - { - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = view_insert_context->getSettingsRef(); - - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); - } - - auto counting = std::make_shared(out.getInputHeader(), current_thread, view_insert_context->getQuota()); - counting->setProcessListElement(view_insert_context->getProcessListElement()); - counting->setProgressCallback(view_insert_context->getProgressCallback()); - out.addSource(std::move(counting)); - - out.addStorageHolder(view); - out.addStorageHolder(inner_table); } - else if (auto * live_view = dynamic_cast(view.get())) + catch (const Exception & e) { - runtime_stats->type = QueryViewsLogElement::ViewType::LIVE; - query = live_view->getInnerQuery(); // Used only to log in system.query_views_log - out = buildPushingToViewsChain( - view, view_metadata_snapshot, view_insert_context, ASTPtr(), - /* no_destination= */ true, - thread_status_holder, running_group, view_counter_ms, async_insert, storage_header); - } - else if (auto * window_view = dynamic_cast(view.get())) - { - runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW; - query = window_view->getMergeableQuery(); // Used only to log in system.query_views_log - out = buildPushingToViewsChain( - view, view_metadata_snapshot, view_insert_context, ASTPtr(), - /* no_destination= */ true, - thread_status_holder, running_group, view_counter_ms, async_insert); - } - else - out = buildPushingToViewsChain( - view, view_metadata_snapshot, view_insert_context, ASTPtr(), - /* no_destination= */ false, - thread_status_holder, running_group, view_counter_ms, async_insert); - - views_data->views.emplace_back(ViewRuntimeData{ - std::move(query), - out.getInputHeader(), - view_id, - nullptr, - std::move(runtime_stats)}); - - if (type == QueryViewsLogElement::ViewType::MATERIALIZED) - { - auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data); - executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); - - out.addSource(std::move(executing_inner_query)); - } - - chains.emplace_back(std::move(out)); - - /// Add the view to the query access info so it can appear in system.query_log - /// hasQueryContext - for materialized tables with background replication process query context is not added - if (!no_destination && context->hasQueryContext()) - { - context->getQueryContext()->addQueryAccessInfo( - backQuoteIfNeed(view_id.getDatabaseName()), - views_data->views.back().runtime_stats->target_name, - /*column_names=*/ {}); - - context->getQueryContext()->addViewAccessInfo(view_id.getFullTableName()); + LOG_ERROR(&Poco::Logger::get("PushingToViews"), "Failed to push block to view {}, {}", view_id, e.message()); + if (!context->getSettingsRef().materialized_views_ignore_errors) + throw; } } @@ -548,7 +571,6 @@ Chain buildPushingToViewsChain( result_chain.addSource(std::move(sink)); } - /// TODO: add pushing to live view if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -575,18 +597,18 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat views_data.source_storage_id, views_data.source_metadata_snapshot->getColumns(), std::move(block), - views_data.source_storage->getVirtuals())); + *views_data.source_storage->getVirtualsPtr())); QueryPipelineBuilder pipeline; if (local_context->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(view.query, local_context, local_context->getViewSource(), SelectQueryOptions()); + InterpreterSelectQueryAnalyzer interpreter(view.query, local_context, local_context->getViewSource(), 
SelectQueryOptions().ignoreAccessCheck()); pipeline = interpreter.buildQueryPipeline(); } else { - InterpreterSelectQuery interpreter(view.query, local_context, SelectQueryOptions()); + InterpreterSelectQuery interpreter(view.query, local_context, SelectQueryOptions().ignoreAccessCheck()); pipeline = interpreter.buildQueryPipeline(); } diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index 6c7c7447070..8a13973b970 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -37,7 +37,7 @@ InputFormatPtr getInputFormatFromASTInsertQuery( const auto * ast_insert_query = ast->as(); if (!ast_insert_query) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: query requires data to insert, but it is not INSERT query"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query requires data to insert, but it is not INSERT query"); if (ast_insert_query->infile && context->getApplicationType() == Context::ApplicationType::SERVER) throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_QUERY, "Query has infile and was send directly to server"); @@ -47,7 +47,7 @@ InputFormatPtr getInputFormatFromASTInsertQuery( if (input_function) throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: INSERT query requires format to be set"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "INSERT query requires format to be set"); } /// Data could be in parsed (ast_insert_query.data) and in not parsed yet (input_buffer_tail_part) part of query. @@ -105,7 +105,7 @@ std::unique_ptr getReadBufferFromASTInsertQuery(const ASTPtr & ast) { const auto * insert_query = ast->as(); if (!insert_query) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: query requires data to insert, but it is not INSERT query"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query requires data to insert, but it is not INSERT query"); if (insert_query->infile) { diff --git a/src/QueryPipeline/ExecutionSpeedLimits.cpp b/src/QueryPipeline/ExecutionSpeedLimits.cpp index f8ae4c76d0f..05fd394db77 100644 --- a/src/QueryPipeline/ExecutionSpeedLimits.cpp +++ b/src/QueryPipeline/ExecutionSpeedLimits.cpp @@ -113,7 +113,7 @@ static bool handleOverflowMode(OverflowMode mode, int code, FormatStringHelper #include #include -#include #include #include #include @@ -54,7 +53,7 @@ private: struct Task : public AsyncTask { - Task(RemoteQueryExecutorReadContext & read_context_) : read_context(read_context_) {} + explicit Task(RemoteQueryExecutorReadContext & read_context_) : read_context(read_context_) {} RemoteQueryExecutorReadContext & read_context; diff --git a/src/Server/CloudPlacementInfo.cpp b/src/Server/CloudPlacementInfo.cpp new file mode 100644 index 00000000000..0790f825a45 --- /dev/null +++ b/src/Server/CloudPlacementInfo.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace PlacementInfo +{ + +namespace +{ + std::string getConfigPath(std::string_view path) + { + return fmt::format("{}.{}", PLACEMENT_CONFIG_PREFIX, path); + } + + String loadAvailabilityZoneFromFile(const Poco::Util::AbstractConfiguration & config) + { + auto az_file = config.getString(getConfigPath("availability_zone_from_file"), DEFAULT_AZ_FILE_PATH); + + if (!std::filesystem::exists(az_file)) + return ""; + + String 
availability_zone_from_file; + + ReadBufferFromFile in(az_file); + readStringUntilEOF(availability_zone_from_file, in); + Poco::trimInPlace(availability_zone_from_file); + + return availability_zone_from_file; + } +} + + +PlacementInfo & PlacementInfo::instance() +{ + static PlacementInfo instance; + return instance; +} + +void PlacementInfo::initialize(const Poco::Util::AbstractConfiguration & config) +{ + use_imds = config.getBool(getConfigPath("use_imds"), false); + + if (use_imds) + { + availability_zone = S3::getRunningAvailabilityZone(); + } + else + { + availability_zone = config.getString(getConfigPath("availability_zone"), ""); + + if (availability_zone.empty()) + availability_zone = loadAvailabilityZoneFromFile(config); + + if (availability_zone.empty()) + LOG_WARNING(log, "Availability zone info not found"); + } + + LOG_DEBUG(log, "Loaded info: availability_zone: {}", availability_zone); + initialized = true; +} + +std::string PlacementInfo::getAvailabilityZone() const +{ + if (!initialized) + { + LOG_WARNING(log, "Placement info has not been loaded"); + return ""; + } + + return availability_zone; +} + +} +} diff --git a/src/Server/CloudPlacementInfo.h b/src/Server/CloudPlacementInfo.h new file mode 100644 index 00000000000..407f668142f --- /dev/null +++ b/src/Server/CloudPlacementInfo.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace PlacementInfo +{ + +static constexpr auto PLACEMENT_CONFIG_PREFIX = "placement"; +static constexpr auto DEFAULT_AZ_FILE_PATH = "/run/instance-metadata/node-zone"; + +/// A singleton providing information on where in the cloud the server is running. +class PlacementInfo : private boost::noncopyable +{ +public: + static PlacementInfo & instance(); + + void initialize(const Poco::Util::AbstractConfiguration & config); + + std::string getAvailabilityZone() const; + +private: + PlacementInfo() = default; + + LoggerPtr log = getLogger("CloudPlacementInfo"); + + bool initialized; + + bool use_imds; + std::string availability_zone; +}; + +} +} diff --git a/src/Server/HTTP/HTTPServerResponse.cpp b/src/Server/HTTP/HTTPServerResponse.cpp index 3c2d54a67df..b6207f2d302 100644 --- a/src/Server/HTTP/HTTPServerResponse.cpp +++ b/src/Server/HTTP/HTTPServerResponse.cpp @@ -123,4 +123,20 @@ void HTTPServerResponse::requireAuthentication(const std::string & realm) set("WWW-Authenticate", auth); } +void HTTPServerResponse::redirect(const std::string & uri, HTTPStatus status) +{ + poco_assert(!stream); + + setContentLength(0); + setChunkedTransferEncoding(false); + + setStatusAndReason(status); + set("Location", uri); + + // Send header + Poco::Net::HTTPHeaderOutputStream hs(session); + write(hs); + hs.flush(); +} + } diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h index 6efe48667eb..6c5be008bf8 100644 --- a/src/Server/HTTP/HTTPServerResponse.h +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -231,6 +231,16 @@ public: /// Returns true if the response (header) has been sent. bool sent() const { return !!stream; } + /// Sets the status code, which must be one of + /// HTTP_MOVED_PERMANENTLY (301), HTTP_FOUND (302), + /// or HTTP_SEE_OTHER (303), + /// and sets the "Location" header field + /// to the given URI, which according to + /// the HTTP specification, must be absolute. + /// + /// Must not be called after send() has been called.
+ void redirect(const std::string & uri, HTTPStatus status = HTTP_FOUND); + Poco::Net::StreamSocket & getSocket() { return session.socket(); } void attachRequest(HTTPServerRequest * request_) { request = request_; } diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 72e7c5552f8..c112eefec6c 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -125,7 +125,7 @@ namespace ErrorCodes namespace { -bool tryAddHttpOptionHeadersFromConfig(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config) +bool tryAddHTTPOptionHeadersFromConfig(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config) { if (config.has("http_options_response")) { @@ -153,7 +153,7 @@ bool tryAddHttpOptionHeadersFromConfig(HTTPServerResponse & response, const Poco void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::LayeredConfiguration & config) { /// If can add some headers from config - if (tryAddHttpOptionHeadersFromConfig(response, config)) + if (tryAddHTTPOptionHeadersFromConfig(response, config)) { response.setKeepAlive(false); response.setStatusAndReason(HTTPResponse::HTTP_NO_CONTENT); @@ -496,7 +496,7 @@ bool HTTPHandler::authenticateUser( else if (request.getMethod() == HTTPServerRequest::HTTP_POST) http_method = ClientInfo::HTTPMethod::POST; - session->setHttpClientInfo(http_method, request.get("User-Agent", ""), request.get("Referer", "")); + session->setHTTPClientInfo(http_method, request.get("User-Agent", ""), request.get("Referer", "")); session->setForwardedFor(request.get("X-Forwarded-For", "")); session->setQuotaClientKey(quota_key); @@ -884,6 +884,11 @@ void HTTPHandler::processQuery( { if (settings.http_write_exception_in_output_format && output_format.supportsWritingException()) { + bool with_stacktrace = (params.getParsed("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true)); + + ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace); + formatExceptionForClient(status.code, request, response, used_output); + output_format.setException(getCurrentExceptionMessage(false)); output_format.finalize(); used_output.exception_is_written = true; @@ -916,31 +921,7 @@ void HTTPHandler::trySendExceptionToClient( const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) try { - if (used_output.out_holder) - used_output.out_holder->setExceptionCode(exception_code); - else - response.set("X-ClickHouse-Exception-Code", toString(exception_code)); - - /// FIXME: make sure that no one else is reading from the same stream at the moment. - - /// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body - /// to avoid reading part of the current request body in the next request. - if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST - && response.getKeepAlive() - && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED - && !request.getStream().eof()) - { - request.getStream().ignoreAll(); - } - - if (exception_code == ErrorCodes::REQUIRED_PASSWORD) - { - response.requireAuthentication("ClickHouse server HTTP API"); - } - else - { - response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); - } + formatExceptionForClient(exception_code, request, response, used_output); if (!used_output.out_holder && !used_output.exception_is_written) { @@ -1001,6 +982,28 @@ catch (...) 
} } +void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) +{ + if (used_output.out_holder) + used_output.out_holder->setExceptionCode(exception_code); + else + response.set("X-ClickHouse-Exception-Code", toString(exception_code)); + + /// FIXME: make sure that no one else is reading from the same stream at the moment. + + /// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body + /// to avoid reading part of the current request body in the next request. + if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() + && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED && !request.getStream().eof()) + { + request.getStream().ignoreAll(); + } + + if (exception_code == ErrorCodes::REQUIRED_PASSWORD) + response.requireAuthentication("ClickHouse server HTTP API"); + else + response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); +} void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { @@ -1065,7 +1068,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse response.set("X-ClickHouse-Server-Display-Name", server_display_name); if (!request.get("Origin", "").empty()) - tryAddHttpOptionHeadersFromConfig(response, server.config()); + tryAddHTTPOptionHeadersFromConfig(response, server.config()); /// For keep-alive to work. if (request.getVersion() == HTTPServerRequest::HTTP_1_1) @@ -1295,9 +1298,7 @@ HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server, }; auto factory = std::make_shared>(std::move(creator)); - factory->addFiltersFromConfig(config, config_prefix); - return factory; } diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index fa2d0dae199..0e30b466694 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -148,6 +148,12 @@ private: HTTPServerResponse & response, Output & used_output); + void formatExceptionForClient( + int exception_code, + HTTPServerRequest & request, + HTTPServerResponse & response, + Output & used_output); + static void pushDelayedResults(Output & used_output); }; diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 66b55f68217..9a67e576345 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -7,6 +8,7 @@ #include #include "HTTPHandler.h" +#include "Server/PrometheusMetricsWriter.h" #include "StaticRequestHandler.h" #include "ReplicasStatusHandler.h" #include "InterserverIOHTTPHandler.h" @@ -24,6 +26,42 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; } +namespace +{ + +class RedirectRequestHandler : public HTTPRequestHandler +{ +private: + std::string url; + +public: + explicit RedirectRequestHandler(std::string url_) + : url(std::move(url_)) + { + } + + void handleRequest(HTTPServerRequest &, HTTPServerResponse & response, const ProfileEvents::Event &) override + { + response.redirect(url); + } +}; + +HTTPRequestHandlerFactoryPtr createRedirectHandlerFactory( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix) +{ + std::string url = config.getString(config_prefix + ".handler.location"); + + auto factory = std::make_shared>( + [my_url = std::move(url)]() { return std::make_unique(my_url); }); + + factory->addFiltersFromConfig(config, config_prefix); + return factory; 
+} + +} + + static void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server); static void addDefaultHandlersFactory( HTTPRequestHandlerFactoryMain & factory, @@ -31,6 +69,16 @@ static void addDefaultHandlersFactory( const Poco::Util::AbstractConfiguration & config, AsynchronousMetrics & async_metrics); +static auto createPingHandlerFactory(IServer & server) +{ + auto creator = [&server]() -> std::unique_ptr + { + constexpr auto ping_response_expression = "Ok.\n"; + return std::make_unique(server, ping_response_expression); + }; + return std::make_shared>(std::move(creator)); +} + static inline auto createHandlersFactoryFromConfig( IServer & server, const Poco::Util::AbstractConfiguration & config, @@ -58,15 +106,53 @@ static inline auto createHandlersFactoryFromConfig( "{}.{}.handler.type", prefix, key); if (handler_type == "static") + { main_handler_factory->addHandler(createStaticHandlerFactory(server, config, prefix + "." + key)); + } + else if (handler_type == "redirect") + { + main_handler_factory->addHandler(createRedirectHandlerFactory(config, prefix + "." + key)); + } else if (handler_type == "dynamic_query_handler") + { main_handler_factory->addHandler(createDynamicHandlerFactory(server, config, prefix + "." + key)); + } else if (handler_type == "predefined_query_handler") + { main_handler_factory->addHandler(createPredefinedHandlerFactory(server, config, prefix + "." + key)); + } else if (handler_type == "prometheus") + { main_handler_factory->addHandler(createPrometheusHandlerFactory(server, config, async_metrics, prefix + "." + key)); + } else if (handler_type == "replicas_status") + { main_handler_factory->addHandler(createReplicasStatusHandlerFactory(server, config, prefix + "." + key)); + } + else if (handler_type == "ping") + { + auto handler = createPingHandlerFactory(server); + handler->addFiltersFromConfig(config, prefix + "." + key); + main_handler_factory->addHandler(std::move(handler)); + } + else if (handler_type == "play") + { + auto handler = std::make_shared>(server); + handler->addFiltersFromConfig(config, prefix + "." + key); + main_handler_factory->addHandler(std::move(handler)); + } + else if (handler_type == "dashboard") + { + auto handler = std::make_shared>(server); + handler->addFiltersFromConfig(config, prefix + "." + key); + main_handler_factory->addHandler(std::move(handler)); + } + else if (handler_type == "binary") + { + auto handler = std::make_shared>(server); + handler->addFiltersFromConfig(config, prefix + "." 
+ key); + main_handler_factory->addHandler(std::move(handler)); + } else throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown handler type '{}' in config here: {}.{}.handler.type", handler_type, prefix, key); @@ -106,6 +192,7 @@ static inline HTTPRequestHandlerFactoryPtr createInterserverHTTPHandlerFactory(I return factory; } + HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, const Poco::Util::AbstractConfiguration & config, AsynchronousMetrics & async_metrics, const std::string & name) { if (name == "HTTPHandler-factory" || name == "HTTPSHandler-factory") @@ -113,9 +200,12 @@ HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, const Poco:: else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory") return createInterserverHTTPHandlerFactory(server, name); else if (name == "PrometheusHandler-factory") - return createPrometheusMainHandlerFactory(server, config, async_metrics, name); + { + auto metrics_writer = std::make_shared(config, "prometheus", async_metrics); + return createPrometheusMainHandlerFactory(server, config, metrics_writer, name); + } - throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: Unknown HTTP handler factory name."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown HTTP handler factory name."); } @@ -131,12 +221,7 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS root_handler->allowGetAndHeadRequest(); factory.addHandler(root_handler); - auto ping_creator = [&server]() -> std::unique_ptr - { - constexpr auto ping_response_expression = "Ok.\n"; - return std::make_unique(server, ping_response_expression); - }; - auto ping_handler = std::make_shared>(std::move(ping_creator)); + auto ping_handler = createPingHandlerFactory(server); ping_handler->attachStrictPath("/ping"); ping_handler->allowGetAndHeadRequest(); factory.addPathToHints("/ping"); @@ -148,25 +233,25 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS factory.addPathToHints("/replicas_status"); factory.addHandler(replicas_status_handler); - auto play_handler = std::make_shared>(server); + auto play_handler = std::make_shared>(server); play_handler->attachNonStrictPath("/play"); play_handler->allowGetAndHeadRequest(); factory.addPathToHints("/play"); factory.addHandler(play_handler); - auto dashboard_handler = std::make_shared>(server); + auto dashboard_handler = std::make_shared>(server); dashboard_handler->attachNonStrictPath("/dashboard"); dashboard_handler->allowGetAndHeadRequest(); factory.addPathToHints("/dashboard"); factory.addHandler(dashboard_handler); - auto binary_handler = std::make_shared>(server); + auto binary_handler = std::make_shared>(server); binary_handler->attachNonStrictPath("/binary"); binary_handler->allowGetAndHeadRequest(); factory.addPathToHints("/binary"); factory.addHandler(binary_handler); - auto js_handler = std::make_shared>(server); + auto js_handler = std::make_shared>(server); js_handler->attachNonStrictPath("/js/"); js_handler->allowGetAndHeadRequest(); factory.addHandler(js_handler); @@ -208,7 +293,7 @@ void addDefaultHandlersFactory( /// Otherwise it will be created separately, see createHandlerFactory(...). 
if (config.has("prometheus") && config.getInt("prometheus.port", 0) == 0) { - PrometheusMetricsWriter writer(config, "prometheus", async_metrics); + auto writer = std::make_shared(config, "prometheus", async_metrics); auto creator = [&server, writer] () -> std::unique_ptr { return std::make_unique(server, writer); diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index 94b02e52277..ac18c36e6c9 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -35,7 +36,6 @@ public: creator = [&server]() -> std::unique_ptr { return std::make_unique(server); }; } - void addFilter(Filter cur_filter) { Filter prev_filter = filter; @@ -56,6 +56,8 @@ public: continue; else if (filter_type == "url") addFilter(urlFilter(config, prefix + ".url")); + else if (filter_type == "empty_query_string") + addFilter(emptyQueryStringFilter()); else if (filter_type == "headers") addFilter(headersFilter(config, prefix + ".headers")); else if (filter_type == "methods") @@ -130,10 +132,10 @@ createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix); -HTTPRequestHandlerFactoryPtr -createPrometheusMainHandlerFactory(IServer & server, +HTTPRequestHandlerFactoryPtr createPrometheusMainHandlerFactory( + IServer & server, const Poco::Util::AbstractConfiguration & config, - AsynchronousMetrics & async_metrics, + PrometheusMetricsWriterPtr metrics_writer, const std::string & name); /// @param server - used in handlers to check IServer::isCancelled() diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 1f5db283323..15e64cf7f48 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -37,7 +37,7 @@ static inline bool checkExpression(std::string_view match_str, const std::pair methods; Poco::StringTokenizer tokenizer(config.getString(config_path), ","); @@ -62,7 +62,7 @@ static inline auto getExpression(const std::string & expression) return std::make_pair(expression, compiled_regex); } -static inline auto urlFilter(const Poco::Util::AbstractConfiguration & config, const std::string & config_path) /// NOLINT +static inline auto urlFilter(const Poco::Util::AbstractConfiguration & config, const std::string & config_path) { return [expression = getExpression(config.getString(config_path))](const HTTPServerRequest & request) { @@ -73,7 +73,16 @@ static inline auto urlFilter(const Poco::Util::AbstractConfiguration & config, c }; } -static inline auto headersFilter(const Poco::Util::AbstractConfiguration & config, const std::string & prefix) /// NOLINT +static inline auto emptyQueryStringFilter() +{ + return [](const HTTPServerRequest & request) + { + const auto & uri = request.getURI(); + return std::string::npos == uri.find('?'); + }; +} + +static inline auto headersFilter(const Poco::Util::AbstractConfiguration & config, const std::string & prefix) { std::unordered_map> headers_expression; Poco::Util::AbstractConfiguration::Keys headers_name; diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index c41d68bab02..28045380cd7 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -91,24 +91,35 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe used_output.out = std::make_shared( response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, 
keep_alive_timeout, write_event); + auto finalize_output = [&] + { + try + { + used_output.out->finalize(); + } + catch (...) + { + tryLogCurrentException(log, "Failed to finalize response write buffer"); + } + }; + auto write_response = [&](const std::string & message) { - auto & out = *used_output.out; if (response.sent()) { - out.finalize(); + finalize_output(); return; } try { - writeString(message, out); - out.finalize(); + writeString(message, *used_output.out); + finalize_output(); } catch (...) { tryLogCurrentException(log); - out.finalize(); + finalize_output(); } }; @@ -117,7 +128,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe if (auto [message, success] = checkAuthentication(request); success) { processQuery(request, response, used_output); - used_output.out->finalize(); + finalize_output(); LOG_DEBUG(log, "Done processing query"); } else diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 72fe3b7cea9..9efcebfc72d 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -461,6 +461,12 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) auto query_context = session->makeQueryContext(); query_context->setCurrentQueryId(fmt::format("mysql:{}:{}", connection_id, toString(UUIDHelpers::generateV4()))); + + /// --- Workaround for Bug 56173. Can be removed when the analyzer is on by default. + auto settings = query_context->getSettings(); + settings.prefer_column_name_to_alias = true; + query_context->setSettings(settings); + CurrentThread::QueryScope query_scope{query_context}; std::atomic affected_rows {0}; diff --git a/src/Server/NotFoundHandler.h b/src/Server/NotFoundHandler.h index a484d237771..9820c185a3d 100644 --- a/src/Server/NotFoundHandler.h +++ b/src/Server/NotFoundHandler.h @@ -9,7 +9,7 @@ namespace DB class NotFoundHandler : public HTTPRequestHandler { public: - NotFoundHandler(std::vector hints_) : hints(std::move(hints_)) {} + explicit NotFoundHandler(std::vector hints_) : hints(std::move(hints_)) {} void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: std::vector hints; diff --git a/src/Server/PrometheusMetricsWriter.cpp b/src/Server/PrometheusMetricsWriter.cpp index 3d09c2165e5..d0fdcd61493 100644 --- a/src/Server/PrometheusMetricsWriter.cpp +++ b/src/Server/PrometheusMetricsWriter.cpp @@ -4,6 +4,8 @@ #include #include +#include "config.h" + namespace { @@ -38,8 +40,83 @@ void convertHelpToSingleLine(std::string & help) std::replace(help.begin(), help.end(), '\n', ' '); } +constexpr auto profile_events_prefix = "ClickHouseProfileEvents_"; +constexpr auto current_metrics_prefix = "ClickHouseMetrics_"; +constexpr auto asynchronous_metrics_prefix = "ClickHouseAsyncMetrics_"; +constexpr auto error_metrics_prefix = "ClickHouseErrorMetric_"; + +void writeEvent(DB::WriteBuffer & wb, ProfileEvents::Event event) +{ + const auto counter = ProfileEvents::global_counters[event].load(std::memory_order_relaxed); + + std::string metric_name{ProfileEvents::getName(static_cast(event))}; + std::string metric_doc{ProfileEvents::getDocumentation(static_cast(event))}; + + convertHelpToSingleLine(metric_doc); + + if (!replaceInvalidChars(metric_name)) + return; + + std::string key{profile_events_prefix + metric_name}; + + writeOutLine(wb, "# HELP", key, metric_doc); + writeOutLine(wb, "# TYPE", key, "counter"); + writeOutLine(wb, key, counter); } +void writeMetric(DB::WriteBuffer & wb, size_t 
metric) +{ + const auto value = CurrentMetrics::values[metric].load(std::memory_order_relaxed); + + std::string metric_name{CurrentMetrics::getName(static_cast(metric))}; + std::string metric_doc{CurrentMetrics::getDocumentation(static_cast(metric))}; + + convertHelpToSingleLine(metric_doc); + + if (!replaceInvalidChars(metric_name)) + return; + + std::string key{current_metrics_prefix + metric_name}; + + writeOutLine(wb, "# HELP", key, metric_doc); + writeOutLine(wb, "# TYPE", key, "gauge"); + writeOutLine(wb, key, value); +} + +void writeAsyncMetrics(DB::WriteBuffer & wb, const DB::AsynchronousMetricValues & values) +{ + for (const auto & name_value : values) + { + std::string key{asynchronous_metrics_prefix + name_value.first}; + + if (!replaceInvalidChars(key)) + continue; + + auto value = name_value.second; + + std::string metric_doc{value.documentation}; + convertHelpToSingleLine(metric_doc); + + writeOutLine(wb, "# HELP", key, metric_doc); + writeOutLine(wb, "# TYPE", key, "gauge"); + writeOutLine(wb, key, value.value); + } +} + +} + +#if USE_NURAFT +namespace ProfileEvents +{ + extern const std::vector keeper_profile_events; +} + +namespace CurrentMetrics +{ + extern const std::vector keeper_metrics; +} +#endif + namespace DB { @@ -60,65 +137,17 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const if (send_events) { for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) - { - const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); - - std::string metric_name{ProfileEvents::getName(static_cast(i))}; - std::string metric_doc{ProfileEvents::getDocumentation(static_cast(i))}; - - convertHelpToSingleLine(metric_doc); - - if (!replaceInvalidChars(metric_name)) - continue; - std::string key{profile_events_prefix + metric_name}; - - writeOutLine(wb, "# HELP", key, metric_doc); - writeOutLine(wb, "# TYPE", key, "counter"); - writeOutLine(wb, key, counter); - } + writeEvent(wb, i); } if (send_metrics) { for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) - { - const auto value = CurrentMetrics::values[i].load(std::memory_order_relaxed); - - std::string metric_name{CurrentMetrics::getName(static_cast(i))}; - std::string metric_doc{CurrentMetrics::getDocumentation(static_cast(i))}; - - convertHelpToSingleLine(metric_doc); - - if (!replaceInvalidChars(metric_name)) - continue; - std::string key{current_metrics_prefix + metric_name}; - - writeOutLine(wb, "# HELP", key, metric_doc); - writeOutLine(wb, "# TYPE", key, "gauge"); - writeOutLine(wb, key, value); - } + writeMetric(wb, i); } if (send_asynchronous_metrics) - { - auto async_metrics_values = async_metrics.getValues(); - for (const auto & name_value : async_metrics_values) - { - std::string key{asynchronous_metrics_prefix + name_value.first}; - - if (!replaceInvalidChars(key)) - continue; - - auto value = name_value.second; - - std::string metric_doc{value.documentation}; - convertHelpToSingleLine(metric_doc); - - writeOutLine(wb, "# HELP", key, metric_doc); - writeOutLine(wb, "# TYPE", key, "gauge"); - writeOutLine(wb, key, value.value); - } - } + writeAsyncMetrics(wb, async_metrics.getValues()); if (send_errors) { @@ -152,4 +181,24 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const } +void KeeperPrometheusMetricsWriter::write([[maybe_unused]] WriteBuffer & wb) const +{ +#if USE_NURAFT + if (send_events) + { + for (auto event : ProfileEvents::keeper_profile_events) + writeEvent(wb, event); + } + + if (send_metrics) + { + for (auto 
metric : CurrentMetrics::keeper_metrics) + writeMetric(wb, metric); + } + + if (send_asynchronous_metrics) + writeAsyncMetrics(wb, async_metrics.getValues()); +#endif +} + } diff --git a/src/Server/PrometheusMetricsWriter.h b/src/Server/PrometheusMetricsWriter.h index b909a0ddcf6..933ad909ee0 100644 --- a/src/Server/PrometheusMetricsWriter.h +++ b/src/Server/PrometheusMetricsWriter.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -19,20 +20,25 @@ public: const Poco::Util::AbstractConfiguration & config, const std::string & config_name, const AsynchronousMetrics & async_metrics_); - void write(WriteBuffer & wb) const; + virtual void write(WriteBuffer & wb) const; -private: + virtual ~PrometheusMetricsWriter() = default; + +protected: const AsynchronousMetrics & async_metrics; - const bool send_events; const bool send_metrics; const bool send_asynchronous_metrics; const bool send_errors; - - static inline constexpr auto profile_events_prefix = "ClickHouseProfileEvents_"; - static inline constexpr auto current_metrics_prefix = "ClickHouseMetrics_"; - static inline constexpr auto asynchronous_metrics_prefix = "ClickHouseAsyncMetrics_"; - static inline constexpr auto error_metrics_prefix = "ClickHouseErrorMetric_"; }; +class KeeperPrometheusMetricsWriter : public PrometheusMetricsWriter +{ + using PrometheusMetricsWriter::PrometheusMetricsWriter; + + void write(WriteBuffer & wb) const override; +}; + +using PrometheusMetricsWriterPtr = std::shared_ptr; + } diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 8690ec9121e..dff960f7031 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -7,6 +7,7 @@ #include #include #include +#include "Server/PrometheusMetricsWriter.h" #include @@ -34,7 +35,7 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); try { - metrics_writer.write(wb); + metrics_writer->write(wb); wb.finalize(); } catch (...) 
@@ -54,7 +55,7 @@ HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( AsynchronousMetrics & async_metrics, const std::string & config_prefix) { - PrometheusMetricsWriter writer(config, config_prefix + ".handler", async_metrics); + auto writer = std::make_shared(config, config_prefix + ".handler", async_metrics); auto creator = [&server, writer]() -> std::unique_ptr { return std::make_unique(server, writer); @@ -66,13 +67,12 @@ HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( } HTTPRequestHandlerFactoryPtr createPrometheusMainHandlerFactory( - IServer & server, const Poco::Util::AbstractConfiguration & config, AsynchronousMetrics & async_metrics, const std::string & name) + IServer & server, const Poco::Util::AbstractConfiguration & config, PrometheusMetricsWriterPtr metrics_writer, const std::string & name) { auto factory = std::make_shared(name); - PrometheusMetricsWriter writer(config, "prometheus", async_metrics); - auto creator = [&server, writer]() -> std::unique_ptr + auto creator = [&server, metrics_writer] { - return std::make_unique(server, writer); + return std::make_unique(server, metrics_writer); }; auto handler = std::make_shared>(std::move(creator)); diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h index 9ec54cc2e4e..d120752c8c5 100644 --- a/src/Server/PrometheusRequestHandler.h +++ b/src/Server/PrometheusRequestHandler.h @@ -13,12 +13,12 @@ class PrometheusRequestHandler : public HTTPRequestHandler { private: IServer & server; - const PrometheusMetricsWriter & metrics_writer; + PrometheusMetricsWriterPtr metrics_writer; public: - PrometheusRequestHandler(IServer & server_, const PrometheusMetricsWriter & metrics_writer_) + PrometheusRequestHandler(IServer & server_, PrometheusMetricsWriterPtr metrics_writer_) : server(server_) - , metrics_writer(metrics_writer_) + , metrics_writer(std::move(metrics_writer_)) { } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 2c4e9c1e3b2..9637e5bf9ec 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,5 +1,4 @@ #include "Interpreters/AsynchronousInsertQueue.h" -#include "Interpreters/Context_fwd.h" #include "Interpreters/SquashingTransform.h" #include "Parsers/ASTInsertQuery.h" #include @@ -35,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -935,16 +933,33 @@ void TCPHandler::processInsertQuery() if (insert_queue && async_insert_enabled && !insert_query.select) { + /// Let's agree on terminology and say that a mini-INSERT is an asynchronous INSERT + /// which typically contains not a lot of data inside and a big-INSERT is an INSERT + /// which was formed by concatenating several mini-INSERTs together. + /// In case when the client had to retry some mini-INSERTs then they will be properly deduplicated + /// by the source tables. This functionality is controlled by a setting `async_insert_deduplicate`. + /// But then they will be glued together into a block and pushed through a chain of Materialized Views if any. + /// The process of forming such blocks is not deterministic so each time we retry mini-INSERTs the resulting + /// block may be concatenated differently. + /// That's why deduplication in dependent Materialized Views doesn't make sense in presence of async INSERTs.
+ if (settings.throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert && + settings.deduplicate_blocks_in_dependent_materialized_views) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Deduplication in dependent materialized views cannot work together with async inserts. "\ + "Please disable either `deduplicate_blocks_in_dependent_materialized_views` or `async_insert` setting."); + auto result = processAsyncInsertQuery(*insert_queue); if (result.status == AsynchronousInsertQueue::PushResult::OK) { + /// Reset pipeline because it may hold write lock for some storages. + state.io.pipeline.reset(); if (settings.wait_for_async_insert) { size_t timeout_ms = settings.wait_for_async_insert_timeout.totalMilliseconds(); auto wait_status = result.future.wait_for(std::chrono::milliseconds(timeout_ms)); if (wait_status == std::future_status::deferred) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: got future in deferred state"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got future in deferred state"); if (wait_status == std::future_status::timeout) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout ({} ms) exceeded)", timeout_ms); @@ -970,7 +985,7 @@ void TCPHandler::processInsertQuery() else { PushingPipelineExecutor executor(state.io.pipeline); - run_executor(executor, processed_block); + run_executor(executor, std::move(processed_block)); } sendInsertProfileEvents(); diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index e45d2a55acb..68d3ff0b325 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -24,67 +24,70 @@ INCBIN(resource_binary_html, SOURCE_DIR "/programs/server/binary.html"); namespace DB { -WebUIRequestHandler::WebUIRequestHandler(IServer & server_) - : server(server_) -{ -} +PlayWebUIRequestHandler::PlayWebUIRequestHandler(IServer & server_) : server(server_) {} +DashboardWebUIRequestHandler::DashboardWebUIRequestHandler(IServer & server_) : server(server_) {} +BinaryWebUIRequestHandler::BinaryWebUIRequestHandler(IServer & server_) : server(server_) {} +JavaScriptWebUIRequestHandler::JavaScriptWebUIRequestHandler(IServer & server_) : server(server_) {} - -void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) +static void handle(const IServer & server, HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html) { auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); response.setContentType("text/html; charset=UTF-8"); - if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); setResponseDefaultHeaders(response, keep_alive_timeout); + response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(html.data(), html.size()); - if (request.getURI().starts_with("/play")) +} + +void PlayWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) +{ + handle(server, request, response, {reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize}); +} + +void DashboardWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) +{ + std::string html(reinterpret_cast(gresource_dashboard_htmlData),
gresource_dashboard_htmlSize); + + /// Replace a link to external JavaScript file to embedded file. + /// This allows to open the HTML without running a server and to host it on server. + /// Note: we can embed the JavaScript file inline to the HTML, + /// but we don't do it to keep the "view-source" perfectly readable. + + static re2::RE2 uplot_url = R"(https://[^\s"'`]+u[Pp]lot[^\s"'`]*\.js)"; + RE2::Replace(&html, uplot_url, "/js/uplot.js"); + + static re2::RE2 lz_string_url = R"(https://[^\s"'`]+lz-string[^\s"'`]*\.js)"; + RE2::Replace(&html, lz_string_url, "/js/lz-string.js"); + + handle(server, request, response, html); +} + +void BinaryWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) +{ + handle(server, request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}); +} + +void JavaScriptWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) +{ + if (request.getURI() == "/js/uplot.js") { - response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize); - } - else if (request.getURI().starts_with("/dashboard")) - { - response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - - std::string html(reinterpret_cast(gresource_dashboard_htmlData), gresource_dashboard_htmlSize); - - /// Replace a link to external JavaScript file to embedded file. - /// This allows to open the HTML without running a server and to host it on server. - /// Note: we can embed the JavaScript file inline to the HTML, - /// but we don't do it to keep the "view-source" perfectly readable. 
- - static re2::RE2 uplot_url = R"(https://[^\s"'`]+u[Pp]lot[^\s"'`]*\.js)"; - RE2::Replace(&html, uplot_url, "/js/uplot.js"); - - static re2::RE2 lz_string_url = R"(https://[^\s"'`]+lz-string[^\s"'`]*\.js)"; - RE2::Replace(&html, lz_string_url, "/js/lz-string.js"); - - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(html); - } - else if (request.getURI().starts_with("/binary")) - { - response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize); - } - else if (request.getURI() == "/js/uplot.js") - { - response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); + handle(server, request, response, {reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize}); } else if (request.getURI() == "/js/lz-string.js") { - response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_lz_string_jsData), gresource_lz_string_jsSize); + handle(server, request, response, {reinterpret_cast(gresource_lz_string_jsData), gresource_lz_string_jsSize}); } else { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); *response.send() << "Not found.\n"; } + + handle(server, request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}); } } diff --git a/src/Server/WebUIRequestHandler.h b/src/Server/WebUIRequestHandler.h index c52946e2089..b84c8f6534d 100644 --- a/src/Server/WebUIRequestHandler.h +++ b/src/Server/WebUIRequestHandler.h @@ -9,13 +9,40 @@ namespace DB class IServer; /// Response with HTML page that allows to send queries and show results in browser. 
-class WebUIRequestHandler : public HTTPRequestHandler + +class PlayWebUIRequestHandler : public HTTPRequestHandler { private: IServer & server; - public: - WebUIRequestHandler(IServer & server_); + explicit PlayWebUIRequestHandler(IServer & server_); + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; +}; + +class DashboardWebUIRequestHandler : public HTTPRequestHandler +{ +private: + IServer & server; +public: + explicit DashboardWebUIRequestHandler(IServer & server_); + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; +}; + +class BinaryWebUIRequestHandler : public HTTPRequestHandler +{ +private: + IServer & server; +public: + explicit BinaryWebUIRequestHandler(IServer & server_); + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; +}; + +class JavaScriptWebUIRequestHandler : public HTTPRequestHandler +{ +private: + IServer & server; +public: + explicit JavaScriptWebUIRequestHandler(IServer & server_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; diff --git a/src/Server/waitServersToFinish.h b/src/Server/waitServersToFinish.h index b6daa025964..2eb3092fc0c 100644 --- a/src/Server/waitServersToFinish.h +++ b/src/Server/waitServersToFinish.h @@ -1,4 +1,7 @@ #pragma once + +#include + #include namespace DB diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 766863ed9f9..eae5e1a8a47 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -31,8 +31,6 @@ #include #include #include -#include -#include #include #include #include @@ -442,6 +440,14 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.if_exists = command_ast->if_exists; return command; } + else if (command_ast->type == ASTAlterCommand::MODIFY_SQL_SECURITY) + { + AlterCommand command; + command.ast = command_ast->clone(); + command.type = AlterCommand::MODIFY_SQL_SECURITY; + command.sql_security = command_ast->sql_security->clone(); + return command; + } else return {}; } @@ -854,6 +860,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) for (auto & index : metadata.secondary_indices) rename_visitor.visit(index.definition_ast); } + else if (type == MODIFY_SQL_SECURITY) + metadata.setSQLSecurity(sql_security->as()); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); } @@ -955,8 +963,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada /// Drop alias is metadata alter, in other case mutation is required. 
if (type == DROP_COLUMN) - return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name) || - column_name == LightweightDeleteDescription::FILTER_COLUMN.name || column_name == BlockNumberColumn::name; + return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name); if (type != MODIFY_COLUMN || data_type == nullptr) return false; @@ -1246,7 +1253,9 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata) void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { - const StorageInMemoryMetadata & metadata = table->getInMemoryMetadata(); + const auto & metadata = table->getInMemoryMetadata(); + auto virtuals = table->getVirtualsPtr(); + auto all_columns = metadata.columns; /// Default expression for all added/modified columns ASTPtr default_expr_list = std::make_shared(); @@ -1282,16 +1291,20 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const if (command.data_type->hasDynamicSubcolumns()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. It has known bugs"); - if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: " - "this column name is reserved for lightweight delete feature", backQuote(column_name)); - - if (column_name == BlockNumberColumn::name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: " - "this column name is reserved for _block_number persisting feature", backQuote(column_name)); + if (virtuals->tryGet(column_name, VirtualsKind::Persistent)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot add column {}: this column name is reserved for persistent virtual column", backQuote(column_name)); if (command.codec) - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec, context->getSettingsRef().enable_zstd_qat_codec); + { + const auto & settings = context->getSettingsRef(); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( + command.codec, command.data_type, + !settings.allow_suspicious_codecs, + settings.allow_experimental_codecs, + settings.enable_deflate_qpl_codec, + settings.enable_zstd_qat_codec); + } all_columns.add(ColumnDescription(column_name, command.data_type)); } @@ -1405,9 +1418,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } else if (command.type == AlterCommand::DROP_COLUMN) { - if (all_columns.has(command.column_name) || - all_columns.hasNested(command.column_name) || - (command.clear && column_name == LightweightDeleteDescription::FILTER_COLUMN.name)) + if (all_columns.has(command.column_name) || all_columns.hasNested(command.column_name)) { if (!command.clear) /// CLEAR column is Ok even if there are dependencies. 
{ @@ -1491,16 +1502,12 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } if (all_columns.has(command.rename_to)) - throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Cannot rename to {}: " - "column with this name already exists", backQuote(command.rename_to)); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, + "Cannot rename to {}: column with this name already exists", backQuote(command.rename_to)); - if (command.rename_to == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: " - "this column name is reserved for lightweight delete feature", backQuote(command.rename_to)); - - if (command.rename_to == BlockNumberColumn::name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: " - "this column name is reserved for _block_number persisting feature", backQuote(command.rename_to)); + if (virtuals->tryGet(command.rename_to, VirtualsKind::Persistent)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot rename to {}: this column name is reserved for persistent virtual column", backQuote(command.rename_to)); if (modified_columns.contains(column_name)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename and modify the same column {} " diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index d0d5d02b5f7..b1b6c8308f9 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -50,6 +50,7 @@ struct AlterCommand MODIFY_DATABASE_SETTING, COMMENT_TABLE, REMOVE_SAMPLE_BY, + MODIFY_SQL_SECURITY, }; /// Which property user wants to remove from column @@ -147,6 +148,9 @@ struct AlterCommand /// For MODIFY_QUERY ASTPtr select = nullptr; + /// For MODIFY_SQL_SECURITY + ASTPtr sql_security = nullptr; + /// For MODIFY_REFRESH ASTPtr refresh = nullptr; diff --git a/src/Storages/BlockNumberColumn.cpp b/src/Storages/BlockNumberColumn.cpp deleted file mode 100644 index 8c9e1fd902a..00000000000 --- a/src/Storages/BlockNumberColumn.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - -namespace DB -{ - -CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - -CompressionCodecPtr getCompressionCodecForBlockNumberColumn() -{ - std::vector codecs; - codecs.reserve(2); - auto data_bytes_size = BlockNumberColumn::type->getSizeOfValueInMemory(); - codecs.emplace_back(getCompressionCodecDelta(data_bytes_size)); - codecs.emplace_back(CompressionCodecFactory::instance().get("LZ4", {})); - return std::make_shared(codecs); -} - -const String BlockNumberColumn::name = "_block_number"; -const DataTypePtr BlockNumberColumn::type = std::make_shared(); -const CompressionCodecPtr BlockNumberColumn::compression_codec = getCompressionCodecForBlockNumberColumn(); - -} diff --git a/src/Storages/BlockNumberColumn.h b/src/Storages/BlockNumberColumn.h deleted file mode 100644 index fffa68bfd49..00000000000 --- a/src/Storages/BlockNumberColumn.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -struct BlockNumberColumn -{ - static const String name; - static const DataTypePtr type; - static const CompressionCodecPtr compression_codec; -}; - -} diff --git a/src/Storages/Cache/SchemaCache.h b/src/Storages/Cache/SchemaCache.h index 1bfc18bddab..bb6c91fc9f0 100644 --- a/src/Storages/Cache/SchemaCache.h +++ b/src/Storages/Cache/SchemaCache.h @@ -22,7 +22,7 @@ const size_t DEFAULT_SCHEMA_CACHE_ELEMENTS = 4096; class SchemaCache { public: - 
SchemaCache(size_t max_elements_); + explicit SchemaCache(size_t max_elements_); struct Key { diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index d6a241da032..e08dac3a332 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -31,15 +31,11 @@ #include #include #include -#include namespace DB { -CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - - namespace ErrorCodes { extern const int NO_SUCH_COLUMN_IN_TABLE; @@ -482,6 +478,10 @@ NamesAndTypesList ColumnsDescription::get(const GetColumnsOptions & options) con NamesAndTypesList res; switch (options.kind) { + case GetColumnsOptions::None: + { + break; + } case GetColumnsOptions::All: { res = getAll(); @@ -559,6 +559,12 @@ const ColumnDescription & ColumnsDescription::get(const String & column_name) co return *it; } +const ColumnDescription * ColumnsDescription::tryGet(const String & column_name) const +{ + auto it = columns.get<1>().find(column_name); + return it == columns.get<1>().end() ? nullptr : &(*it); +} + static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind) { switch (kind) @@ -572,7 +578,8 @@ static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind) case ColumnDefaultKind::Ephemeral: return GetColumnsOptions::Ephemeral; } - UNREACHABLE(); + + return GetColumnsOptions::None; } NamesAndTypesList ColumnsDescription::getByNames(const GetColumnsOptions & options, const Names & names) const @@ -784,33 +791,6 @@ bool ColumnsDescription::hasCompressionCodec(const String & column_name) const return it != columns.get<1>().end() && it->codec != nullptr; } -CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const -{ - const auto it = columns.get<1>().find(column_name); - - if (it == columns.get<1>().end() || !it->codec) - return default_codec; - - return CompressionCodecFactory::instance().get(it->codec, it->type, default_codec); -} - -CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const -{ - assert (column_name != BlockNumberColumn::name); - return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); -} - -ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const -{ - assert (column_name != BlockNumberColumn::name); - const auto it = columns.get<1>().find(column_name); - - if (it == columns.get<1>().end() || !it->codec) - return default_codec->getFullCodecDesc(); - - return it->codec; -} - ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const { ColumnTTLs ret; diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 59179aac17a..82e55e29073 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -29,10 +29,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +enum class VirtualsKind : UInt8 +{ + None = 0, + Ephemeral = 1, + Persistent = 2, + All = Ephemeral | Persistent, +}; + struct GetColumnsOptions { enum Kind : UInt8 { + None = 0, Ordinary = 1, Materialized = 2, Aliases = 4, @@ -43,7 +52,7 @@ struct GetColumnsOptions All = AllPhysical | Aliases | Ephemeral, }; - GetColumnsOptions(Kind kind_) : kind(kind_) {} + GetColumnsOptions(Kind kind_) : kind(kind_) {} /// NOLINT(google-explicit-constructor) GetColumnsOptions & withSubcolumns(bool value = true) { @@ -51,9 +60,9 @@ struct GetColumnsOptions return *this; } - 
GetColumnsOptions & withVirtuals(bool value = true) + GetColumnsOptions & withVirtuals(VirtualsKind value = VirtualsKind::All) { - with_virtuals = value; + virtuals_kind = value; return *this; } @@ -63,17 +72,11 @@ struct GetColumnsOptions return *this; } - GetColumnsOptions & withSystemColumns(bool value = true) - { - with_system_columns = value; - return *this; - } - Kind kind; + VirtualsKind virtuals_kind = VirtualsKind::None; + bool with_subcolumns = false; - bool with_virtuals = false; bool with_extended_objects = false; - bool with_system_columns = false; }; /// Description of a single table column (in CREATE TABLE for example). @@ -113,7 +116,7 @@ public: explicit ColumnsDescription(NamesAndTypesList ordinary); - explicit ColumnsDescription(std::initializer_list ordinary); + ColumnsDescription(std::initializer_list ordinary); explicit ColumnsDescription(NamesAndTypesList ordinary, NamesAndAliases aliases); @@ -160,6 +163,7 @@ public: bool hasNested(const String & column_name) const; bool hasSubcolumn(const String & column_name) const; const ColumnDescription & get(const String & column_name) const; + const ColumnDescription * tryGet(const String & column_name) const; template void modify(const String & column_name, F && f) @@ -213,9 +217,6 @@ public: /// Does column has non default specified compression codec bool hasCompressionCodec(const String & column_name) const; - CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; - CompressionCodecPtr getCodecOrDefault(const String & column_name) const; - ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; String toString() const; static ColumnsDescription parse(const String & str); @@ -269,4 +270,5 @@ private: /// don't have strange constructions in default expression like SELECT query or /// arrayJoin function. Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, ContextPtr context); + } diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index db3f835494f..711abbde38c 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -22,15 +23,15 @@ public: using Configuration = typename Storage::Configuration; template - explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) - : Storage(getConfigurationForDataRead(configuration_, context_, {}, attach), context_, std::forward(args)...) + explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args) + : Storage(getConfigurationForDataRead(configuration_, context_, {}, mode), context_, std::forward(args)...) 
, base_configuration(configuration_) , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) template - static StoragePtr create(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) + static StoragePtr create(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args) { - return std::make_shared>(configuration_, context_, attach, std::forward(args)...); + return std::make_shared>(configuration_, context_, mode, std::forward(args)...); } String getName() const override { return name; } @@ -38,25 +39,25 @@ public: static ColumnsDescription getTableStructureFromData( Configuration & base_configuration, const std::optional & format_settings, - ContextPtr local_context) + const ContextPtr & local_context) { auto configuration = getConfigurationForDataRead(base_configuration, local_context); return Storage::getTableStructureFromData(configuration, format_settings, local_context); } - static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) + static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context) { return Storage::getConfiguration(engine_args, local_context, /* get_format_from_file */false); } - Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); return Storage::getConfiguration(); } - void updateConfiguration(ContextPtr local_context) override + void updateConfiguration(const ContextPtr & local_context) override { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); @@ -64,7 +65,8 @@ public: private: static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, ContextPtr local_context, const Strings & keys = {}, bool attach = false) + const Configuration & base_configuration, const ContextPtr & local_context, const Strings & keys = {}, + LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE) { auto configuration{base_configuration}; configuration.update(local_context); @@ -87,19 +89,19 @@ private: } catch (...) 
{ - if (!attach) + if (mode <= LoadingStrictnessLevel::CREATE) throw; tryLogCurrentException(__PRETTY_FUNCTION__); return configuration; } } - static Strings getDataFiles(const Configuration & configuration, ContextPtr local_context) + static Strings getDataFiles(const Configuration & configuration, const ContextPtr & local_context) { return MetadataParser().getFiles(configuration, local_context); } - void updateConfigurationImpl(ContextPtr local_context) + void updateConfigurationImpl(const ContextPtr & local_context) { const bool updated = base_configuration.update(local_context); auto new_keys = getDataFiles(base_configuration, local_context); @@ -125,7 +127,7 @@ static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) if (configuration.format == "auto") configuration.format = "Parquet"; - return DataLake::create(configuration, args.getContext(), args.attach, args.table_id, args.columns, args.constraints, + return DataLake::create(configuration, args.getContext(), args.mode, args.table_id, args.columns, args.constraints, args.comment, getFormatSettings(args.getContext())); } diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp index 8a1a2cdbd8f..19cd97c3d4f 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp @@ -8,7 +8,7 @@ namespace DB StoragePtr StorageIceberg::create( const DB::StorageIceberg::Configuration & base_configuration, DB::ContextPtr context_, - bool attach, + LoadingStrictnessLevel mode, const DB::StorageID & table_id_, const DB::ColumnsDescription & columns_, const DB::ConstraintsDescription & constraints_, @@ -27,7 +27,7 @@ StoragePtr StorageIceberg::create( } catch (...) { - if (!attach) + if (mode <= LoadingStrictnessLevel::CREATE) throw; tryLogCurrentException(__PRETTY_FUNCTION__); } @@ -61,7 +61,7 @@ StorageIceberg::StorageIceberg( ColumnsDescription StorageIceberg::getTableStructureFromData( Configuration & base_configuration, const std::optional &, - ContextPtr local_context) + const ContextPtr & local_context) { auto configuration{base_configuration}; configuration.update(local_context); @@ -69,7 +69,7 @@ ColumnsDescription StorageIceberg::getTableStructureFromData( return ColumnsDescription(metadata->getTableSchema()); } -void StorageIceberg::updateConfigurationImpl(ContextPtr local_context) +void StorageIceberg::updateConfigurationImpl(const ContextPtr & local_context) { const bool updated = base_configuration.update(local_context); auto new_metadata = parseIcebergMetadata(base_configuration, local_context); diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h index 4e63da5508a..45cbef0b41b 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -30,7 +30,7 @@ public: static StoragePtr create(const Configuration & base_configuration, ContextPtr context_, - bool attach, + LoadingStrictnessLevel mode, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -52,28 +52,28 @@ public: static ColumnsDescription getTableStructureFromData( Configuration & base_configuration, const std::optional &, - ContextPtr local_context); + const ContextPtr & local_context); static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) { return StorageS3::getConfiguration(engine_args, local_context, /* get_format_from_file */false); } - 
Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); return StorageS3::getConfiguration(); } - void updateConfiguration(ContextPtr local_context) override + void updateConfiguration(const ContextPtr & local_context) override { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); } private: - void updateConfigurationImpl(ContextPtr local_context); + void updateConfigurationImpl(const ContextPtr & local_context); std::unique_ptr current_metadata; Configuration base_configuration; diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index f7d7553851a..a1b436bb9c8 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -6,9 +6,7 @@ #include #include #include -#include #include -#include namespace CurrentMetrics { class Increment; } diff --git a/src/Storages/Distributed/DistributedSettings.cpp b/src/Storages/Distributed/DistributedSettings.cpp index 555aeba7c58..e07b8da34af 100644 --- a/src/Storages/Distributed/DistributedSettings.cpp +++ b/src/Storages/Distributed/DistributedSettings.cpp @@ -15,6 +15,27 @@ namespace ErrorCodes IMPLEMENT_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) +void DistributedSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) +{ + if (!config.has(config_elem)) + return; + + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(config_elem, config_keys); + + try + { + for (const String & key : config_keys) + set(key, config.getString(config_elem + "." 
+ key)); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("in Distributed config"); + throw; + } +} + void DistributedSettings::loadFromQuery(ASTStorage & storage_def) { if (storage_def.settings) diff --git a/src/Storages/Distributed/DistributedSettings.h b/src/Storages/Distributed/DistributedSettings.h index a326e6310dc..efbd6900b07 100644 --- a/src/Storages/Distributed/DistributedSettings.h +++ b/src/Storages/Distributed/DistributedSettings.h @@ -37,6 +37,7 @@ DECLARE_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) */ struct DistributedSettings : public BaseSettings { + void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); void loadFromQuery(ASTStorage & storage_def); }; diff --git a/src/Storages/FileLog/FileLogSource.cpp b/src/Storages/FileLog/FileLogSource.cpp index b1192af4ced..eb3ff0436a5 100644 --- a/src/Storages/FileLog/FileLogSource.cpp +++ b/src/Storages/FileLog/FileLogSource.cpp @@ -31,7 +31,7 @@ FileLogSource::FileLogSource( , max_streams_number(max_streams_number_) , handle_error_mode(handle_error_mode_) , non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized()) - , virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames())) + , virtual_header(storage_snapshot->virtual_columns->getSampleBlock()) { consumer = std::make_unique(storage, max_block_size, poll_time_out, context, stream_number_, max_streams_number_); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 9c7648ef658..a5f2331a068 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -132,7 +132,7 @@ StorageFileLog::StorageFileLog( const String & format_name_, std::unique_ptr settings, const String & comment, - bool attach) + LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , filelog_settings(std::move(settings)) @@ -147,10 +147,11 @@ StorageFileLog::StorageFileLog( storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals(filelog_settings->handle_error_mode)); if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) { - if (attach) + if (LoadingStrictnessLevel::ATTACH <= mode) { LOG_ERROR(log, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath()); return; @@ -165,7 +166,7 @@ StorageFileLog::StorageFileLog( bool created_metadata_directory = false; try { - if (!attach) + if (mode < LoadingStrictnessLevel::ATTACH) { if (disk->exists(metadata_base_path)) { @@ -178,7 +179,7 @@ StorageFileLog::StorageFileLog( created_metadata_directory = true; } - loadMetaFiles(attach); + loadMetaFiles(LoadingStrictnessLevel::ATTACH <= mode); loadFiles(); assert(file_infos.file_names.size() == file_infos.meta_by_inode.size()); @@ -192,7 +193,7 @@ StorageFileLog::StorageFileLog( } catch (...) 
{ - if (!attach) + if (mode <= LoadingStrictnessLevel::ATTACH) { if (created_metadata_directory) disk->removeRecursive(metadata_base_path); @@ -203,6 +204,22 @@ StorageFileLog::StorageFileLog( } } +VirtualColumnsDescription StorageFileLog::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_filename", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_offset", std::make_shared(), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_record", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} + void StorageFileLog::loadMetaFiles(bool attach) { /// Attach table @@ -845,7 +862,7 @@ void registerStorageFileLog(StorageFactory & factory) format, std::move(filelog_settings), args.comment, - args.attach); + args.mode); }; factory.registerStorage( @@ -1009,19 +1026,4 @@ bool StorageFileLog::updateFileInfos() return events.empty() || file_infos.file_names.empty(); } -NamesAndTypesList StorageFileLog::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_filename", std::make_shared(std::make_shared())}, - {"_offset", std::make_shared()}}; - - if (filelog_settings->handle_error_mode == StreamingHandleErrorMode::STREAM) - { - virtuals.push_back({"_raw_record", std::make_shared(std::make_shared())}); - virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - return virtuals; -} - } diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index cc5815a1cef..91d58540c94 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -38,7 +38,7 @@ public: const String & format_name_, std::unique_ptr settings, const String & comment, - bool attach); + LoadingStrictnessLevel mode); using Files = std::vector; @@ -102,8 +102,6 @@ public: String getFullMetaPath(const String & file_name) const { return std::filesystem::path(metadata_base_path) / file_name; } String getFullDataPath(const String & file_name) const { return std::filesystem::path(root_data_path) / file_name; } - NamesAndTypesList getVirtuals() const override; - static UInt64 getInode(const String & file_name); void openFilesAndSetPos(); @@ -212,6 +210,8 @@ private: UInt64 inode = 0; }; ReadMetadataResult readMetadata(const String & filename) const; + + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); }; } diff --git a/src/Storages/Freeze.h b/src/Storages/Freeze.h index 5775653aaea..035786fc90d 100644 --- a/src/Storages/Freeze.h +++ b/src/Storages/Freeze.h @@ -32,7 +32,7 @@ public: class Unfreezer { public: - Unfreezer(ContextPtr context); + explicit Unfreezer(ContextPtr context); PartitionCommandsResultInfo unfreezePartitionsFromTableDirectory(MergeTreeData::MatcherFn matcher, const String & backup_name, const Disks & disks, const fs::path & table_directory); BlockIO systemUnfreeze(const String & backup_name); private: diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index ab21c4946e4..c574f57fc6a 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -67,6 +67,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_COMPILE_REGEXP; + extern const int CANNOT_DETECT_FORMAT; } namespace { @@ -194,7 +195,7 @@ StorageHDFS::StorageHDFS( const ColumnsDescription & columns_, 
const ConstraintsDescription & constraints_, const String & comment, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_, const bool distributed_processing_, ASTPtr partition_by_) @@ -206,7 +207,8 @@ StorageHDFS::StorageHDFS( , distributed_processing(distributed_processing_) , partition_by(partition_by_) { - FormatFactory::instance().checkFormatName(format_name); + if (format_name != "auto") + FormatFactory::instance().checkFormatName(format_name); context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); checkHDFSURL(uri_); @@ -217,11 +219,19 @@ StorageHDFS::StorageHDFS( if (columns_.empty()) { - auto columns = getTableStructureFromData(format_name, uri_, compression_method, context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); + else + columns = getTableStructureFromData(format_name, uri_, compression_method, context_); + storage_metadata.setColumns(columns); } else { + if (format_name == "auto") + format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; + /// We don't allow special columns in HDFS storage. if (!columns_.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -231,8 +241,7 @@ StorageHDFS::StorageHDFS( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } namespace @@ -243,25 +252,25 @@ namespace ReadBufferIterator( const std::vector & paths_with_info_, const String & uri_without_path_, - const String & format_, + std::optional format_, const String & compression_method_, const ContextPtr & context_) : WithContext(context_) , paths_with_info(paths_with_info_) , uri_without_path(uri_without_path_) - , format(format_) + , format(std::move(format_)) , compression_method(compression_method_) { } - std::pair, std::optional> next() override + Data next() override { bool is_first = current_index == 0; /// For default mode check cached columns for all paths on first iteration. if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) { if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns}; + return {nullptr, cached_columns, format}; } StorageHDFS::PathWithInfo path_with_info; @@ -271,10 +280,17 @@ namespace if (current_index == paths_with_info.size()) { if (is_first) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. " - "You must specify table structure manually", format); - return {nullptr, std::nullopt}; + { + if (format) + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because all files are empty. " + "You can specify table structure manually", *format); + + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify table structure manually"); + } + return {nullptr, std::nullopt, format}; } path_with_info = paths_with_info[current_index++]; @@ -285,7 +301,7 @@ namespace { std::vector paths = {path_with_info}; if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns}; + return {nullptr, cached_columns, format}; } auto compression = chooseCompressionMethod(path_with_info.path, compression_method); @@ -293,7 +309,7 @@ namespace if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) { const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt}; + return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; } } } @@ -304,7 +320,7 @@ namespace return; String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); + auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -315,7 +331,7 @@ namespace return; String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); + auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); } @@ -328,10 +344,15 @@ namespace Strings sources; sources.reserve(paths_with_info.size()); std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, format, {}, getContext()); + auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { if (current_index != 0) @@ -340,13 +361,27 @@ namespace return ""; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_index > 0 && current_index <= paths_with_info.size()); + auto path_with_info = paths_with_info[current_index - 1]; + auto compression = chooseCompressionMethod(path_with_info.path, compression_method); + auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); + const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; + return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); + } + private: std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) + auto context = getContext(); + + if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) return std::nullopt; - auto & schema_cache = StorageHDFS::getSchemaCache(getContext()); + auto & schema_cache = StorageHDFS::getSchemaCache(context); for (const auto & path_with_info : paths_with_info_) { auto get_last_mod_time = [&]() -> std::optional @@ -354,7 
+389,7 @@ namespace if (path_with_info.info) return path_with_info.info->last_mod_time; - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); + auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); auto fs = createHDFSFS(builder.get()); HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); if (hdfs_info) @@ -364,10 +399,28 @@ namespace }; String url = uri_without_path + path_with_info.path; - auto cache_key = getKeyForSchemaCache(url, format, {}, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(url, *format, {}, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. + format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -375,29 +428,49 @@ namespace const std::vector & paths_with_info; const String & uri_without_path; - const String & format; + std::optional format; const String & compression_method; size_t current_index = 0; }; } -ColumnsDescription StorageHDFS::getTableStructureFromData( - const String & format, +std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( + std::optional format, const String & uri, const String & compression_method, - ContextPtr ctx) + const ContextPtr & ctx) { const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - if (paths_with_info.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) + if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." + " You can specify table structure manually", *format); + throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path." - " You must specify table structure manually", format); + "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." 
+ " You can specify the format manually"); + } ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths_with_info.size() > 1, ctx); + if (format) + return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; + return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); +} + +std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); +} + +ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; } class HDFSSource::DisclosedGlobIterator::Impl @@ -533,7 +606,7 @@ StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() HDFSSource::HDFSSource( const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, bool need_only_count_) @@ -712,7 +785,7 @@ public: HDFSSink(const String & uri, const String & format, const Block & sample_block, - ContextPtr context, + const ContextPtr & context, const CompressionMethod compression_method) : SinkToStorage(sample_block) { @@ -841,21 +914,28 @@ class ReadFromHDFS : public SourceStepWithFilter public: std::string getName() const override { return "ReadFromHDFS"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; ReadFromHDFS( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, ReadFromFormatInfo info_, bool need_only_count_, std::shared_ptr storage_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , info(std::move(info_)) , need_only_count(need_only_count_) , storage(std::move(storage_)) - , context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) { @@ -866,7 +946,6 @@ private: const bool need_only_count; std::shared_ptr storage; - ContextPtr context; size_t max_block_size; size_t num_streams; @@ -875,9 +954,9 @@ private: void createIterator(const ActionsDAG::Node * predicate); }; -void ReadFromHDFS::applyFilters() +void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -895,18 +974,21 @@ void StorageHDFS::read( size_t max_block_size, size_t num_streams) { - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, 
supportsSubsetOfColumns(context_)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && context_->getSettingsRef().optimize_count_from_files; auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + context_, read_from_format_info.source_header, std::move(read_from_format_info), need_only_count, std::move(this_ptr), - context_, max_block_size, num_streams); @@ -928,7 +1010,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) else if (storage->is_path_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->virtual_columns, context); + auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtualsList(), context); iterator_wrapper = std::make_shared([glob_iterator]() { return glob_iterator->next(); @@ -936,7 +1018,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) } else { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->virtual_columns, context); + auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtualsList(), context); iterator_wrapper = std::make_shared([uris_iterator]() { return uris_iterator->next(); @@ -1073,7 +1155,7 @@ void registerStorageHDFS(StorageFactory & factory) } if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url, true); + format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); String compression_method; if (engine_args.size() == 3) @@ -1096,16 +1178,6 @@ void registerStorageHDFS(StorageFactory & factory) }); } -NamesAndTypesList StorageHDFS::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageHDFS::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) { static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 7170763c959..47e5addccb4 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -44,7 +44,7 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_ = "", bool distributed_processing_ = false, ASTPtr partition_by = nullptr); @@ -69,9 +69,6 @@ public: ContextPtr local_context, TableExclusiveLockHolder &) override; - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override { return true; } /// Check if the format is column-oriented. 
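/// Illustrative call-site sketch, not part of this diff: how the two schema entry points
/// declared just below are meant to be combined, mirroring the StorageHDFS and
/// StorageHDFSCluster constructor changes earlier in this patch. Variable names here are
/// made up for the example.
ColumnsDescription columns;
String format_name = "auto";   /// e.g. taken from the engine arguments
if (format_name == "auto")
    std::tie(columns, format_name) = StorageHDFS::getTableStructureAndFormatFromData(uri, compression_method, context);
else
    columns = StorageHDFS::getTableStructureFromData(format_name, uri, compression_method, context);
storage_metadata.setColumns(columns);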
@@ -86,7 +83,12 @@ public: const String & format, const String & uri, const String & compression_method, - ContextPtr ctx); + const ContextPtr & ctx); + + static std::pair getTableStructureAndFormatFromData( + const String & uri, + const String & compression_method, + const ContextPtr & ctx); static SchemaCache & getSchemaCache(const ContextPtr & ctx); @@ -97,13 +99,18 @@ protected: friend class ReadFromHDFS; private: + static std::pair getTableStructureAndFormatFromDataImpl( + std::optional format, + const String & uri, + const String & compression_method, + const ContextPtr & ctx); + std::vector uris; String format_name; String compression_method; const bool distributed_processing; ASTPtr partition_by; bool is_path_with_globs; - NamesAndTypesList virtual_columns; LoggerPtr log = getLogger("StorageHDFS"); }; @@ -141,7 +148,7 @@ public: HDFSSource( const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, bool need_only_count_); diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index fad29436102..bde8b84e349 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -43,12 +43,10 @@ StorageHDFSCluster::StorageHDFSCluster( const String & format_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & compression_method_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + const String & compression_method) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")")) , uri(uri_) , format_name(format_name_) - , compression_method(compression_method_) { checkHDFSURL(uri_); context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); @@ -57,42 +55,44 @@ StorageHDFSCluster::StorageHDFSCluster( if (columns_.empty()) { - auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_); + else + columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); storage_metadata.setColumns(columns); } else + { + if (format_name == "auto") + format_name = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_).second; + storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } -void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query)); - 
TableFunctionHDFSCluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionHDFSCluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context); } RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = std::make_shared(uri, predicate, virtual_columns, context); + auto iterator = std::make_shared(uri, predicate, getVirtualsList(), context); auto callback = std::make_shared>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } -NamesAndTypesList StorageHDFSCluster::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; -} - } #endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 7c4c41a573a..26ebc8601ee 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -28,13 +28,10 @@ public: const String & format_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & compression_method_, - bool structure_argument_was_provided_); + const String & compression_method); std::string getName() const override { return "HDFSCluster"; } - NamesAndTypesList getVirtuals() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -42,12 +39,10 @@ public: bool supportsTrivialCountOptimization() const override { return true; } private: - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; String uri; String format_name; - String compression_method; - NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 6766ecd6b4f..88ab8e15e76 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -45,6 +45,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -444,6 +445,7 @@ StorageHive::StorageHive( storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); setInMemoryMetadata(storage_metadata); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageHive::lazyInitialize() @@ -770,9 +772,12 @@ class ReadFromHive : public SourceStepWithFilter public: std::string getName() const override { return "ReadFromHive"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; ReadFromHive( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block header, std::shared_ptr storage_, std::shared_ptr sources_info_, @@ -781,10 +786,14 @@ public: HiveMetastoreClient::HiveTableMetadataPtr hive_table_metadata_, Block sample_block_, LoggerPtr log_, - ContextPtr context_, 
size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(header)}) + : SourceStepWithFilter( + DataStream{.header = std::move(header)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , storage(std::move(storage_)) , sources_info(std::move(sources_info_)) , builder(std::move(builder_)) @@ -792,7 +801,6 @@ public: , hive_table_metadata(std::move(hive_table_metadata_)) , sample_block(std::move(sample_block_)) , log(log_) - , context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) { @@ -807,22 +815,15 @@ private: Block sample_block; LoggerPtr log; - ContextPtr context; size_t max_block_size; size_t num_streams; std::optional hive_files; - void createFiles(const ActionsDAGPtr & filter_actions_dag); + void createFiles(); }; -void ReadFromHive::applyFilters() -{ - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - createFiles(filter_actions_dag); -} - -void ReadFromHive::createFiles(const ActionsDAGPtr & filter_actions_dag) +void ReadFromHive::createFiles() { if (hive_files) return; @@ -835,7 +836,7 @@ void StorageHive::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo &, + SelectQueryInfo & query_info, ContextPtr context_, QueryProcessingStage::Enum /* processed_stage */, size_t max_block_size, @@ -891,6 +892,10 @@ void StorageHive::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + context_, StorageHiveSource::getHeader(sample_block, sources_info), std::move(this_ptr), std::move(sources_info), @@ -899,7 +904,6 @@ void StorageHive::read( std::move(hive_table_metadata), std::move(sample_block), log, - context_, max_block_size, num_streams); @@ -908,7 +912,7 @@ void StorageHive::read( void ReadFromHive::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - createFiles(nullptr); + createFiles(); if (hive_files->empty()) { @@ -1018,13 +1022,6 @@ SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetad throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for StorageHive"); } -NamesAndTypesList StorageHive::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; -} - std::optional StorageHive::totalRows(const Settings & settings) const { /// query_info is not used when prune_level == PruneLevel::None diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 07440097f7a..67ef153af0e 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -54,8 +54,6 @@ public: SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/, bool async_insert) override; - NamesAndTypesList getVirtuals() const override; - bool supportsSubsetOfColumns() const; std::optional totalRows(const Settings & settings) const override; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 85ef6a0bb35..9852220241f 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -27,10 +27,17 @@ namespace ErrorCodes extern const int CANNOT_RESTORE_TABLE; } +IStorage::IStorage(StorageID storage_id_) + : storage_id(std::move(storage_id_)) + , metadata(std::make_unique()) + , virtuals(std::make_unique()) +{ +} + bool 
IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const { /// Virtual column maybe overridden by real column - return !metadata_snapshot->getColumns().has(column_name) && getVirtuals().contains(column_name); + return !metadata_snapshot->getColumns().has(column_name) && virtuals.get()->has(column_name); } RWLockImpl::LockHolder IStorage::tryLockTimed( @@ -237,11 +244,6 @@ void IStorage::renameInMemory(const StorageID & new_table_id) storage_id = new_table_id; } -NamesAndTypesList IStorage::getVirtuals() const -{ - return {}; -} - Names IStorage::getAllRegisteredNames() const { Names result; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4fa6bfdd617..1108eafc6b6 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -98,9 +99,7 @@ class IStorage : public std::enable_shared_from_this, public TypePromo public: IStorage() = delete; /// Storage metadata can be set separately in setInMemoryMetadata method - explicit IStorage(StorageID storage_id_) - : storage_id(std::move(storage_id_)) - , metadata(std::make_unique()) {} + explicit IStorage(StorageID storage_id_); IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; @@ -215,6 +214,10 @@ public: metadata.set(std::make_unique(metadata_)); } + void setVirtuals(VirtualColumnsDescription virtuals_) + { + virtuals.set(std::make_unique(std::move(virtuals_))); + } /// Return list of virtual columns (like _part, _table, etc). In the vast /// majority of cases virtual columns are static constant part of Storage @@ -226,7 +229,9 @@ public: /// virtual column will be overridden and inaccessible. /// /// By default return empty list of columns. - virtual NamesAndTypesList getVirtuals() const; + VirtualsDescriptionPtr getVirtualsPtr() const { return virtuals.get(); } + NamesAndTypesList getVirtualsList() const { return virtuals.get()->getNamesAndTypesList(); } + Block getVirtualsHeader() const { return virtuals.get()->getSampleBlock(); } Names getAllRegisteredNames() const override; @@ -263,15 +268,16 @@ public: virtual bool supportsTrivialCountOptimization() const { return false; } private: - StorageID storage_id; mutable std::mutex id_mutex; - /// Multiversion storage metadata. Allows to read/write storage metadata - /// without locks. + /// Multiversion storage metadata. Allows to read/write storage metadata without locks. MultiVersionStorageMetadataPtr metadata; + /// Description of virtual columns. Optional, may be set in constructor. 
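/// Illustrative sketch, not part of this diff: the intended flow for the new virtuals API.
/// A storage describes its virtual columns once, publishes them with setVirtuals(), and callers
/// read them back through the accessors above instead of overriding getVirtuals().
/// Column names and types in this sketch are made up for the example.
VirtualColumnsDescription virtuals_desc;
virtuals_desc.addEphemeral("_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "");
virtuals_desc.addEphemeral("_offset", std::make_shared<DataTypeUInt64>(), "");
setVirtuals(std::move(virtuals_desc));        /// stored in the MultiVersion holder, so reads need no locks

Block virtuals_header = getVirtualsHeader();  /// sample block with one column per virtual
NamesAndTypesList virtuals_list = getVirtualsList();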
+ MultiVersionVirtualsDescriptionPtr virtuals; + protected: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const; diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index 812b213cf33..ab45ce877c2 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -32,12 +32,10 @@ namespace DB IStorageCluster::IStorageCluster( const String & cluster_name_, const StorageID & table_id_, - LoggerPtr log_, - bool structure_argument_was_provided_) + LoggerPtr log_) : IStorage(table_id_) , log(log_) , cluster_name(cluster_name_) - , structure_argument_was_provided(structure_argument_was_provided_) { } @@ -46,23 +44,30 @@ class ReadFromCluster : public SourceStepWithFilter public: std::string getName() const override { return "ReadFromCluster"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; ReadFromCluster( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, std::shared_ptr storage_, ASTPtr query_to_send_, QueryProcessingStage::Enum processed_stage_, ClusterPtr cluster_, - LoggerPtr log_, - ContextPtr context_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + LoggerPtr log_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , storage(std::move(storage_)) , query_to_send(std::move(query_to_send_)) , processed_stage(processed_stage_) , cluster(std::move(cluster_)) , log(log_) - , context(std::move(context_)) { } @@ -72,7 +77,6 @@ private: QueryProcessingStage::Enum processed_stage; ClusterPtr cluster; LoggerPtr log; - ContextPtr context; std::optional extension; @@ -80,9 +84,9 @@ private: ContextPtr updateSettings(const Settings & settings); }; -void ReadFromCluster::applyFilters() +void ReadFromCluster::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -130,8 +134,7 @@ void IStorageCluster::read( query_to_send = interpreter.getQueryInfo().query->clone(); } - if (!structure_argument_was_provided) - addColumnsStructureToQuery(query_to_send, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), context); + updateQueryToSendIfNeeded(query_to_send, storage_snapshot, context); RestoreQualifiedNamesVisitor::Data data; data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query_info.query->as(), 0)); @@ -146,13 +149,16 @@ void IStorageCluster::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + context, sample_block, std::move(this_ptr), std::move(query_to_send), processed_stage, cluster, - log, - context); + log); query_plan.addStep(std::move(reading)); } @@ -185,7 +191,11 @@ void ReadFromCluster::initializePipeline(QueryPipelineBuilder & pipeline, const extension); remote_query_executor->setLogger(log); - 
pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false, false)); + pipes.emplace_back(std::make_shared( + remote_query_executor, + add_agg_info, + current_settings.async_socket_for_remote, + current_settings.async_query_sending_for_remote)); } } diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index 8d93e94be9a..f3283247672 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -19,8 +19,7 @@ public: IStorageCluster( const String & cluster_name_, const StorageID & table_id_, - LoggerPtr log_, - bool structure_argument_was_provided_); + LoggerPtr log_); void read( QueryPlan & query_plan, @@ -42,13 +41,11 @@ public: protected: virtual void updateBeforeRead(const ContextPtr &) {} - - virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) = 0; + virtual void updateQueryToSendIfNeeded(ASTPtr & /*query*/, const StorageSnapshotPtr & /*storage_snapshot*/, const ContextPtr & /*context*/) {} private: LoggerPtr log; String cluster_name; - bool structure_argument_was_provided; }; diff --git a/src/Storages/IndicesDescription.h b/src/Storages/IndicesDescription.h index e56642b8c76..21ba5fb632e 100644 --- a/src/Storages/IndicesDescription.h +++ b/src/Storages/IndicesDescription.h @@ -2,7 +2,6 @@ #include -#include #include #include #include diff --git a/src/Storages/KVStorageUtils.cpp b/src/Storages/KVStorageUtils.cpp index 5175c93041b..94319aef3b8 100644 --- a/src/Storages/KVStorageUtils.cpp +++ b/src/Storages/KVStorageUtils.cpp @@ -231,12 +231,11 @@ bool traverseDAGFilter( } std::pair getFilterKeys( - const String & primary_key, const DataTypePtr & primary_key_type, const ActionDAGNodes & filter_nodes, const ContextPtr & context) + const String & primary_key, const DataTypePtr & primary_key_type, const ActionsDAGPtr & filter_actions_dag, const ContextPtr & context) { - if (filter_nodes.nodes.empty()) + if (!filter_actions_dag) return {{}, true}; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const auto * predicate = filter_actions_dag->getOutputs().at(0); FieldVectorPtr res = std::make_shared(); diff --git a/src/Storages/KVStorageUtils.h b/src/Storages/KVStorageUtils.h index c6d63b800df..e20a1ce4f37 100644 --- a/src/Storages/KVStorageUtils.h +++ b/src/Storages/KVStorageUtils.h @@ -22,7 +22,7 @@ std::pair getFilterKeys( const std::string & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info, const ContextPtr & context); std::pair getFilterKeys( - const String & primary_key, const DataTypePtr & primary_key_type, const ActionDAGNodes & filter_nodes, const ContextPtr & context); + const String & primary_key, const DataTypePtr & primary_key_type, const ActionsDAGPtr & filter_actions_dag, const ContextPtr & context); template void fillColumns(const K & key, const V & value, size_t key_pos, const Block & header, MutableColumns & columns) diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index dc62c13f633..9c68107872e 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -45,7 +45,7 @@ KafkaSource::KafkaSource( , max_block_size(max_block_size_) , commit_in_suffix(commit_in_suffix_) , non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized()) - , virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtualColumnNames())) + , virtual_header(storage.getVirtualsHeader()) , 
handle_error_mode(storage.getStreamingHandleErrorMode()) { } diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index aa347fc719d..e41488189e9 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -246,64 +246,83 @@ namespace const String CONFIG_KAFKA_TOPIC_TAG = "kafka_topic"; const String CONFIG_NAME_TAG = "name"; + void setKafkaConfigValue(cppkafka::Configuration & kafka_config, const String & key, const String & value) + { + if (key.starts_with(CONFIG_KAFKA_TOPIC_TAG) || key == CONFIG_NAME_TAG) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + return; /// used by new per-topic configuration, ignore + + /// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML. + /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md + const String setting_name_in_kafka_config = (key == "log_level") ? key : boost::replace_all_copy(key, "_", "."); + kafka_config.set(setting_name_in_kafka_config, value); + } + /// Read server configuration into cppkafka configuration, used by global configuration and by legacy per-topic configuration - void loadFromConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + void loadFromConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String& collection_name, const String & config_prefix) { + if (!collection_name.empty()) + { + const auto & collection = NamedCollectionFactory::instance().get(collection_name); + for (const auto & key : collection->getKeys(-1, config_prefix)) + { + // Cut prefix with '.' before actual config tag. + const auto param_name = key.substr(config_prefix.size() + 1); + setKafkaConfigValue(kafka_config, param_name, collection->get(key)); + } + return; + } + /// Read all tags one level below Poco::Util::AbstractConfiguration::Keys tags; config.keys(config_prefix, tags); for (const auto & tag : tags) { - if (tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. - continue; /// used by new per-topic configuration, ignore - - const String setting_path = config_prefix + "." + tag; - const String setting_value = config.getString(setting_path); - - /// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML. - /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md - const String setting_name_in_kafka_config = (tag == "log_level") ? 
tag : boost::replace_all_copy(tag, "_", "."); - kafka_config.set(setting_name_in_kafka_config, setting_value); + const String setting_path = fmt::format("{}.{}", config_prefix, tag); + setKafkaConfigValue(kafka_config, tag, config.getString(setting_path)); } } /// Read server configuration into cppkafa configuration, used by new per-topic configuration - void loadTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const String & topic) + void loadTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String& collection_name, const String& config_prefix, const String& topic) { - /// Read all tags one level below - Poco::Util::AbstractConfiguration::Keys tags; - config.keys(config_prefix, tags); - - for (const auto & tag : tags) + if (!collection_name.empty()) { - /// Only consider tag . Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. - if (!tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) - continue; - - /// Read topic name between ... - const String kafka_topic_path = config_prefix + "." + tag; - const String kafpa_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG; - - const String topic_name = config.getString(kafpa_topic_name_path); - if (topic_name == topic) + const auto topic_prefix = fmt::format("{}.{}", config_prefix, CONFIG_KAFKA_TOPIC_TAG); + const auto & collection = NamedCollectionFactory::instance().get(collection_name); + for (const auto & key : collection->getKeys(1, config_prefix)) { - /// Found it! Now read the per-topic configuration into cppkafka. - Poco::Util::AbstractConfiguration::Keys inner_tags; - config.keys(kafka_topic_path, inner_tags); - for (const auto & inner_tag : inner_tags) - { - if (inner_tag == CONFIG_NAME_TAG) - continue; // ignore + /// Only consider key . Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + if (!key.starts_with(topic_prefix)) + continue; - /// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML. - /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md - const String setting_path = kafka_topic_path + "." + inner_tag; - const String setting_value = config.getString(setting_path); + const String kafka_topic_path = config_prefix + "." + key; + const String kafka_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG; + if (topic == collection->get(kafka_topic_name_path)) + /// Found it! Now read the per-topic configuration into cppkafka. + loadFromConfig(kafka_config, config, collection_name, kafka_topic_path); + } + } + else + { + /// Read all tags one level below + Poco::Util::AbstractConfiguration::Keys tags; + config.keys(config_prefix, tags); - const String setting_name_in_kafka_config = (inner_tag == "log_level") ? inner_tag : boost::replace_all_copy(inner_tag, "_", "."); - kafka_config.set(setting_name_in_kafka_config, setting_value); - } + for (const auto & tag : tags) + { + /// Only consider tag . Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + if (!tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) + continue; + + /// Read topic name between ... + const String kafka_topic_path = fmt::format("{}.{}", config_prefix, tag); + const String kafka_topic_name_path = fmt::format("{}.{}", kafka_topic_path, CONFIG_NAME_TAG); + + const String topic_name = config.getString(kafka_topic_name_path); + if (topic_name == topic) + /// Found it! 
Now read the per-topic configuration into cppkafka. + loadFromConfig(kafka_config, config, collection_name, kafka_topic_path); } } } @@ -344,6 +363,8 @@ StorageKafka::StorageKafka( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals(kafka_settings->kafka_handle_error_mode)); + auto task_count = thread_per_consumer ? num_consumers : 1; for (size_t i = 0; i < task_count; ++i) { @@ -365,6 +386,28 @@ StorageKafka::StorageKafka( }); } +VirtualColumnsDescription StorageKafka::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_topic", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_key", std::make_shared(), ""); + desc.addEphemeral("_offset", std::make_shared(), ""); + desc.addEphemeral("_partition", std::make_shared(), ""); + desc.addEphemeral("_timestamp", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_timestamp_ms", std::make_shared(std::make_shared(3)), ""); + desc.addEphemeral("_headers.name", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_headers.value", std::make_shared(std::make_shared()), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(), ""); + desc.addEphemeral("_error", std::make_shared(), ""); + } + + return desc; +} + SettingsChanges StorageKafka::createSettingsAdjustments() { SettingsChanges result; @@ -728,13 +771,6 @@ size_t StorageKafka::getPollTimeoutMillisecond() const : getContext()->getSettingsRef().stream_poll_timeout_ms.totalMilliseconds(); } -String StorageKafka::getConfigPrefix() const -{ - if (!collection_name.empty()) - return "named_collections." + collection_name + "." + CONFIG_KAFKA_TAG; /// Add one more level to separate librdkafka configuration. - return CONFIG_KAFKA_TAG; -} - void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) { // Update consumer configuration from the configuration. Example: @@ -743,9 +779,7 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) // 100000 // const auto & config = getContext()->getConfigRef(); - auto config_prefix = getConfigPrefix(); - if (config.has(config_prefix)) - loadFromConfig(kafka_config, config, config_prefix); + loadFromConfig(kafka_config, config, collection_name, CONFIG_KAFKA_TAG); #if USE_KRB5 if (kafka_config.has_property("sasl.kerberos.kinit.cmd")) @@ -784,9 +818,7 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) // as are ugly. for (const auto & topic : topics) { - const auto topic_config_key = config_prefix + "_" + topic; - if (config.has(topic_config_key)) - loadFromConfig(kafka_config, config, topic_config_key); + loadFromConfig(kafka_config, config, collection_name, CONFIG_KAFKA_TAG + "_" + topic); } // Update consumer topic-specific configuration (new syntax). Example with topics "football" and "baseball": @@ -805,8 +837,7 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) // Advantages: The period restriction no longer applies (e.g. sports.football will work), everything // Kafka-related is below . 
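// A rough illustration of the per-topic layout the comment above describes; the topic names come
// from the comment, while the individual settings and values are only an assumption for the example:
//     <kafka>
//         <kafka_topic>
//             <name>football</name>
//             <retry_backoff_ms>250</retry_backoff_ms>
//         </kafka_topic>
//         <kafka_topic>
//             <name>baseball</name>
//             <fetch_min_bytes>100000</fetch_min_bytes>
//         </kafka_topic>
//     </kafka>
// As in setKafkaConfigValue() above, underscores in these tag names are rewritten to dots before
// they reach librdkafka (retry_backoff_ms -> retry.backoff.ms), with "log_level" left unchanged.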
for (const auto & topic : topics) - if (config.has(config_prefix)) - loadTopicConfig(kafka_config, config, config_prefix, topic); + loadTopicConfig(kafka_config, config, collection_name, CONFIG_KAFKA_TAG, topic); // No need to add any prefix, messages can be distinguished kafka_config.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message) @@ -817,7 +848,7 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) /// NOTE: statistics should be consumed, otherwise it creates too much /// entries in the queue, that leads to memory leak and slow shutdown. - if (!config.has(config_prefix + "." + "statistics_interval_ms")) + if (!kafka_config.has_property("statistics.interval.ms")) { // every 3 seconds by default. set to 0 to disable. kafka_config.set("statistics.interval.ms", "3000"); @@ -1187,43 +1218,4 @@ void registerStorageKafka(StorageFactory & factory) factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } -NamesAndTypesList StorageKafka::getVirtuals() const -{ - auto result = NamesAndTypesList{ - {"_topic", std::make_shared(std::make_shared())}, - {"_key", std::make_shared()}, - {"_offset", std::make_shared()}, - {"_partition", std::make_shared()}, - {"_timestamp", std::make_shared(std::make_shared())}, - {"_timestamp_ms", std::make_shared(std::make_shared(3))}, - {"_headers.name", std::make_shared(std::make_shared())}, - {"_headers.value", std::make_shared(std::make_shared())}}; - if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - result.push_back({"_raw_message", std::make_shared()}); - result.push_back({"_error", std::make_shared()}); - } - return result; -} - -Names StorageKafka::getVirtualColumnNames() const -{ - auto result = Names { - "_topic", - "_key", - "_offset", - "_partition", - "_timestamp", - "_timestamp_ms", - "_headers.name", - "_headers.value", - }; - if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - result.push_back({"_raw_message"}); - result.push_back({"_error"}); - } - return result; -} - } diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index f9a1e3ff6f3..3b20e6b23f6 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -74,8 +74,6 @@ public: const auto & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; - Names getVirtualColumnNames() const; StreamingHandleErrorMode getStreamingHandleErrorMode() const { return kafka_settings->kafka_handle_error_mode; } struct SafeConsumers @@ -145,7 +143,6 @@ private: // Update Kafka configuration with values from CH user configuration. 
void updateConfiguration(cppkafka::Configuration & kafka_config); - String getConfigPrefix() const; void threadFunc(size_t idx); size_t getPollMaxBatchSize() const; @@ -159,6 +156,8 @@ private: bool checkDependencies(const StorageID & table_id); void cleanConsumers(); + + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); }; } diff --git a/src/Storages/LightweightDeleteDescription.cpp b/src/Storages/LightweightDeleteDescription.cpp deleted file mode 100644 index ae5e68da9c2..00000000000 --- a/src/Storages/LightweightDeleteDescription.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include - -namespace DB -{ - -const NameAndTypePair LightweightDeleteDescription::FILTER_COLUMN {"_row_exists", std::make_shared()}; - -} diff --git a/src/Storages/LightweightDeleteDescription.h b/src/Storages/LightweightDeleteDescription.h deleted file mode 100644 index 45bde59ea71..00000000000 --- a/src/Storages/LightweightDeleteDescription.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once -#include -#include "Storages/TTLDescription.h" - -namespace DB -{ - -struct LightweightDeleteDescription -{ - static const NameAndTypePair FILTER_COLUMN; -}; - -} diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index f81225bbee3..c3aacfd67d3 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -57,7 +57,7 @@ namespace ErrorCodes { extern const int INCORRECT_QUERY; extern const int TABLE_WAS_NOT_DROPPED; - extern const int QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW; + extern const int NOT_IMPLEMENTED; extern const int SUPPORT_IS_DISABLED; extern const int UNSUPPORTED_METHOD; } @@ -86,14 +86,14 @@ SelectQueryDescription buildSelectQueryDescription(const ASTPtr & select_query, if (inner_select_with_union_query) { if (inner_select_with_union_query->list_of_selects->children.size() != 1) - throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, "UNION is not supported for LIVE VIEW"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "UNION is not supported for LIVE VIEW"); inner_query = inner_select_with_union_query->list_of_selects->children[0]; } auto * inner_select_query = inner_query->as(); if (!inner_select_query) - throw Exception(DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, + throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "LIVE VIEWs are only supported for queries from tables, " "but there is no table name in select query."); @@ -218,6 +218,10 @@ StorageLiveView::StorageLiveView( setInMemoryMetadata(storage_metadata); + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("_version", std::make_shared(), ""); + setVirtuals(std::move(virtuals)); + if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); @@ -226,29 +230,9 @@ StorageLiveView::StorageLiveView( DatabaseCatalog::instance().addViewDependency(select_query_description.select_table_id, table_id_); - if (query.live_view_periodic_refresh) - { - is_periodically_refreshed = true; - periodic_live_view_refresh = Seconds {*query.live_view_periodic_refresh}; - } - blocks_ptr = std::make_shared(); blocks_metadata_ptr = std::make_shared(); active_ptr = std::make_shared(true); - - periodic_refresh_task = getContext()->getSchedulePool().createTask("LiveViewPeriodicRefreshTask", - [this] - { - try - { - periodicRefreshTaskFunc(); - } - catch (...) 
- { - tryLogCurrentException(log, "Exception in LiveView periodic refresh task in BackgroundSchedulePool"); - } - }); - periodic_refresh_task->deactivate(); } StorageLiveView::~StorageLiveView() @@ -256,13 +240,6 @@ StorageLiveView::~StorageLiveView() shutdown(false); } -NamesAndTypesList StorageLiveView::getVirtuals() const -{ - return NamesAndTypesList{ - NameAndTypePair("_version", std::make_shared()) - }; -} - void StorageLiveView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); @@ -285,17 +262,12 @@ void StorageLiveView::drop() void StorageLiveView::startup() { - if (is_periodically_refreshed) - periodic_refresh_task->activate(); } void StorageLiveView::shutdown(bool) { shutdown_called = true; - if (is_periodically_refreshed) - periodic_refresh_task->deactivate(); - DatabaseCatalog::instance().removeViewDependency(select_query_description.select_table_id, getStorageID()); } @@ -311,17 +283,7 @@ Pipe StorageLiveView::read( std::lock_guard lock(mutex); if (!(*blocks_ptr)) - { refreshImpl(lock); - } - else if (is_periodically_refreshed) - { - Seconds current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - Seconds blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - - if ((current_time - periodic_live_view_refresh) >= blocks_time) - refreshImpl(lock); - } return Pipe(std::make_shared(*blocks_ptr, getHeader())); } @@ -362,9 +324,6 @@ Pipe StorageLiveView::watch( if (!(*blocks_ptr)) refreshImpl(lock); - - if (is_periodically_refreshed) - scheduleNextPeriodicRefresh(lock); } processed_stage = QueryProcessingStage::Complete; @@ -746,44 +705,11 @@ bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) return updated; } -void StorageLiveView::periodicRefreshTaskFunc() -{ - LOG_TRACE(log, "periodic refresh task"); - - std::lock_guard lock(mutex); - - if (hasActiveUsers(lock)) - scheduleNextPeriodicRefresh(lock); -} - -void StorageLiveView::scheduleNextPeriodicRefresh(const std::lock_guard & lock) -{ - Seconds current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - Seconds blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - - if ((current_time - periodic_live_view_refresh) >= blocks_time) - { - refreshImpl(lock); - blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - } - current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - - auto next_refresh_time = blocks_time + periodic_live_view_refresh; - - if (current_time >= next_refresh_time) - periodic_refresh_task->scheduleAfter(0); - else - { - auto schedule_time = std::chrono::duration_cast (next_refresh_time - current_time); - periodic_refresh_task->scheduleAfter(static_cast(schedule_time.count())); - } -} - void registerStorageLiveView(StorageFactory & factory) { factory.registerStorage("LiveView", [](const StorageFactory::Arguments & args) { - if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_live_view) + if (args.mode <= LoadingStrictnessLevel::CREATE && !args.getLocalContext()->getSettingsRef().allow_experimental_live_view) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')"); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 6b8780cb81b..91daac32c7b 100644 --- 
a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -21,6 +21,7 @@ limitations under the License. */ namespace DB { +using BlocksPtrs = std::shared_ptr>; struct BlocksMetadata { @@ -73,8 +74,6 @@ public: bool supportsFinal() const override { return true; } - NamesAndTypesList getVirtuals() const override; - void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void drop() override; @@ -172,11 +171,6 @@ private: /// Read new data blocks that store query result bool getNewBlocks(const std::lock_guard & lock); - void periodicRefreshTaskFunc(); - - /// Must be called with mutex locked - void scheduleNextPeriodicRefresh(const std::lock_guard & lock); - SelectQueryDescription select_query_description; /// Query over the mergeable blocks to produce final result @@ -186,9 +180,6 @@ private: LoggerPtr log; - bool is_periodically_refreshed = false; - Seconds periodic_live_view_refresh; - /// Mutex to protect access to sample block and inner_blocks_query mutable std::mutex sample_block_lock; mutable Block sample_block; @@ -208,9 +199,6 @@ private: MergeableBlocksPtr mergeable_blocks; std::atomic shutdown_called = false; - - /// Periodic refresh task used when [PERIODIC] REFRESH is specified in create statement - BackgroundSchedulePool::TaskHolder periodic_refresh_task; }; } diff --git a/src/Storages/MergeTree/AlterConversions.cpp b/src/Storages/MergeTree/AlterConversions.cpp index a98cd6d99f9..31f8f17e2c1 100644 --- a/src/Storages/MergeTree/AlterConversions.cpp +++ b/src/Storages/MergeTree/AlterConversions.cpp @@ -9,6 +9,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +bool AlterConversions::supportsMutationCommandType(MutationCommand::Type t) +{ + return t == MutationCommand::Type::RENAME_COLUMN; +} + void AlterConversions::addMutationCommand(const MutationCommand & command) { /// Currently only RENAME_COLUMN is applied on-fly. diff --git a/src/Storages/MergeTree/AlterConversions.h b/src/Storages/MergeTree/AlterConversions.h index 4410b9c56e2..0f857d351dd 100644 --- a/src/Storages/MergeTree/AlterConversions.h +++ b/src/Storages/MergeTree/AlterConversions.h @@ -35,6 +35,8 @@ public: /// Get column old name before rename (lookup by key in rename_map) std::string getColumnOldName(const std::string & new_name) const; + static bool supportsMutationCommandType(MutationCommand::Type); + private: /// Rename map new_name -> old_name. 
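The AlterConversions hunks above only whitelist RENAME_COLUMN for on-the-fly application and keep a new-name to old-name map. A compact model of that lookup, assuming plain std::string names and a linear scan (the real class and its MutationCommand types are not reproduced here):

#include <cassert>
#include <optional>
#include <string>
#include <vector>

// Minimal stand-in for the rename map kept by AlterConversions:
// each entry maps a column's new name back to its old (on-disk) name.
struct RenameEntry
{
    std::string new_name;
    std::string old_name;
};

class RenameMap
{
public:
    void addRename(std::string new_name, std::string old_name)
    {
        entries.push_back({std::move(new_name), std::move(old_name)});
    }

    // Lookup by new name; returns the old name if the column was renamed.
    std::optional<std::string> getColumnOldName(const std::string & new_name) const
    {
        for (const auto & entry : entries)
            if (entry.new_name == new_name)
                return entry.old_name;
        return std::nullopt;
    }

private:
    std::vector<RenameEntry> entries;
};

int main()
{
    RenameMap conversions;
    conversions.addRename("user_id", "uid");
    assert(conversions.getColumnOldName("user_id") == std::optional<std::string>("uid"));
    assert(!conversions.getColumnOldName("event_date"));
    return 0;
}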
std::vector rename_map; diff --git a/src/Storages/MergeTree/CMakeLists.txt b/src/Storages/MergeTree/CMakeLists.txt index 390835f17ae..e69de29bb2d 100644 --- a/src/Storages/MergeTree/CMakeLists.txt +++ b/src/Storages/MergeTree/CMakeLists.txt @@ -1,3 +0,0 @@ -if(ENABLE_EXAMPLES) - add_subdirectory(examples) -endif() diff --git a/src/Storages/MergeTree/ColumnSizeEstimator.h b/src/Storages/MergeTree/ColumnSizeEstimator.h index 597dc80e525..e512415ff05 100644 --- a/src/Storages/MergeTree/ColumnSizeEstimator.h +++ b/src/Storages/MergeTree/ColumnSizeEstimator.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB @@ -10,7 +9,7 @@ namespace DB /* Allow to compute more accurate progress statistics */ class ColumnSizeEstimator { - using ColumnToSize = MergeTreeDataPartInMemory::ColumnToSize; + using ColumnToSize = std::map; ColumnToSize map; public: diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 000d36752cb..ff9941ee808 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -335,9 +335,7 @@ void DataPartStorageOnDiskBase::backup( const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs, - bool is_projection_part, - bool allow_backup_broken_projection) const + TemporaryFilesOnDisks * temp_dirs) const { fs::path part_path_on_disk = fs::path{root_path} / part_dir; fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir; @@ -379,7 +377,7 @@ void DataPartStorageOnDiskBase::backup( bool copy_encrypted = !backup_settings.decrypt_files_from_encrypted_disks; - auto backup_file = [&](const String & filepath) + for (const auto & filepath : files_to_backup) { auto filepath_on_disk = part_path_on_disk / filepath; auto filepath_in_backup = part_path_in_backup / filepath; @@ -387,10 +385,8 @@ void DataPartStorageOnDiskBase::backup( if (files_without_checksums.contains(filepath)) { backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, read_settings, copy_encrypted)); - return; + continue; } - else if (is_projection_part && allow_backup_broken_projection && !disk->exists(filepath_on_disk)) - return; if (make_temporary_hard_links) { @@ -415,31 +411,6 @@ void DataPartStorageOnDiskBase::backup( backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner); backup_entries.emplace_back(filepath_in_backup, std::move(backup_entry)); - }; - - auto * log = &Poco::Logger::get("DataPartStorageOnDiskBase::backup"); - - for (const auto & filepath : files_to_backup) - { - if (is_projection_part && allow_backup_broken_projection) - { - try - { - backup_file(filepath); - } - catch (Exception & e) - { - if (e.code() != ErrorCodes::FILE_DOESNT_EXIST) - throw; - - LOG_ERROR(log, "Cannot backup file {} of projection part {}. Will try to ignore it", filepath, part_dir); - continue; - } - } - else - { - backup_file(filepath); - } } } @@ -652,6 +623,15 @@ void DataPartStorageOnDiskBase::remove( } } + if (!disk->exists(from)) + { + LOG_ERROR(log, "Directory {} (part to remove) doesn't exist or one of nested files has gone. Most likely this is due to manual removing. This should be discouraged. 
Ignoring.", fullPath(disk, from)); + /// We will never touch this part again, so unlocking it from zero-copy + if (!can_remove_description) + can_remove_description.emplace(can_remove_callback()); + return; + } + try { disk->moveDirectory(from, to); @@ -701,7 +681,9 @@ void DataPartStorageOnDiskBase::remove( if (file_name.starts_with(proj_dir_name)) files_not_to_remove_for_projection.emplace(fs::path(file_name).filename()); - LOG_DEBUG(log, "Will not remove files [{}] for projection {}", fmt::join(files_not_to_remove_for_projection, ", "), projection.name); + if (!files_not_to_remove_for_projection.empty()) + LOG_DEBUG( + log, "Will not remove files [{}] for projection {}", fmt::join(files_not_to_remove_for_projection, ", "), projection.name); CanRemoveDescription proj_description { @@ -780,8 +762,12 @@ void DataPartStorageOnDiskBase::clearDirectory( /// Remove each expected file in directory, then remove directory itself. RemoveBatchRequest request; for (const auto & file : names_to_remove) - request.emplace_back(fs::path(dir) / file); + { + if (isGinFile(file) && (!disk->isFile(fs::path(dir) / file))) + continue; + request.emplace_back(fs::path(dir) / file); + } request.emplace_back(fs::path(dir) / "default_compression_codec.txt", true); request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true); request.emplace_back(fs::path(dir) / "txn_version.txt", true); diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 75bf3d6f93c..52dc850c7fd 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -58,9 +58,7 @@ public: const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs, - bool is_projection_part, - bool allow_backup_broken_projection) const override; + TemporaryFilesOnDisks * temp_dirs) const override; MutableDataPartStoragePtr freeze( const std::string & to, diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index ce70fbe18e5..9f95794cf50 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -44,10 +43,8 @@ namespace ErrorCodes extern const int CANNOT_WRITE_TO_OSTREAM; extern const int CHECKSUM_DOESNT_MATCH; extern const int INSECURE_PATH; - extern const int CORRUPTED_DATA; extern const int LOGICAL_ERROR; extern const int S3_ERROR; - extern const int INCORRECT_PART_TYPE; extern const int ZERO_COPY_REPLICATION_ERROR; } @@ -191,8 +188,6 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write } if (data_settings->allow_remote_fs_zero_copy_replication && - /// In memory data part does not have metadata yet. 
- !isInMemoryPart(part) && client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) { auto disk_type = part->getDataPartStorage().getDiskType(); @@ -205,11 +200,7 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write } } - if (isInMemoryPart(part)) - sendPartFromMemory(part, out, send_projections); - else - sendPartFromDisk(part, out, client_protocol_version, false, send_projections); - + sendPartFromDisk(part, out, client_protocol_version, false, send_projections); data.addLastSentPart(part->info); } catch (const NetException &) @@ -231,36 +222,6 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write } } -void Service::sendPartFromMemory( - const MergeTreeData::DataPartPtr & part, WriteBuffer & out, bool send_projections) -{ - auto metadata_snapshot = data.getInMemoryMetadataPtr(); - if (send_projections) - { - for (const auto & [name, projection] : part->getProjectionParts()) - { - auto projection_sample_block = metadata_snapshot->projections.get(name).sample_block; - auto part_in_memory = asInMemoryPart(projection); - if (!part_in_memory) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection {} of part {} is not stored in memory", name, part->name); - - writeStringBinary(name, out); - projection->checksums.write(out); - NativeWriter block_out(out, 0, projection_sample_block); - block_out.write(part_in_memory->block); - } - } - - auto part_in_memory = asInMemoryPart(part); - if (!part_in_memory) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} is not stored in memory", part->name); - - NativeWriter block_out(out, 0, metadata_snapshot->getSampleBlock()); - part->checksums.write(out); - block_out.write(part_in_memory->block); - - data.getSendsThrottler()->add(part_in_memory->block.bytes()); -} MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( const MergeTreeData::DataPartPtr & part, @@ -526,14 +487,12 @@ std::pair Fetcher::fetchSelected creds.setPassword(password); } - std::unique_ptr in = std::make_unique( - uri, - Poco::Net::HTTPRequest::HTTP_POST, - nullptr, - creds, - DBMS_DEFAULT_BUFFER_SIZE, - 0, /* no redirects */ - context->getCommonFetchesSessionFactory()); + auto in = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::HTTP) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(timeouts) + .withDelayInit(false) + .create(creds); int server_protocol_version = parse(in->getResponseCookie("server_protocol_version", "0")); String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); @@ -557,11 +516,13 @@ std::pair Fetcher::fetchSelected if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { readBinary(sum_files_size, *in); + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { IMergeTreeDataPart::TTLInfos ttl_infos; String ttl_infos_string; readBinary(ttl_infos_string, *in); + ReadBufferFromString ttl_infos_buffer(ttl_infos_string); assertString("ttl format version: 1\n", ttl_infos_buffer); ttl_infos.read(ttl_infos_buffer); @@ -609,6 +570,7 @@ std::pair Fetcher::fetchSelected } UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); + if (revision) disk->syncRevision(revision); @@ -641,8 +603,6 @@ std::pair Fetcher::fetchSelected remote_fs_metadata, fmt::join(capability, ", ")); if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie with old protocol 
version {}", server_protocol_version); - if (part_type == PartType::InMemory) - throw Exception(ErrorCodes::INCORRECT_PART_TYPE, "Got 'remote_fs_metadata' cookie for in-memory part"); try { @@ -701,7 +661,7 @@ std::pair Fetcher::fetchSelected } auto storage_id = data.getStorageID(); - String new_part_path = part_type == PartType::InMemory ? "memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; + String new_part_path = fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; auto entry = data.getContext()->getReplicatedFetchList().insert( storage_id.getDatabaseName(), storage_id.getTableName(), part_info.partition_id, part_name, new_part_path, @@ -709,22 +669,6 @@ std::pair Fetcher::fetchSelected in->setNextCallback(ReplicatedFetchReadCallback(*entry)); - if (part_type == PartType::InMemory) - { - auto volume = std::make_shared("volume_" + part_name, disk, 0); - - auto data_part_storage = std::make_shared( - volume, - data.getRelativeDataPath(), - part_name); - - return std::make_pair(downloadPartToMemory( - data_part_storage, part_name, - MergeTreePartInfo::fromPartName(part_name, data.format_version), - part_uuid, metadata_snapshot, context, *in, - projections, false, throttler), std::move(temporary_directory_lock)); - } - auto output_buffer_getter = [](IDataPartStorage & part_storage, const String & file_name, size_t file_size) { return part_storage.writeFile(file_name, std::min(file_size, DBMS_DEFAULT_BUFFER_SIZE), {}); @@ -736,70 +680,11 @@ std::pair Fetcher::fetchSelected projections, throttler, sync),std::move(temporary_directory_lock)); } -MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( - MutableDataPartStoragePtr data_part_storage, - const String & part_name, - const MergeTreePartInfo & part_info, - const UUID & part_uuid, - const StorageMetadataPtr & metadata_snapshot, - ContextPtr context, - PooledReadWriteBufferFromHTTP & in, - size_t projections, - bool is_projection, - ThrottlerPtr throttler) -{ - auto new_data_part = std::make_shared(data, part_name, part_info, data_part_storage); - - for (size_t i = 0; i < projections; ++i) - { - String projection_name; - readStringBinary(projection_name, in); - - MergeTreePartInfo new_part_info("all", 0, 0, 0); - auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - - auto new_projection_part = downloadPartToMemory( - projection_part_storage, projection_name, - new_part_info, part_uuid, metadata_snapshot, - context, in, 0, true, throttler); - - new_data_part->addProjectionPart(projection_name, std::move(new_projection_part)); - } - - MergeTreeData::DataPart::Checksums checksums; - if (!checksums.read(in)) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Cannot deserialize checksums"); - - NativeReader block_in(in, 0); - auto block = block_in.read(); - throttler->add(block.bytes()); - - new_data_part->setColumns(block.getNamesAndTypesList(), {}, metadata_snapshot->getMetadataVersion()); - - if (!is_projection) - { - new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - new_data_part->uuid = part_uuid; - new_data_part->is_temp = true; - new_data_part->minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); - new_data_part->partition.create(metadata_snapshot, block, 0, context); - } - - MergedBlockOutputStream part_out( - new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, {}, - CompressionCodecFactory::instance().get("NONE", {}), NO_TRANSACTION_PTR); - - part_out.write(block); - 
part_out.finalizePart(new_data_part, false); - new_data_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true); - - return new_data_part; -} void Fetcher::downloadBaseOrProjectionPartToDisk( const String & replica_path, const MutableDataPartStoragePtr & data_part_storage, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, OutputBufferGetter output_buffer_getter, MergeTreeData::DataPart::Checksums & checksums, ThrottlerPtr throttler, @@ -807,6 +692,8 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( { size_t files; readBinary(files, in); + LOG_DEBUG(log, "Downloading files {}", files); + std::vector> written_files; @@ -872,7 +759,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( const String & tmp_prefix, DiskPtr disk, bool to_remote_disk, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, OutputBufferGetter output_buffer_getter, size_t projections, ThrottlerPtr throttler, @@ -903,7 +790,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( || part_name.empty() || std::string::npos != tmp_prefix.find_first_of("/.") || std::string::npos != part_name.find_first_of("/.")) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: tmp_prefix and part_name cannot be empty or contain '.' or '/' characters."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "`tmp_prefix` and `part_name` cannot be empty or contain '.' or '/' characters."); auto part_dir = tmp_prefix + part_name; auto part_relative_path = data.getRelativeDataPath() + String(to_detached ? "detached/" : ""); diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 8c15dc3cfdb..6d532037806 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -20,7 +20,7 @@ namespace DB { class StorageReplicatedMergeTree; -class PooledReadWriteBufferFromHTTP; +class ReadWriteBufferFromHTTP; namespace DataPartsExchange { @@ -40,10 +40,6 @@ public: private: MergeTreeData::DataPartPtr findPart(const String & name); - void sendPartFromMemory( - const MergeTreeData::DataPartPtr & part, - WriteBuffer & out, - bool send_projections); MergeTreeData::DataPart::Checksums sendPartFromDisk( const MergeTreeData::DataPartPtr & part, @@ -94,7 +90,7 @@ private: void downloadBaseOrProjectionPartToDisk( const String & replica_path, const MutableDataPartStoragePtr & data_part_storage, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, OutputBufferGetter output_buffer_getter, MergeTreeData::DataPart::Checksums & checksums, ThrottlerPtr throttler, @@ -107,31 +103,19 @@ private: const String & tmp_prefix_, DiskPtr disk, bool to_remote_disk, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, OutputBufferGetter output_buffer_getter, size_t projections, ThrottlerPtr throttler, bool sync); - MergeTreeData::MutableDataPartPtr downloadPartToMemory( - MutableDataPartStoragePtr data_part_storage, - const String & part_name, - const MergeTreePartInfo & part_info, - const UUID & part_uuid, - const StorageMetadataPtr & metadata_snapshot, - ContextPtr context, - PooledReadWriteBufferFromHTTP & in, - size_t projections, - bool is_projection, - ThrottlerPtr throttler); - MergeTreeData::MutableDataPartPtr downloadPartToDiskRemoteMeta( const String & part_name, const String & replica_path, bool to_detached, const String & tmp_prefix_, DiskPtr disk, - PooledReadWriteBufferFromHTTP & in, + ReadWriteBufferFromHTTP & in, size_t projections, 
MergeTreeData::DataPart::Checksums & checksums, ThrottlerPtr throttler); diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index 1ffb5177430..cbdeabffa97 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -17,7 +17,7 @@ EphemeralLockInZooKeeper::EphemeralLockInZooKeeper(const String & path_prefix_, : zookeeper(zookeeper_), path_prefix(path_prefix_), path(path_), conflict_path(conflict_path_) { if (conflict_path.empty() && path.size() <= path_prefix.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: name of the main node is shorter than prefix."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Name of the main node is shorter than prefix."); } template @@ -179,7 +179,7 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions( size_t prefix_size = block_numbers_path.size() + 1 + partitions[i].size() + 1 + path_prefix.size(); const String & path = dynamic_cast(*lock_responses[i]).path_created; if (path.size() <= prefix_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: name of the sequential node is shorter than prefix."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Name of the sequential node is shorter than prefix."); UInt64 number = parse(path.c_str() + prefix_size, path.size() - prefix_size); locks.push_back(LockInfo{path, partitions[i], number}); diff --git a/src/Storages/MergeTree/GinIndexStore.h b/src/Storages/MergeTree/GinIndexStore.h index 3ed624995e5..ad14a142318 100644 --- a/src/Storages/MergeTree/GinIndexStore.h +++ b/src/Storages/MergeTree/GinIndexStore.h @@ -300,4 +300,9 @@ private: std::mutex mutex; }; +inline bool isGinFile(const String &file_name) +{ + return (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid")); +} + } diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index d06d9791a53..5899ef58cd5 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -223,9 +223,7 @@ public: const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs, - bool is_projection_part, - bool allow_backup_broken_projection) const = 0; + TemporaryFilesOnDisks * temp_dirs) const = 0; /// Creates hardlinks into 'to/dir_path' for every file in data part. /// Callback is called after hardlinks are created, but before 'delete-on-destroy.txt' marker is removed. 
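The GinIndexStore.h hunk above adds isGinFile(), and clearDirectory() now skips GIN index files that are no longer present on disk. A self-contained sketch of the same suffix test, with a hypothetical file list standing in for a real part directory:

#include <cassert>
#include <string>
#include <vector>

// Mirrors the suffix test added to GinIndexStore.h: a file belongs to a GIN
// (full-text) index if it uses one of the four GIN-specific extensions.
inline bool isGinFile(const std::string & file_name)
{
    return file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post")
        || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid");
}

int main()
{
    // Hypothetical directory listing of a data part.
    const std::vector<std::string> files = {"data.bin", "skp_idx_text.gin_dict", "primary.idx"};

    std::vector<std::string> gin_files;
    for (const auto & file : files)
        if (isGinFile(file))
            gin_files.push_back(file);

    assert(gin_files.size() == 1 && gin_files.front() == "skp_idx_text.gin_dict");
    return 0;
}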
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 0f82e00edff..0245baa836c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -51,7 +51,6 @@ namespace CurrentMetrics extern const Metric PartsWide; extern const Metric PartsCompact; - extern const Metric PartsInMemory; } namespace DB @@ -278,9 +277,6 @@ static void incrementTypeMetric(MergeTreeDataPartType type) case MergeTreeDataPartType::Compact: CurrentMetrics::add(CurrentMetrics::PartsCompact); return; - case MergeTreeDataPartType::InMemory: - CurrentMetrics::add(CurrentMetrics::PartsInMemory); - return; case MergeTreeDataPartType::Unknown: return; } @@ -296,9 +292,6 @@ static void decrementTypeMetric(MergeTreeDataPartType type) case MergeTreeDataPartType::Compact: CurrentMetrics::sub(CurrentMetrics::PartsCompact); return; - case MergeTreeDataPartType::InMemory: - CurrentMetrics::sub(CurrentMetrics::PartsInMemory); - return; case MergeTreeDataPartType::Unknown: return; } @@ -313,13 +306,13 @@ IMergeTreeDataPart::IMergeTreeDataPart( const IMergeTreeDataPart * parent_part_) : DataPartStorageHolder(data_part_storage_) , storage(storage_) - , mutable_name(name_) , name(mutable_name) , info(info_) , index_granularity_info(storage_, part_type_) , part_type(part_type_) , parent_part(parent_part_) , parent_part_name(parent_part ? parent_part->name : "") + , mutable_name(name_) { if (parent_part) { @@ -342,6 +335,27 @@ IMergeTreeDataPart::~IMergeTreeDataPart() decrementTypeMetric(part_type); } + +const IMergeTreeDataPart::Index & IMergeTreeDataPart::getIndex() const +{ + std::scoped_lock lock(index_mutex); + if (!index_loaded) + loadIndex(); + index_loaded = true; + return TSA_SUPPRESS_WARNING_FOR_READ(index); /// The variable is guaranteed to be unchanged after return. +} + + +void IMergeTreeDataPart::setIndex(Columns index_) +{ + std::scoped_lock lock(index_mutex); + if (!index.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once"); + index = std::move(index_); + index_loaded = true; +} + + void IMergeTreeDataPart::setName(const String & new_name) { mutable_name = new_name; @@ -548,6 +562,7 @@ void IMergeTreeDataPart::removeIfNeeded() UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const { + std::scoped_lock lock(index_mutex); UInt64 res = 0; for (const ColumnPtr & column : index) res += column->byteSize(); @@ -556,6 +571,7 @@ UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const UInt64 IMergeTreeDataPart::getIndexSizeInAllocatedBytes() const { + std::scoped_lock lock(index_mutex); UInt64 res = 0; for (const ColumnPtr & column : index) res += column->allocatedBytes(); @@ -669,18 +685,20 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks loadColumns(require_columns_checksums); loadChecksums(require_columns_checksums); loadIndexGranularity(); + + if (!storage.getSettings()->primary_key_lazy_load) + getIndex(); + calculateColumnsAndSecondaryIndicesSizesOnDisk(); - loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. 
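getIndex()/setIndex() above make the primary key index lazily loaded and set-once, guarded by index_mutex. A condensed standalone model of that pattern, assuming a toy Index type and a placeholder loadFromDisk() instead of the real deserialization:

#include <mutex>
#include <stdexcept>
#include <string>
#include <vector>

// Stand-in for the columns of the primary key index.
using Index = std::vector<std::string>;

class LazyPrimaryKeyIndex
{
public:
    // First call loads the index under the mutex; later calls return the cached value.
    const Index & get() const
    {
        std::scoped_lock lock(mutex);
        if (!loaded)
        {
            index = loadFromDisk();
            loaded = true;
        }
        return index;
    }

    // The index may be set at most once (e.g. right after it was written by a merge).
    void set(Index index_)
    {
        std::scoped_lock lock(mutex);
        if (!index.empty())
            throw std::logic_error("The index of data part can be set only once");
        index = std::move(index_);
        loaded = true;
    }

private:
    static Index loadFromDisk() { return {"2024-01-01", "2024-02-01"}; } // placeholder deserialization

    mutable std::mutex mutex;
    mutable Index index;
    mutable bool loaded = false;
};

int main()
{
    LazyPrimaryKeyIndex part_index;
    return part_index.get().size() == 2 ? 0 : 1;
}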
loadPartitionAndMinMaxIndex(); - bool has_broken_projections = false; if (!parent_part) { loadTTLInfos(); - loadProjections(require_columns_checksums, check_consistency, has_broken_projections, false /* if_not_loaded */); + loadProjections(require_columns_checksums, check_consistency, false /* if_not_loaded */); } - if (check_consistency && !has_broken_projections) + if (check_consistency) checkConsistency(require_columns_checksums); loadDefaultCompressionCodec(); @@ -745,7 +763,7 @@ void IMergeTreeDataPart::addProjectionPart( projection_parts[projection_name] = std::move(projection_part); } -void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded) +void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded) { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); for (const auto & projection : metadata_snapshot->projections) @@ -762,34 +780,10 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch else { auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build(); - - try - { - part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); - } - catch (...) - { - if (isRetryableException(std::current_exception())) - throw; - - auto message = getCurrentExceptionMessage(true); - LOG_ERROR(&Poco::Logger::get("IMergeTreeDataPart"), - "Cannot load projection {}, will consider it broken. Reason: {}", projection.name, message); - - has_broken_projection = true; - part->setBrokenReason(message, getCurrentExceptionCode()); - } - + part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); addProjectionPart(projection.name, std::move(part)); } } - else if (checksums.has(path)) - { - auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build(); - part->setBrokenReason("Projection directory " + path + " does not exist while loading projections", ErrorCodes::NO_FILE_IN_DATA_PART); - addProjectionPart(projection.name, std::move(part)); - has_broken_projection = true; - } } } @@ -804,8 +798,11 @@ void IMergeTreeDataPart::appendFilesOfIndexGranularity(Strings & /* files */) co { } -void IMergeTreeDataPart::loadIndex() +void IMergeTreeDataPart::loadIndex() const { + /// Memory for index must not be accounted as memory usage for query, because it belongs to a table. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + /// It can be empty in case of mutations if (!index_granularity.isInitialized()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index granularity is not loaded before index loading"); @@ -840,8 +837,30 @@ void IMergeTreeDataPart::loadIndex() for (size_t j = 0; j < key_size; ++j) key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file, {}); + /// Cut useless suffix columns, if necessary. 
+ Float64 ratio_to_drop_suffix_columns = storage.getSettings()->primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns; + if (key_size > 1 && ratio_to_drop_suffix_columns > 0 && ratio_to_drop_suffix_columns < 1) + { + chassert(marks_count > 0); + for (size_t j = 0; j < key_size - 1; ++j) + { + size_t num_changes = 0; + for (size_t i = 1; i < marks_count; ++i) + if (0 != loaded_index[j]->compareAt(i, i - 1, *loaded_index[j], 0)) + ++num_changes; + + if (static_cast(num_changes) / marks_count >= ratio_to_drop_suffix_columns) + { + key_size = j + 1; + loaded_index.resize(key_size); + break; + } + } + } + for (size_t i = 0; i < key_size; ++i) { + loaded_index[i]->shrinkToFit(); loaded_index[i]->protect(); if (loaded_index[i]->size() != marks_count) throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data from index file {}(expected size: " @@ -1184,8 +1203,7 @@ void IMergeTreeDataPart::loadChecksums(bool require) /// Check the data while we are at it. LOG_WARNING(storage.log, "Checksums for part {} not found. Will calculate them from data on disk.", name); - bool noop; - checksums = checkDataPart(shared_from_this(), false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */false); + checksums = checkDataPart(shared_from_this(), false); writeChecksums(checksums, {}); bytes_on_disk = checksums.getTotalSizeOnDisk(); @@ -1447,6 +1465,11 @@ bool IMergeTreeDataPart::supportLightweightDeleteMutate() const parent_part == nullptr && projection_parts.empty(); } +bool IMergeTreeDataPart::hasLightweightDelete() const +{ + return columns.contains(RowExistsColumn::name); +} + void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) const { TransactionID expected_tid = txn ? txn->tid : Tx::PrehistoricTID; @@ -1633,10 +1656,6 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const size_t file_size = getDataPartStorage().getFileSize(TXN_VERSION_METADATA_FILE_NAME); auto buf = getDataPartStorage().readFile(TXN_VERSION_METADATA_FILE_NAME, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt); - /// FIXME https://github.com/ClickHouse/ClickHouse/issues/48465 - if (dynamic_cast(buf.get())) - return true; - readStringUntilEOF(content, *buf); ReadBufferFromString str_buf{content}; VersionMetadata file; @@ -2192,32 +2211,6 @@ std::optional IMergeTreeDataPart::getStreamNameForColumn( return getStreamNameOrHash(stream_name, extension, storage_); } -void IMergeTreeDataPart::markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const -{ - auto it = projection_parts.find(projection_name); - if (it == projection_parts.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no projection part '{}'", projection_name); - it->second->setBrokenReason(message, code); -} - -bool IMergeTreeDataPart::hasBrokenProjection(const String & projection_name) const -{ - auto it = projection_parts.find(projection_name); - if (it == projection_parts.end()) - return false; - return it->second->is_broken; -} - -void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const -{ - std::lock_guard lock(broken_reason_mutex); - if (is_broken) - return; - is_broken = true; - exception = message; - exception_code = code; -} - bool isCompactPart(const MergeTreeDataPartPtr & data_part) { return (data_part && data_part->getType() == MergeTreeDataPartType::Compact); @@ -2228,11 +2221,6 @@ bool isWidePart(const MergeTreeDataPartPtr & data_part) return (data_part && data_part->getType() == 
MergeTreeDataPartType::Wide); } -bool isInMemoryPart(const MergeTreeDataPartPtr & data_part) -{ - return (data_part && data_part->getType() == MergeTreeDataPartType::InMemory); -} - std::optional getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage) { if (data_part_storage.exists()) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index fcf9d5bd17d..aaae64a5970 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -1,11 +1,12 @@ #pragma once +#include #include #include #include +#include #include #include -#include #include #include #include @@ -47,6 +48,8 @@ class MarkCache; class UncompressedCache; class MergeTreeTransaction; +struct MergeTreeReadTaskInfo; +using MergeTreeReadTaskInfoPtr = std::shared_ptr; enum class DataPartRemovalState { @@ -68,6 +71,7 @@ public: using Checksums = MergeTreeDataPartChecksums; using Checksum = MergeTreeDataPartChecksums::Checksum; using ValueSizeMap = std::map; + using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; using MergeTreeWriterPtr = std::unique_ptr; @@ -75,6 +79,7 @@ public: using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; + using Index = Columns; using IndexSizeByName = std::unordered_map; using Type = MergeTreeDataPartType; @@ -93,6 +98,7 @@ public: const NamesAndTypesList & columns_, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -212,10 +218,6 @@ public: const MergeTreeData & storage; -private: - String mutable_name; - mutable MergeTreeDataPartState state{MergeTreeDataPartState::Temporary}; - public: const String & name; // const ref to private mutable_name MergeTreePartInfo info; @@ -261,12 +263,6 @@ public: /// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table. mutable std::atomic is_frozen {false}; - /// If it is a projection part, it can be broken sometimes. - mutable std::atomic is_broken {false}; - mutable std::string exception; - mutable int exception_code = 0; - mutable std::mutex broken_reason_mutex; - /// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper mutable bool is_unexpected_local_part = false; @@ -309,12 +305,6 @@ public: /// Throws an exception if state of the part is not in affordable_states void assertState(const std::initializer_list & affordable_states) const; - /// Primary key (correspond to primary.idx file). - /// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple. - /// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h. - using Index = Columns; - Index index; - MergeTreePartition partition; /// Amount of rows between marks @@ -369,6 +359,9 @@ public: /// Version of part metadata (columns, pk and so on). Managed properly only for replicated merge tree. 
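The loadIndex() hunk above can cut useless suffix key columns: if some prefix column already changes between adjacent marks at least primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns of the time, the remaining columns are dropped from the in-memory index. A small sketch of that decision, assuming string key values in place of IColumn::compareAt:

#include <cassert>
#include <string>
#include <vector>

// One entry per primary-key column; each inner vector holds that column's value at every mark.
using LoadedIndex = std::vector<std::vector<std::string>>;

// Returns how many leading key columns are worth keeping: once some prefix column
// already changes between adjacent marks often enough (>= ratio), the remaining
// suffix columns are unlikely to narrow mark ranges further and can be dropped.
size_t keyColumnsToKeep(const LoadedIndex & index, size_t marks_count, double ratio_to_drop_suffix_columns)
{
    size_t key_size = index.size();
    if (key_size <= 1 || ratio_to_drop_suffix_columns <= 0 || ratio_to_drop_suffix_columns >= 1 || marks_count == 0)
        return key_size;

    for (size_t j = 0; j + 1 < key_size; ++j)
    {
        size_t num_changes = 0;
        for (size_t i = 1; i < marks_count; ++i)
            if (index[j][i] != index[j][i - 1])
                ++num_changes;

        if (static_cast<double>(num_changes) / marks_count >= ratio_to_drop_suffix_columns)
            return j + 1;
    }
    return key_size;
}

int main()
{
    // The first key column changes at 3 of 4 marks (0.75), so with ratio 0.7 the second column is dropped.
    LoadedIndex index = {{"a", "b", "c", "d"}, {"1", "1", "2", "2"}};
    assert(keyColumnsToKeep(index, 4, 0.7) == 1);
    assert(keyColumnsToKeep(index, 4, 0.0) == 2); // ratio 0 disables the optimization
    return 0;
}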
int32_t metadata_version; + const Index & getIndex() const; + void setIndex(Columns index_); + /// For data in RAM ('index') UInt64 getIndexSizeInBytes() const; UInt64 getIndexSizeInAllocatedBytes() const; @@ -429,16 +422,9 @@ public: void addProjectionPart(const String & projection_name, std::shared_ptr && projection_part); - void markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const; - bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); } - bool hasBrokenProjection(const String & projection_name) const; - - /// Return true, if all projections were loaded successfully and none was marked as broken. - void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false); - - void setBrokenReason(const String & message, int code) const; + void loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded = false); /// Return set of metadata file names without checksums. For example, /// columns.txt or checksums.txt itself. @@ -511,7 +497,7 @@ public: bool supportLightweightDeleteMutate() const; /// True if here is lightweight deleted mask file in part. - bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); } + bool hasLightweightDelete() const; void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); @@ -567,6 +553,12 @@ public: mutable std::atomic last_removal_attempt_time = 0; protected: + /// Primary key (correspond to primary.idx file). + /// Lazily loaded in RAM. Contains each index_granularity-th value of primary key tuple. + /// Note that marks (also correspond to primary key) are not always in RAM, but cached. See MarkCache.h. + mutable std::mutex index_mutex; + mutable Index index TSA_GUARDED_BY(index_mutex); + mutable bool index_loaded TSA_GUARDED_BY(index_mutex) = false; /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk ColumnSize total_columns_size; @@ -592,7 +584,7 @@ protected: const IMergeTreeDataPart * parent_part; String parent_part_name; - mutable std::map> projection_parts; + std::map> projection_parts; mutable PartMetadataManagerPtr metadata_manager; @@ -623,6 +615,9 @@ protected: void initializeIndexGranularityInfo(); private: + String mutable_name; + mutable MergeTreeDataPartState state{MergeTreeDataPartState::Temporary}; + /// In compact parts order of columns is necessary NameToNumber column_name_to_position; @@ -660,8 +655,8 @@ private: virtual void appendFilesOfIndexGranularity(Strings & files) const; - /// Loads index file. - void loadIndex(); + /// Loads the index file. + void loadIndex() const TSA_REQUIRES(index_mutex); void appendFilesOfIndex(Strings & files) const; @@ -715,7 +710,6 @@ using MergeTreeMutableDataPartPtr = std::shared_ptr; bool isCompactPart(const MergeTreeDataPartPtr & data_part); bool isWidePart(const MergeTreeDataPartPtr & data_part); -bool isInMemoryPart(const MergeTreeDataPartPtr & data_part); inline String getIndexExtension(bool is_compressed_primary_key) { return is_compressed_primary_key ? 
".cidx" : ".idx"; } std::optional getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h index 2cc73556f04..7807f3d8c25 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h @@ -34,8 +34,6 @@ public: virtual bool isWidePart() const = 0; - virtual bool isInMemoryPart() const = 0; - virtual bool isProjectionPart() const = 0; virtual DataPartStoragePtr getDataPartStorage() const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 63ed8021f58..4936f1d33c6 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -1,7 +1,8 @@ #include +#include +#include +#include #include -#include -#include #include #include #include @@ -19,12 +20,13 @@ namespace namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } - IMergeTreeReader::IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -47,6 +49,7 @@ IMergeTreeReader::IMergeTreeReader( , part_columns(data_part_info_for_read->isWidePart() ? data_part_info_for_read->getColumnsDescriptionWithCollectedNested() : data_part_info_for_read->getColumnsDescription()) + , virtual_fields(virtual_fields_) { columns_to_read.reserve(requested_columns.size()); serializations.reserve(requested_columns.size()); @@ -63,7 +66,49 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints() return avg_value_size_hints; } -void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number) const +void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const +{ + chassert(columns.size() == requested_columns.size()); + + const auto * loaded_part_info = typeid_cast(data_part_info_for_read.get()); + if (!loaded_part_info) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Filling of virtual columns is supported only for LoadedMergeTreeDataPartInfoForReader"); + + const auto & data_part = loaded_part_info->getDataPart(); + const auto & storage_columns = storage_snapshot->getMetadataForQuery()->getColumns(); + const auto & virtual_columns = storage_snapshot->virtual_columns; + + auto it = requested_columns.begin(); + for (size_t pos = 0; pos < columns.size(); ++pos, ++it) + { + if (columns[pos] || storage_columns.has(it->name)) + continue; + + auto virtual_column = virtual_columns->tryGet(it->name); + if (!virtual_column) + continue; + + if (!it->type->equals(*virtual_column->type)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Data type for virtual column {} mismatched. 
Requested type: {}, virtual column type: {}", + it->name, it->type->getName(), virtual_column->type->getName()); + } + + if (it->name == "_part_offset") + throw Exception(ErrorCodes::LOGICAL_ERROR, "Virtual column {} must be filled by range reader", it->name); + + Field field; + if (auto field_it = virtual_fields.find(it->name); field_it != virtual_fields.end()) + field = field_it->second; + else + field = getFieldForConstVirtualColumn(it->name, *data_part); + + columns[pos] = virtual_column->type->createColumnConst(rows, field)->convertToFullColumnIfConst(); + } +} + +void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const { try { @@ -72,7 +117,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e res_columns, num_rows, Nested::convertToSubcolumns(requested_columns), Nested::convertToSubcolumns(available_columns), - partially_read_columns, storage_snapshot->metadata, block_number); + partially_read_columns, storage_snapshot->metadata); should_evaluate_missing_defaults = std::any_of( res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; }); diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 997be064f28..a5b84eba241 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -9,8 +9,6 @@ namespace DB { -class IDataType; - /// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks. /// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer. /// Avoids loading the marks file if it is not needed (e.g. when reading the whole part). @@ -18,11 +16,13 @@ class IMergeTreeReader : private boost::noncopyable { public: using ValueSizeMap = std::map; + using VirtualFields = std::unordered_map; using DeserializeBinaryBulkStateMap = std::map; IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -42,10 +42,13 @@ public: const ValueSizeMap & getAvgValueSizeHints() const; + /// Add virtual columns that are not present in the block. + void fillVirtualColumns(Columns & columns, size_t rows) const; + /// Add columns from ordered_names that are not present in the block. /// Missing columns are added in the order specified by ordered_names. /// num_rows is needed in case if all res_columns are nullptr. - void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number = 0) const; + void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const; /// Evaluate defaulted columns if necessary. void evaluateMissingDefaults(Block additional_columns, Columns & res_columns) const; @@ -113,6 +116,9 @@ private: /// Actual columns description in part. const ColumnsDescription & part_columns; + + /// Fields of virtual columns that were filled in previous stages. 
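fillVirtualColumns() above fills any still-empty requested virtual column either from virtual_fields computed in earlier stages or from a constant derived from the data part. A schematic model with std types in place of IColumn/Field, purely to show that precedence:

#include <cassert>
#include <functional>
#include <map>
#include <optional>
#include <string>
#include <vector>

// Illustrative model: each "column" is either already produced by the reader
// (non-empty optional) or still missing and may be filled from virtual fields.
using ColumnValue = std::optional<std::string>;

void fillVirtualColumns(
    const std::vector<std::string> & requested_names,
    std::vector<ColumnValue> & columns,
    const std::map<std::string, std::string> & precomputed_fields,
    const std::function<std::string(const std::string &)> & constant_for_part)
{
    assert(columns.size() == requested_names.size());
    for (size_t pos = 0; pos < columns.size(); ++pos)
    {
        if (columns[pos])
            continue; // already read from the part

        const auto & name = requested_names[pos];
        if (auto it = precomputed_fields.find(name); it != precomputed_fields.end())
            columns[pos] = it->second;               // value filled in a previous stage
        else
            columns[pos] = constant_for_part(name);  // constant for the whole part
    }
}

int main()
{
    std::vector<std::string> names = {"value", "_part"};
    std::vector<ColumnValue> columns = {std::string("42"), std::nullopt};

    fillVirtualColumns(names, columns, /*precomputed_fields=*/{},
                       [](const std::string &) { return std::string("all_1_1_0"); });

    assert(columns[1] == std::optional<std::string>("all_1_1_0"));
    return 0;
}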
+ VirtualFields virtual_fields; }; } diff --git a/src/Storages/MergeTree/IntersectionsIndexes.h b/src/Storages/MergeTree/IntersectionsIndexes.h deleted file mode 100644 index d9445f446ce..00000000000 --- a/src/Storages/MergeTree/IntersectionsIndexes.h +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/// A boundary of a segment (left or right) -struct PartToRead -{ - PartBlockRange range; - struct PartAndProjectionNames - { - String part; - String projection; - bool operator<(const PartAndProjectionNames & rhs) const - { - if (part == rhs.part) - return projection < rhs.projection; - return part < rhs.part; - } - bool operator==(const PartAndProjectionNames & rhs) const - { - return part == rhs.part && projection == rhs.projection; - } - }; - - PartAndProjectionNames name; - - bool operator==(const PartToRead & rhs) const - { - return range == rhs.range && name == rhs.name; - } - - bool operator<(const PartToRead & rhs) const - { - /// We allow only consecutive non-intersecting ranges - const bool intersection = - (range.begin <= rhs.range.begin && rhs.range.begin < range.end) || - (rhs.range.begin <= range.begin && range.begin <= rhs.range.end); - if (intersection) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got intersecting parts. First [{}, {}]. Second [{}, {}]", - range.begin, range.end, rhs.range.begin, rhs.range.end); - return range.begin < rhs.range.begin && range.end <= rhs.range.begin; - } -}; - -/// MergeTreeDataPart is described as a segment (min block and max block) -/// During request handling we have to know how many intersection -/// current part has with already saved parts in our state. -struct PartSegments -{ - enum class IntersectionResult - { - NO_INTERSECTION, - EXACTLY_ONE_INTERSECTION, - REJECT - }; - - void addPart(PartToRead part) { segments.insert(std::move(part)); } - - IntersectionResult getIntersectionResult(PartToRead part) - { - bool intersected_before = false; - for (const auto & segment: segments) - { - auto are_intersect = [](auto & x, auto & y) - { - /// <= is important here, because we are working with segments [a, b] - if ((x.begin <= y.begin) && (y.begin <= x.end)) - return true; - if ((y.begin <= x.begin) && (x.begin <= y.end)) - return true; - return false; - }; - - if (are_intersect(segment.range, part.range)) - { - /// We have two or possibly more intersections - if (intersected_before) - return IntersectionResult::REJECT; - - /// We have intersection with part with different name - /// or with different min or max block - /// It could happens if we have merged part on one replica - /// but not on another. - if (segment != part) - return IntersectionResult::REJECT; - - /// We allow only the intersection with the same part as we have - intersected_before = true; - } - } - - return intersected_before ? IntersectionResult::EXACTLY_ONE_INTERSECTION : IntersectionResult::NO_INTERSECTION; - } - - using OrderedSegments = std::set; - OrderedSegments segments; -}; - -/// This is used only in parallel reading from replicas -/// This struct is an ordered set of half intervals and it is responsible for -/// giving an inversion of that intervals (e.g. [a, b) => {[-inf, a), [b, +inf)}) -/// or giving an intersection of two sets of intervals -/// This is needed, because MarkRange is actually a half-opened interval -/// and during the query execution we receive some kind of request from every replica -/// to read some ranges from a specific part. 
-/// We have to avoid the situation, where some range is read twice. -/// This struct helps us to do it using only two operations (intersection and inversion) -/// over a set of half opened intervals. -struct HalfIntervals -{ - static HalfIntervals initializeWithEntireSpace() - { - auto left_inf = std::numeric_limits::min(); - auto right_inf = std::numeric_limits::max(); - return HalfIntervals{{{left_inf, right_inf}}}; - } - - static HalfIntervals initializeFromMarkRanges(MarkRanges ranges) - { - OrderedRanges new_intervals; - for (const auto & range : ranges) - new_intervals.insert(range); - - return HalfIntervals{std::move(new_intervals)}; - } - - MarkRanges convertToMarkRangesFinal() - { - MarkRanges result; - std::copy(intervals.begin(), intervals.end(), std::back_inserter(result)); - return result; - } - - HalfIntervals & intersect(const HalfIntervals & rhs) - { - /** - * first [ ) [ ) [ ) [ ) [ ) - * second [ ) [ ) [ ) [ ) - */ - OrderedRanges intersected; - - const auto & first_intervals = intervals; - auto first = first_intervals.begin(); - const auto & second_intervals = rhs.intervals; - auto second = second_intervals.begin(); - - while (first != first_intervals.end() && second != second_intervals.end()) - { - auto curr_intersection = MarkRange{ - std::max(second->begin, first->begin), - std::min(second->end, first->end) - }; - - /// Insert only if segments are intersect - if (curr_intersection.begin < curr_intersection.end) - intersected.insert(std::move(curr_intersection)); - - if (first->end <= second->end) - ++first; - else - ++second; - } - - std::swap(intersected, intervals); - - return *this; - } - - HalfIntervals & negate() - { - auto left_inf = std::numeric_limits::min(); - auto right_inf = std::numeric_limits::max(); - - if (intervals.empty()) - { - intervals.insert(MarkRange{left_inf, right_inf}); - return *this; - } - - OrderedRanges new_ranges; - - /// Possibly add (-inf; begin) - if (auto begin = intervals.begin()->begin; begin != left_inf) - new_ranges.insert(MarkRange{left_inf, begin}); - - auto prev = intervals.begin(); - for (auto it = std::next(intervals.begin()); it != intervals.end(); ++it) - { - if (prev->end != it->begin) - new_ranges.insert(MarkRange{prev->end, it->begin}); - prev = it; - } - - /// Try to add (end; +inf) - if (auto end = intervals.rbegin()->end; end != right_inf) - new_ranges.insert(MarkRange{end, right_inf}); - - std::swap(new_ranges, intervals); - - return *this; - } - - bool operator==(const HalfIntervals & rhs) const - { - return intervals == rhs.intervals; - } - - using OrderedRanges = std::set; - OrderedRanges intervals; -}; - - -[[ maybe_unused ]] static std::ostream & operator<< (std::ostream & out, const HalfIntervals & ranges) -{ - for (const auto & range: ranges.intervals) - out << fmt::format("({}, {}) ", range.begin, range.end); - return out; -} - -/// This is needed for tests where we don't need to modify objects -[[ maybe_unused ]] static HalfIntervals getIntersection(const HalfIntervals & first, const HalfIntervals & second) -{ - auto result = first; - result.intersect(second); - return result; -} - -} diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d5922ae1bc2..2d57ea40c9c 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -565,7 +565,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( if (const auto * column_const = typeid_cast(node.column.get())) /// Re-generate column name for constant. 
/// DAG form query (with enabled analyzer) uses suffixes for constants, like 1_UInt8. - /// DAG from PK does not use it. This is breakig match by column name sometimes. + /// DAG from PK does not use it. This breaks matching by column name sometimes. /// Ideally, we should not compare manes, but DAG subtrees instead. name = ASTLiteral(column_const->getDataColumn()[0]).getColumnName(); else diff --git a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h index f1cb8b34ecf..f5111ccaacc 100644 --- a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h @@ -22,8 +22,6 @@ public: bool isWidePart() const override { return DB::isWidePart(data_part); } - bool isInMemoryPart() const override { return DB::isInMemoryPart(data_part); } - bool isProjectionPart() const override { return data_part->isProjectionPart(); } DataPartStoragePtr getDataPartStorage() const override { return data_part->getDataPartStoragePtr(); } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e6ae63da7e3..bc49a505d8b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -297,7 +296,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() switch (global_ctx->chosen_merge_algorithm) { - case MergeAlgorithm::Horizontal : + case MergeAlgorithm::Horizontal: { global_ctx->merging_columns = global_ctx->storage_columns; global_ctx->merging_column_names = global_ctx->all_column_names; @@ -305,12 +304,12 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->gathering_column_names.clear(); break; } - case MergeAlgorithm::Vertical : + case MergeAlgorithm::Vertical: { ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->createRawStream(); ctx->rows_sources_write_buf = std::make_unique(*ctx->rows_sources_uncompressed_write_buf); - MergeTreeDataPartInMemory::ColumnToSize local_merged_column_to_size; + std::map local_merged_column_to_size; for (const MergeTreeData::DataPartPtr & part : global_ctx->future_part->parts) part->accumulateColumnSizes(local_merged_column_to_size); @@ -731,9 +730,8 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c MergeTreeData::DataPartsVector projection_parts; for (const auto & part : global_ctx->future_part->parts) { - auto actual_projection_parts = part->getProjectionParts(); - auto it = actual_projection_parts.find(projection.name); - if (it != actual_projection_parts.end() && !it->second->is_broken) + auto it = part->getProjectionParts().find(projection.name); + if (it != part->getProjectionParts().end()) projection_parts.push_back(it->second); } if (projection_parts.size() < global_ctx->future_part->parts.size()) @@ -1076,14 +1074,18 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() if (global_ctx->deduplicate) { - /// We don't want to deduplicate by block number column - /// so if deduplicate_by_columns is empty, add all columns except _block_number - if (supportsBlockNumberColumn(global_ctx) && global_ctx->deduplicate_by_columns.empty()) + const auto & virtuals = *global_ctx->data->getVirtualsPtr(); + + /// We don't want to deduplicate by virtual persistent column. + /// If deduplicate_by_columns is empty, add all columns except virtuals. 
+ if (global_ctx->deduplicate_by_columns.empty()) { - for (const auto & col : global_ctx->merging_column_names) + for (const auto & column_name : global_ctx->merging_column_names) { - if (col != BlockNumberColumn::name) - global_ctx->deduplicate_by_columns.emplace_back(col); + if (virtuals.tryGet(column_name, VirtualsKind::Persistent)) + continue; + + global_ctx->deduplicate_by_columns.emplace_back(column_name); } } diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 7fb4797e482..1f50e55f8a0 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -15,7 +15,7 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index f5f0fa6f726..67f5e7a53e8 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -106,16 +107,14 @@ NameSet injectRequiredColumns( auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() - .withSystemColumns(); - - if (with_subcolumns) - options.withSubcolumns(); + .withVirtuals() + .withSubcolumns(with_subcolumns); for (size_t i = 0; i < columns.size(); ++i) { - /// We are going to fetch only physical columns and system columns + /// We are going to fetch physical columns and system columns first if (!storage_snapshot->tryGetColumn(options, columns[i])) - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no physical column or subcolumn {} in table", columns[i]); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column or subcolumn {} in table", columns[i]); have_at_least_one_physical_column |= injectRequiredColumnsRecursively( columns[i], storage_snapshot, alter_conversions, @@ -258,11 +257,10 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum } -MergeTreeReadTask::Columns getReadTaskColumns( +MergeTreeReadTaskColumns getReadTaskColumns( const IMergeTreeDataPartInfoForReader & data_part_info_for_reader, const StorageSnapshotPtr & storage_snapshot, const Names & required_columns, - const Names & system_columns, const PrewhereInfoPtr & prewhere_info, const ExpressionActionsSettings & actions_settings, const MergeTreeReaderSettings & reader_settings, @@ -270,28 +268,30 @@ MergeTreeReadTask::Columns getReadTaskColumns( { Names column_to_read_after_prewhere = required_columns; - /// Read system columns such as lightweight delete mask "_row_exists" if it is persisted in the part - for (const auto & name : system_columns) - if (data_part_info_for_reader.getColumns().contains(name)) - column_to_read_after_prewhere.push_back(name); - /// Inject columns required for defaults evaluation injectRequiredColumns( data_part_info_for_reader, storage_snapshot, with_subcolumns, column_to_read_after_prewhere); - MergeTreeReadTask::Columns result; + MergeTreeReadTaskColumns result; auto options = GetColumnsOptions(GetColumnsOptions::All) .withExtendedObjects() - .withSystemColumns(); + .withVirtuals() + .withSubcolumns(with_subcolumns); - if (with_subcolumns) - options.withSubcolumns(); + static const NameSet columns_to_read_at_first_step = {"_part_offset"}; NameSet columns_from_previous_steps; auto add_step = [&](const PrewhereExprStep & step) { Names step_column_names; + if (columns_from_previous_steps.empty()) + { + for (const auto & required_column : 
required_columns) + if (columns_to_read_at_first_step.contains(required_column)) + step_column_names.push_back(required_column); + } + /// Computation results from previous steps might be used in the current step as well. In such a case these /// computed columns will be present in the current step inputs. They don't need to be read from the disk so /// exclude them from the list of columns to read. This filtering must be done before injecting required diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 9417d47814a..b19c42c8db8 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -22,11 +21,10 @@ NameSet injectRequiredColumns( bool with_subcolumns, Names & columns); -MergeTreeReadTask::Columns getReadTaskColumns( +MergeTreeReadTaskColumns getReadTaskColumns( const IMergeTreeDataPartInfoForReader & data_part_info_for_reader, const StorageSnapshotPtr & storage_snapshot, const Names & required_columns, - const Names & system_columns, const PrewhereInfoPtr & prewhere_info, const ExpressionActionsSettings & actions_settings, const MergeTreeReaderSettings & reader_settings, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c8262914702..8569d61e263 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -67,12 +67,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include #include @@ -96,8 +95,6 @@ #include #include #include -#include -#include #include #include @@ -190,6 +187,7 @@ namespace ErrorCodes extern const int TOO_MANY_MUTATIONS; extern const int CANNOT_SCHEDULE_TASK; extern const int LIMIT_EXCEEDED; + extern const int CANNOT_FORGET_PARTITION; } static void checkSuspiciousIndices(const ASTFunction * index_function) @@ -345,7 +343,7 @@ MergeTreeData::MergeTreeData( const MergingParams & merging_params_, std::unique_ptr storage_settings_, bool require_part_metadata_, - bool attach, + LoadingStrictnessLevel mode, BrokenPartCallback broken_part_callback_) : IStorage(table_id_) , WithMutableContext(context_->getGlobalContext()) @@ -366,10 +364,12 @@ MergeTreeData::MergeTreeData( const auto settings = getSettings(); - allow_nullable_key = attach || settings->allow_nullable_key; + bool sanity_checks = mode <= LoadingStrictnessLevel::CREATE; + + allow_nullable_key = !sanity_checks || settings->allow_nullable_key; /// Check sanity of MergeTreeSettings. Only when table is created. - if (!attach) + if (sanity_checks) settings->sanityCheck(getContext()->getMergeMutateExecutor()->getMaxTasksCount()); if (!date_column_name.empty()) @@ -377,7 +377,7 @@ MergeTreeData::MergeTreeData( try { checkPartitionKeyAndInitMinMax(metadata_.partition_key); - setProperties(metadata_, metadata_, attach); + setProperties(metadata_, metadata_, !sanity_checks); if (minmax_idx_date_column_pos == -1) throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Could not find Date column"); } @@ -393,7 +393,7 @@ MergeTreeData::MergeTreeData( is_custom_partitioned = true; checkPartitionKeyAndInitMinMax(metadata_.partition_key); } - setProperties(metadata_, metadata_, attach); + setProperties(metadata_, metadata_, !sanity_checks); /// NOTE: using the same columns list as is read when performing actual merges. 
merging_params.check(metadata_); @@ -401,11 +401,11 @@ MergeTreeData::MergeTreeData( if (metadata_.sampling_key.definition_ast != nullptr) { /// This is for backward compatibility. - checkSampleExpression(metadata_, attach || settings->compatibility_allow_sampling_expression_not_in_primary_key, - settings->check_sample_column_is_correct && !attach); + checkSampleExpression(metadata_, !sanity_checks || settings->compatibility_allow_sampling_expression_not_in_primary_key, + settings->check_sample_column_is_correct && sanity_checks); } - checkColumnFilenamesForCollision(metadata_.getColumns(), *settings, !attach); + checkColumnFilenamesForCollision(metadata_.getColumns(), *settings, sanity_checks); checkTTLExpressions(metadata_, metadata_); String reason; @@ -429,6 +429,29 @@ MergeTreeData::MergeTreeData( }; } +VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMetadata & metadata) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_part", std::make_shared(std::make_shared()), "Name of part"); + desc.addEphemeral("_part_index", std::make_shared(), "Sequential index of the part in the query result"); + desc.addEphemeral("_part_uuid", std::make_shared(), "Unique part identifier (if enabled MergeTree setting assign_part_uuids)"); + desc.addEphemeral("_partition_id", std::make_shared(std::make_shared()), "Name of partition"); + desc.addEphemeral("_sample_factor", std::make_shared(), "Sample factor (from the query)"); + desc.addEphemeral("_part_offset", std::make_shared(), "Number of row in the part"); + + if (metadata.hasPartitionKey()) + { + auto partition_types = metadata.partition_key.sample_block.getDataTypes(); + desc.addEphemeral("_partition_value", std::make_shared(std::move(partition_types)), "Value (a tuple) of a PARTITION BY expression"); + } + + desc.addPersistent(RowExistsColumn::name, RowExistsColumn::type, nullptr, "Persisted mask created by lightweight delete that show whether row exists or is deleted"); + desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, "Persisted original number of block that was assigned at insert"); + + return desc; +} + StoragePolicyPtr MergeTreeData::getStoragePolicy() const { auto settings = getSettings(); @@ -676,6 +699,7 @@ void MergeTreeData::setProperties( { checkProperties(new_metadata, old_metadata, attach, false, allow_nullable_key, local_context); setInMemoryMetadata(new_metadata); + setVirtuals(createVirtuals(new_metadata)); } namespace @@ -869,7 +893,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (is_optional) return; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Sign column for storage {} is empty", storage); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Sign column for storage {} is empty", storage); } bool miss_column = true; @@ -896,7 +920,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (is_optional) return; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Version column for storage {} is empty", storage); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Version column for storage {} is empty", storage); } bool miss_column = true; @@ -925,12 +949,12 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (is_optional) return; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: is_deleted ({}) column for storage {} is empty", is_deleted_column, storage); + throw Exception(ErrorCodes::LOGICAL_ERROR, "`is_deleted` ({}) 
column for storage {} is empty", is_deleted_column, storage); } else { if (version_column.empty() && !is_optional) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Version column ({}) for storage {} is empty while is_deleted ({}) is not.", + throw Exception(ErrorCodes::LOGICAL_ERROR, "Version column ({}) for storage {} is empty while is_deleted ({}) is not.", version_column, storage, is_deleted_column); bool miss_is_deleted_column = true; @@ -1001,73 +1025,38 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat /// TODO Checks for Graphite mode. } +const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value"}; -DataTypePtr MergeTreeData::getPartitionValueType() const +Block MergeTreeData::getHeaderWithVirtualsForFilter() const { - DataTypePtr partition_value_type; - auto partition_types = getInMemoryMetadataPtr()->partition_key.sample_block.getDataTypes(); - if (partition_types.empty()) - partition_value_type = std::make_shared(); - else - partition_value_type = std::make_shared(std::move(partition_types)); - return partition_value_type; + Block header; + auto virtuals_desc = getVirtualsPtr(); + for (const auto & name : virtuals_useful_for_filter) + if (auto column = virtuals_desc->tryGet(name)) + header.insert({column->type->createColumn(), column->type, name}); + return header; } - -Block MergeTreeData::getSampleBlockWithVirtualColumns() const +Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const { - DataTypePtr partition_value_type = getPartitionValueType(); - return { - ColumnWithTypeAndName( - DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_part"), - ColumnWithTypeAndName( - DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_partition_id"), - ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid"), - ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")}; -} + auto block = getHeaderWithVirtualsForFilter(); - -Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty) const -{ - auto block = getSampleBlockWithVirtualColumns(); - MutableColumns columns = block.mutateColumns(); - - auto & part_column = columns[0]; - auto & partition_id_column = columns[1]; - auto & part_uuid_column = columns[2]; - auto & partition_value_column = columns[3]; - - bool has_partition_value = typeid_cast(partition_value_column.get()); for (const auto & part_or_projection : parts) { if (ignore_empty && part_or_projection->isEmpty()) continue; - const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get(); - part_column->insert(part->name); - partition_id_column->insert(part->info.partition_id); - part_uuid_column->insert(part->uuid); - Tuple tuple(part->partition.value.begin(), part->partition.value.end()); - if (has_partition_value) - partition_value_column->insert(tuple); - if (one_part) + const auto * part = part_or_projection->isProjectionPart() + ? 
part_or_projection->getParentPart() + : part_or_projection.get(); + + for (auto & column : block) { - part_column = ColumnConst::create(std::move(part_column), 1); - partition_id_column = ColumnConst::create(std::move(partition_id_column), 1); - part_uuid_column = ColumnConst::create(std::move(part_uuid_column), 1); - if (has_partition_value) - partition_value_column = ColumnConst::create(std::move(partition_value_column), 1); - break; + auto field = getFieldForConstVirtualColumn(column.name, *part); + column.column->assumeMutableRef().insert(field); } } - block.setColumns(std::move(columns)); - if (!has_partition_value) - block.erase("_partition_value"); return block; } @@ -1076,13 +1065,16 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const { if (parts.empty()) - return 0u; + return 0; + auto metadata_snapshot = getInMemoryMetadataPtr(); - Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */); + auto virtual_columns_block = getBlockWithVirtualsForFilter({parts[0]}); auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); + if (!filter_dag) + return {}; - // Generate valid expressions for filtering + /// Generate valid expressions for filtering bool valid = true; for (const auto * input : filter_dag->getInputs()) if (!virtual_columns_block.has(input->result_name)) @@ -1095,7 +1087,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( std::unordered_set part_values; if (valid) { - virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */); + virtual_columns_block = getBlockWithVirtualsForFilter(parts); VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) @@ -1716,8 +1708,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME - || it->name() == MergeTreeData::DETACHED_DIR_NAME - || startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) + || it->name() == MergeTreeData::DETACHED_DIR_NAME) continue; if (auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version)) @@ -2270,7 +2261,6 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) bool reached_removal_time = part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds(); if ((reached_removal_time && !has_skipped_mutation_parent(part)) || force - || isInMemoryPart(part) /// Remove in-memory parts immediately to not store excessive data in RAM || (part->version.creation_csn == Tx::RolledBackCSN && getSettings()->remove_rolled_back_parts_immediately)) { part->removal_state.store(DataPartRemovalState::REMOVED, std::memory_order_relaxed); @@ -3657,6 +3647,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts { auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & columns = metadata_snapshot->getColumns(); + auto virtuals = getVirtualsPtr(); if (!hasDynamicSubcolumns(columns)) return; @@ -3664,7 +3655,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts const auto & part_columns = part->getColumns(); for (const auto & part_column : part_columns) { - if (part_column.name == 
LightweightDeleteDescription::FILTER_COLUMN.name || part_column.name == BlockNumberColumn::name) + if (virtuals->has(part_column.name)) continue; auto storage_column = columns.getPhysical(part_column.name); @@ -4888,7 +4879,16 @@ void MergeTreeData::checkAlterPartitionIsPossible( throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH PARTITION ALL currently"); } else - getPartitionIDFromQuery(command.partition, local_context); + { + String partition_id = getPartitionIDFromQuery(command.partition, local_context); + if (command.type == PartitionCommand::FORGET_PARTITION) + { + DataPartsLock lock = lockParts(); + auto parts_in_partition = getDataPartsPartitionRange(partition_id); + if (!parts_in_partition.empty()) + throw Exception(ErrorCodes::CANNOT_FORGET_PARTITION, "Partition {} is not empty", partition_id); + } + } } } } @@ -5109,6 +5109,11 @@ void MergeTreeData::fetchPartition( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "FETCH PARTITION is not supported by storage {}", getName()); } +void MergeTreeData::forgetPartition(const ASTPtr & /*partition*/, ContextPtr /*query_context*/) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "FORGET PARTITION is not supported by storage {}", getName()); +} + Pipe MergeTreeData::alterPartition( const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, @@ -5145,6 +5150,10 @@ Pipe MergeTreeData::alterPartition( dropDetached(command.partition, command.part, query_context); break; + case PartitionCommand::FORGET_PARTITION: + forgetPartition(command.partition, query_context); + break; + case PartitionCommand::ATTACH_PARTITION: current_command_results = attachPartition(command.partition, metadata_snapshot, command.part, query_context); break; @@ -5215,14 +5224,14 @@ Pipe MergeTreeData::alterPartition( case PartitionCommand::FREEZE_PARTITION: { auto lock = lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout); - current_command_results = freezePartition(command.partition, metadata_snapshot, command.with_name, query_context, lock); + current_command_results = freezePartition(command.partition, command.with_name, query_context, lock); } break; case PartitionCommand::FREEZE_ALL_PARTITIONS: { auto lock = lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout); - current_command_results = freezeAll(command.with_name, metadata_snapshot, query_context, lock); + current_command_results = freezeAll(command.with_name, query_context, lock); } break; @@ -5292,7 +5301,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( if (hold_table_lock && !table_lock) table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - if (backup_settings.check_projection_parts) + if (backup_settings.check_parts) part->checkConsistencyWithProjections(/* require_part_metadata= */ true); BackupEntries backup_entries_from_part; @@ -5304,8 +5313,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( read_settings, make_temporary_hard_links, backup_entries_from_part, - &temp_dirs, - false, false); + &temp_dirs); auto projection_parts = part->getProjectionParts(); for (const auto & [projection_name, projection_part] : projection_parts) @@ -5318,9 +5326,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( read_settings, make_temporary_hard_links, backup_entries_from_part, - &temp_dirs, - projection_part->is_broken, - 
backup_settings.allow_backup_broken_projections); + &temp_dirs); } if (hold_storage_and_part_ptrs) @@ -6653,14 +6659,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const auto & primary_key_max_column_name = metadata_snapshot->minmax_count_projection->primary_key_max_column_name; NameSet required_columns_set(required_columns.begin(), required_columns.end()); - if (required_columns_set.contains("_partition_value") && !typeid_cast(getPartitionValueType().get())) - { - throw Exception( - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, - "Missing column `_partition_value` because there is no partition column in table {}", - getStorageID().getTableName()); - } - if (!primary_key_max_column_name.empty()) need_primary_key_max_column = required_columns_set.contains(primary_key_max_column_name); @@ -6686,11 +6684,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( }; Block virtual_columns_block; - auto virtual_block = getSampleBlockWithVirtualColumns(); + auto virtual_block = getHeaderWithVirtualsForFilter(); bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); }); if (has_virtual_column || filter_dag) { - virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */, true /* ignore_empty */); + virtual_columns_block = getBlockWithVirtualsForFilter(parts, /*ignore_empty=*/ true); if (virtual_columns_block.rows() == 0) return {}; } @@ -6824,7 +6822,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( { for (const auto & part : real_parts) { - const auto & primary_key_column = *part->index[0]; + const auto & primary_key_column = *part->getIndex()[0]; auto & min_column = assert_cast(*partition_minmax_count_columns[pos]); insert(min_column, primary_key_column[0]); } @@ -6835,7 +6833,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( { for (const auto & part : real_parts) { - const auto & primary_key_column = *part->index[0]; + const auto & primary_key_column = *part->getIndex()[0]; auto & max_column = assert_cast(*partition_minmax_count_columns[pos]); insert(max_column, primary_key_column[primary_key_column.size() - 1]); } @@ -6919,10 +6917,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( UInt64 MergeTreeData::estimateNumberOfRowsToRead( - ContextPtr query_context, - const StorageSnapshotPtr & storage_snapshot, - const SelectQueryInfo & query_info, - const ActionDAGNodes & added_filter_nodes) const + ContextPtr query_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const { const auto & snapshot_data = assert_cast(*storage_snapshot->data); const auto & parts = snapshot_data.parts; @@ -6930,11 +6925,9 @@ UInt64 MergeTreeData::estimateNumberOfRowsToRead( MergeTreeDataSelectExecutor reader(*this); auto result_ptr = reader.estimateNumMarksToRead( parts, - query_info.prewhere_info, storage_snapshot->getMetadataForQuery()->getColumns().getAll().getNames(), storage_snapshot->metadata, query_info, - added_filter_nodes, query_context, query_context->getSettingsRef().max_threads); @@ -7109,27 +7102,6 @@ std::pair MergeTreeData::cloneAn scope_guard src_flushed_tmp_dir_lock; MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; - /// If source part is in memory, flush it to disk and clone it already in on-disk format - /// Protect tmp dir from removing by cleanup thread with src_flushed_tmp_dir_lock - /// Construct src_flushed_tmp_part in order to delete part with its directory at destructor - if (auto src_part_in_memory = 
asInMemoryPart(src_part)) - { - auto flushed_part_path = *src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); - - auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename(); - src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name); - - auto flushed_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); - - src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, src_part->name, flushed_part_storage) - .withPartInfo(src_part->info) - .withPartFormatFromDisk() - .build(); - - src_flushed_tmp_part->is_temp = true; - src_part_storage = flushed_part_storage; - } - String with_copy; if (params.copy_instead_of_hardlink) with_copy = " (copying data)"; @@ -7311,26 +7283,23 @@ MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & parti PartitionCommandsResultInfo MergeTreeData::freezePartition( const ASTPtr & partition_ast, - const StorageMetadataPtr & metadata_snapshot, const String & with_name, ContextPtr local_context, TableLockHolder &) { - return freezePartitionsByMatcher(getPartitionMatcher(partition_ast, local_context), metadata_snapshot, with_name, local_context); + return freezePartitionsByMatcher(getPartitionMatcher(partition_ast, local_context), with_name, local_context); } PartitionCommandsResultInfo MergeTreeData::freezeAll( const String & with_name, - const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableLockHolder &) { - return freezePartitionsByMatcher([] (const String &) { return true; }, metadata_snapshot, with_name, local_context); + return freezePartitionsByMatcher([] (const String &) { return true; }, with_name, local_context); } PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( MatcherFn matcher, - const StorageMetadataPtr & metadata_snapshot, const String & with_name, ContextPtr local_context) { @@ -7382,22 +7351,6 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( scope_guard src_flushed_tmp_dir_lock; MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; - if (auto part_in_memory = asInMemoryPart(part)) - { - auto flushed_part_path = *part_in_memory->getRelativePathForPrefix("tmp_freeze"); - src_flushed_tmp_dir_lock = part->storage.getTemporaryPartDirectoryHolder("tmp_freeze" + part->name); - - auto flushed_part_storage = part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); - - src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, part->name, flushed_part_storage) - .withPartInfo(part->info) - .withPartFormatFromDisk() - .build(); - - src_flushed_tmp_part->is_temp = true; - data_part_storage = flushed_part_storage; - } - auto callback = [this, &part, &backup_part_path](const DiskPtr & disk) { // Store metadata for replicated table. 
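A minimal sketch of how the new virtual-column filtering helpers introduced in the MergeTreeData hunks above are meant to be combined, assuming only what the hunks themselves show: getBlockWithVirtualsForFilter() builds one row per data part holding the virtuals_useful_for_filter columns (_part, _partition_id, _part_uuid, _partition_value), and VirtualColumnUtils then filters that block and extracts the surviving part names, as totalRowsByPartitionPredicateImpl does. The free function and its name below are hypothetical and not part of this patch; error handling and the filter_dag == nullptr case are omitted.

    #include <Storages/MergeTree/MergeTreeData.h>
    #include <Storages/VirtualColumnUtils.h>

    namespace DB
    {

    /// Hypothetical helper (not from the patch): keep only the parts whose constant
    /// virtual columns pass the given filter, mirroring the flow shown above in
    /// MergeTreeData::totalRowsByPartitionPredicateImpl.
    static std::unordered_set<String> partNamesPassingFilter(
        const MergeTreeData & data,
        const MergeTreeData::DataPartsVector & parts,
        const ActionsDAGPtr & filter_dag,
        ContextPtr context)
    {
        /// One row per part, containing only _part, _partition_id, _part_uuid, _partition_value.
        Block block = data.getBlockWithVirtualsForFilter(parts);

        /// Evaluate the filter subtree that references only these virtual columns.
        VirtualColumnUtils::filterBlockWithDAG(filter_dag, block, context);

        /// Names of the parts that survived the filter.
        return VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_part");
    }

    }
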
@@ -7814,39 +7767,21 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason) { - auto remove_broken_parts_from_consideration = [](auto & parts) - { - std::set broken_projection_parts; - for (const auto & [name, part] : parts) - { - if (part->is_broken) - broken_projection_parts.emplace(name); - } - for (const auto & name : broken_projection_parts) - parts.erase(name); - }; - - auto left_projection_parts = left->getProjectionParts(); - auto right_projection_parts = right->getProjectionParts(); - - remove_broken_parts_from_consideration(left_projection_parts); - remove_broken_parts_from_consideration(right_projection_parts); - - if (left_projection_parts.size() != right_projection_parts.size()) + if (left->getProjectionParts().size() != right->getProjectionParts().size()) { out_reason = fmt::format( "Parts have different number of projections: {} in part '{}' and {} in part '{}'", - left_projection_parts.size(), + left->getProjectionParts().size(), left->name, - right_projection_parts.size(), + right->getProjectionParts().size(), right->name ); return false; } - for (const auto & [name, _] : left_projection_parts) + for (const auto & [name, _] : left->getProjectionParts()) { - if (!right_projection_parts.contains(name)) + if (!right->hasProjection(name)) { out_reason = fmt::format( "The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name @@ -7950,31 +7885,15 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(MergeTreeDataPartPtr part) const { - auto commands_map = getAlterMutationCommandsForPart(part); + auto commands = getAlterMutationCommandsForPart(part); auto result = std::make_shared(); - for (const auto & [_, commands] : commands_map) - for (const auto & command : commands) - result->addMutationCommand(command); + for (const auto & command : commands | std::views::reverse) + result->addMutationCommand(command); return result; } -NamesAndTypesList MergeTreeData::getVirtuals() const -{ - return NamesAndTypesList{ - NameAndTypePair("_part", std::make_shared(std::make_shared())), - NameAndTypePair("_part_index", std::make_shared()), - NameAndTypePair("_part_uuid", std::make_shared()), - NameAndTypePair("_partition_id", std::make_shared(std::make_shared())), - NameAndTypePair("_partition_value", getPartitionValueType()), - NameAndTypePair("_sample_factor", std::make_shared()), - NameAndTypePair("_part_offset", std::make_shared()), - LightweightDeleteDescription::FILTER_COLUMN, - NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type), - }; -} - size_t MergeTreeData::getTotalMergesWithTTLInMergeList() const { return getContext()->getMergeList().getMergesWithTTLCount(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4ad440dae00..c864fce4659 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -23,9 +23,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -35,8 +33,8 @@ #include #include #include -#include - +#include +#include #include #include @@ -391,7 +389,7 @@ public: const MergingParams & merging_params_, std::unique_ptr settings_, bool require_part_metadata_, - bool attach, + LoadingStrictnessLevel mode, BrokenPartCallback broken_part_callback_ = [](const String 
&){}); /// Build a block of minmax and count values of a MergeTree table. These values are extracted @@ -444,8 +442,6 @@ public: bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); } - NamesAndTypesList getVirtuals() const override; - /// Snapshot for MergeTree contains the current set of data parts /// at the moment of the start of query. struct SnapshotData : public StorageSnapshot::Data @@ -468,13 +464,8 @@ public: struct ProjectionPartsVector { - DataPartsVector data_parts; - DataPartsVector projection_parts; - DataPartStateVector projection_parts_states; - - DataPartsVector broken_projection_parts; - DataPartStateVector broken_projection_parts_states; + DataPartsVector data_parts; }; /// Returns a copy of the list so that the caller shouldn't worry about locks. @@ -489,7 +480,7 @@ public: const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const; /// Same as above but only returns projection parts ProjectionPartsVector getProjectionPartsVectorForInternalUsage( - const DataPartStates & affordable_states, MergeTreeData::DataPartStateVector * out_states) const; + const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const; /// Returns absolutely all parts (and snapshot of their states) @@ -759,7 +750,6 @@ public: */ PartitionCommandsResultInfo freezePartition( const ASTPtr & partition, - const StorageMetadataPtr & metadata_snapshot, const String & with_name, ContextPtr context, TableLockHolder & table_lock_holder); @@ -767,7 +757,6 @@ public: /// Freezes all parts. PartitionCommandsResultInfo freezeAll( const String & with_name, - const StorageMetadataPtr & metadata_snapshot, ContextPtr context, TableLockHolder & table_lock_holder); @@ -831,7 +820,7 @@ public: return secondary_index_sizes; } - /// For ATTACH/DETACH/DROP PARTITION. + /// For ATTACH/DETACH/DROP/FORGET PARTITION. String getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr context, DataPartsLock * acquired_lock = nullptr) const; std::unordered_set getPartitionIDsFromQuery(const ASTs & asts, ContextPtr context) const; std::set getPartitionIdsAffectedByCommands(const MutationCommands & commands, ContextPtr query_context) const; @@ -993,15 +982,13 @@ public: void removeQueryId(const String & query_id) const; void removeQueryIdNoLock(const String & query_id) const TSA_REQUIRES(query_id_set_mutex); - /// Return the partition expression types as a Tuple type. Return DataTypeUInt8 if partition expression is empty. - DataTypePtr getPartitionValueType() const; + static const Names virtuals_useful_for_filter; /// Construct a sample block of virtual columns. - Block getSampleBlockWithVirtualColumns() const; + Block getHeaderWithVirtualsForFilter() const; /// Construct a block consisting only of possible virtual columns for part pruning. - /// If one_part is true, fill in at most one part. - Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty = false) const; + Block getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const; /// In merge tree we do inserts with several steps. One of them: /// X. 
write part to temporary directory with some temp name @@ -1088,13 +1075,12 @@ public: /// It is used to make a decision whether to enable parallel replicas (distributed processing) or not and how /// many to replicas to use UInt64 estimateNumberOfRowsToRead( - ContextPtr query_context, - const StorageSnapshotPtr & storage_snapshot, - const SelectQueryInfo & query_info, - const ActionDAGNodes & added_filter_nodes) const; + ContextPtr query_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const; bool initializeDiskOnConfigChange(const std::set & /*new_added_disks*/) override; + static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata); + protected: friend class IMergeTreeDataPart; friend class MergeTreeDataMergerMutator; @@ -1317,7 +1303,7 @@ protected: bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node, const StorageMetadataPtr & metadata_snapshot) const; /// Common part for |freezePartition()| and |freezeAll()|. - PartitionCommandsResultInfo freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, ContextPtr context); + PartitionCommandsResultInfo freezePartitionsByMatcher(MatcherFn matcher, const String & with_name, ContextPtr context); PartitionCommandsResultInfo unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, ContextPtr context); // Partition helpers @@ -1339,6 +1325,8 @@ protected: bool fetch_part, ContextPtr query_context); + virtual void forgetPartition(const ASTPtr & partition, ContextPtr context); + virtual void movePartitionToShard(const ASTPtr & partition, bool move_part, const String & to, ContextPtr query_context); void writePartLog( @@ -1351,16 +1339,104 @@ protected: const MergeListEntry * merge_entry, std::shared_ptr profile_counters); + class PartMutationBackoffPolicy + { + struct PartMutationInfo + { + size_t retry_count; + size_t latest_fail_time_us; + size_t max_postpone_time_ms; + size_t max_postpone_power; + + explicit PartMutationInfo(size_t max_postpone_time_ms_) + : retry_count(0ull) + , latest_fail_time_us(static_cast(Poco::Timestamp().epochMicroseconds())) + , max_postpone_time_ms(max_postpone_time_ms_) + , max_postpone_power((max_postpone_time_ms_) ? 
(static_cast(std::log2(max_postpone_time_ms_))) : (0ull)) + {} + + + size_t getNextMinExecutionTimeUsResolution() const + { + if (max_postpone_time_ms == 0) + return static_cast(Poco::Timestamp().epochMicroseconds()); + size_t current_backoff_interval_us = (1 << retry_count) * 1000ul; + return latest_fail_time_us + current_backoff_interval_us; + } + + void addPartFailure() + { + if (max_postpone_time_ms == 0) + return; + retry_count = std::min(max_postpone_power, retry_count + 1); + latest_fail_time_us = static_cast(Poco::Timestamp().epochMicroseconds()); + } + + bool partCanBeMutated() + { + if (max_postpone_time_ms == 0) + return true; + + auto current_time_us = static_cast(Poco::Timestamp().epochMicroseconds()); + return current_time_us >= getNextMinExecutionTimeUsResolution(); + } + }; + + using DataPartsWithRetryInfo = std::unordered_map; + DataPartsWithRetryInfo failed_mutation_parts; + mutable std::mutex parts_info_lock; + + public: + + void resetMutationFailures() + { + std::unique_lock _lock(parts_info_lock); + failed_mutation_parts.clear(); + } + + void removePartFromFailed(const String & part_name) + { + std::unique_lock _lock(parts_info_lock); + failed_mutation_parts.erase(part_name); + } + + void addPartMutationFailure (const String& part_name, size_t max_postpone_time_ms_) + { + std::unique_lock _lock(parts_info_lock); + auto part_info_it = failed_mutation_parts.find(part_name); + if (part_info_it == failed_mutation_parts.end()) + { + auto [it, success] = failed_mutation_parts.emplace(part_name, PartMutationInfo(max_postpone_time_ms_)); + std::swap(it, part_info_it); + } + auto& part_info = part_info_it->second; + part_info.addPartFailure(); + } + + bool partCanBeMutated(const String& part_name) + { + + std::unique_lock _lock(parts_info_lock); + auto iter = failed_mutation_parts.find(part_name); + if (iter == failed_mutation_parts.end()) + return true; + return iter->second.partCanBeMutated(); + } + }; + /// Controls postponing logic for failed mutations. + PartMutationBackoffPolicy mutation_backoff_policy; + /// If part is assigned to merge or mutation (possibly replicated) /// Should be overridden by children, because they can have different /// mechanisms for parts locking virtual bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const = 0; - /// Return most recent mutations commands for part which weren't applied - /// Used to receive AlterConversions for part and apply them on fly. This - /// method has different implementations for replicated and non replicated - /// MergeTree because they store mutations in different way. - virtual std::map getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; + /// Return pending mutations that weren't applied to `part` yet and should be applied on the fly + /// (i.e. when reading from the part). Mutations not supported by AlterConversions + /// (supportsMutationCommandType()) can be omitted. 
+ /// + /// @return list of mutations, in *reverse* order (newest to oldest) + virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; struct PartBackupEntries { diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 58fddde7b54..1bf1d4a3c29 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -85,7 +85,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t max_coun if (scheduled_tasks_count > max_count) { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Logical error: invalid argument passed to getMaxSourcePartsSize: scheduled_tasks_count = {} > max_count = {}", + "Invalid argument passed to getMaxSourcePartsSize: scheduled_tasks_count = {} > max_count = {}", scheduled_tasks_count, max_count); } @@ -511,7 +511,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( /// Do not allow to "merge" part with itself for regular merges, unless it is a TTL-merge where it is ok to remove some values with expired ttl if (parts_to_merge.size() == 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: merge selector returned only one part to merge"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge selector returned only one part to merge"); if (parts_to_merge.empty()) { diff --git a/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp b/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp index d55248df0af..146c15bbb69 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -64,8 +63,6 @@ std::shared_ptr MergeTreeDataPartBuilder::build() return std::make_shared(data, name, *part_info, part_storage, parent_part); case PartType::Compact: return std::make_shared(data, name, *part_info, part_storage, parent_part); - case PartType::InMemory: - return std::make_shared(data, name, *part_info, part_storage, parent_part); default: throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unknown type of part {}", part_storage->getRelativePath()); diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index aa1968794f9..d60f4cc7354 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -60,7 +61,7 @@ void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, const String & name) const { /// Skip inverted index files, these have a default MergeTreeDataPartChecksum with file_size == 0 - if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid")) + if (isGinFile(name)) return; if (!storage.exists(name)) diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index d4980a67a43..837b940e354 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -54,8 +54,6 @@ struct MergeTreeDataPartChecksums bool has(const String & file_name) const { return files.find(file_name) != files.end(); } - bool remove(const String & file_name) { return files.erase(file_name); } - bool empty() 
const { return files.empty(); } /// Checks that the set of columns and their checksums are the same. If not, throws an exception. diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 0ecd7abe183..9f201ab3b81 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB @@ -33,6 +32,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -41,12 +41,21 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const ReadBufferFromFileBase::ProfileCallback & profile_callback) const { auto read_info = std::make_shared(shared_from_this(), alter_conversions); - auto * load_marks_threadpool = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr; + auto * load_marks_threadpool + = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr; return std::make_unique( - read_info, columns_to_read, storage_snapshot, uncompressed_cache, - mark_cache, mark_ranges, reader_settings, load_marks_threadpool, - avg_value_size_hints, profile_callback); + read_info, + columns_to_read, + virtual_fields, + storage_snapshot, + uncompressed_cache, + mark_cache, + mark_ranges, + reader_settings, + load_marks_threadpool, + avg_value_size_hints, + profile_callback); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( @@ -66,12 +75,6 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); - /// _block_number column is not added by user, but is persisted in a part after merge - /// If _block_number is not present in the parts to be merged, then it won't have a position - /// So check if its not present and add it at the end - if (columns_list.contains(BlockNumberColumn::name) && !ordered_columns_list.contains(BlockNumberColumn::name)) - ordered_columns_list.emplace_back(NameAndTypePair{BlockNumberColumn::name, BlockNumberColumn::type}); - return std::make_unique( shared_from_this(), ordered_columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 35a358b3720..a97d15a08f3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -32,6 +32,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp deleted file mode 100644 index 2f01dbfe04b..00000000000 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ /dev/null @@ -1,108 +0,0 @@ -#include 
-#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( - const MergeTreeData & storage_, - const String & name_, - const MergeTreePartInfo & info_, - const MutableDataPartStoragePtr & data_part_storage_, - const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::InMemory, parent_part_) -{ - default_codec = CompressionCodecFactory::instance().get("NONE", {}); -} - -IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( - const NamesAndTypesList & columns_to_read, - const StorageSnapshotPtr & storage_snapshot, - const MarkRanges & mark_ranges, - UncompressedCache * /* uncompressed_cache */, - MarkCache * /* mark_cache */, - const AlterConversionsPtr & alter_conversions, - const MergeTreeReaderSettings & reader_settings, - const ValueSizeMap & /* avg_value_size_hints */, - const ReadBufferFromFileBase::ProfileCallback & /* profile_callback */) const -{ - auto read_info = std::make_shared(shared_from_this(), alter_conversions); - auto ptr = std::static_pointer_cast(shared_from_this()); - - return std::make_unique( - read_info, ptr, columns_to_read, storage_snapshot, mark_ranges, reader_settings); -} - -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( - const NamesAndTypesList &, - const StorageMetadataPtr &, - const std::vector &, - const Statistics &, - const CompressionCodecPtr &, - const MergeTreeWriterSettings &, - const MergeTreeIndexGranularity &) -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "In-memory data parts are obsolete and no longer supported for writing"); -} - -MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String &, const StorageMetadataPtr &) const -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "In-memory data parts are obsolete and no longer supported for writing"); -} - -DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, - const StorageMetadataPtr & metadata_snapshot, - const DiskTransactionPtr & disk_transaction) const -{ - if (disk_transaction) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "InMemory parts are not compatible with disk transactions"); - String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); - return flushToDisk(detached_path, metadata_snapshot); -} - -void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */) -{ - getDataPartStorage().setRelativePath(new_relative_path); -} - -void MergeTreeDataPartInMemory::calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const -{ - auto it = checksums.files.find("data.bin"); - if (it != checksums.files.end()) - total_size.data_uncompressed += it->second.uncompressed_size; - - for (const auto & column : columns) - each_columns_size[column.name].data_uncompressed += block.getByName(column.name).column->byteSize(); -} - -IMergeTreeDataPart::Checksum MergeTreeDataPartInMemory::calculateBlockChecksum() const -{ - SipHash hash; - IMergeTreeDataPart::Checksum checksum; - for (const auto & column : block) - column.column->updateHashFast(hash); - - checksum.uncompressed_size = block.bytes(); - checksum.uncompressed_hash = getSipHash128AsPair(hash); - return checksum; -} - -DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part) -{ - return 
std::dynamic_pointer_cast(part); -} -} diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h deleted file mode 100644 index 27f8ba4bccb..00000000000 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class UncompressedCache; - -class MergeTreeDataPartInMemory : public IMergeTreeDataPart -{ -public: - MergeTreeDataPartInMemory( - const MergeTreeData & storage_, - const String & name_, - const MergeTreePartInfo & info_, - const MutableDataPartStoragePtr & data_part_storage_, - const IMergeTreeDataPart * parent_part_ = nullptr); - - MergeTreeReaderPtr getReader( - const NamesAndTypesList & columns, - const StorageSnapshotPtr & storage_snapshot, - const MarkRanges & mark_ranges, - UncompressedCache * uncompressed_cache, - MarkCache * mark_cache, - const AlterConversionsPtr & alter_conversions, - const MergeTreeReaderSettings & reader_settings_, - const ValueSizeMap & avg_value_size_hints, - const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; - - bool isStoredOnDisk() const override { return false; } - bool isStoredOnRemoteDisk() const override { return false; } - bool isStoredOnRemoteDiskWithZeroCopySupport() const override { return false; } - bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } - std::optional getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } - void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; - DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, - const DiskTransactionPtr & disk_transaction) const override; - std::optional getColumnModificationTime(const String & /* column_name */) const override { return {}; } - - MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; - - /// Returns hash of parts's block - Checksum calculateBlockChecksum() const; - - mutable Block block; - -private: - mutable std::condition_variable is_merged; - - /// Calculates uncompressed sizes in memory. 
- void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override; -}; - -using DataPartInMemoryPtr = std::shared_ptr; -using MutableDataPartInMemoryPtr = std::shared_ptr; - -DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part); - -} diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.h b/src/Storages/MergeTree/MergeTreeDataPartType.h index 8b06da5167e..64f3abee391 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.h +++ b/src/Storages/MergeTree/MergeTreeDataPartType.h @@ -26,10 +26,8 @@ static E parseEnum(const String & str) /// It's a bug in clang with three-way comparison operator /// https://github.com/llvm/llvm-project/issues/55919 -#ifdef __clang__ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" /// Types of data part format. class MergeTreeDataPartType @@ -44,9 +42,6 @@ public: /// Data of all columns is stored in one file. Marks are also stored in single file. Compact, - /// Format with buffering data in RAM. Obsolete - new parts cannot be created in this format. - InMemory, - Unknown, }; @@ -86,9 +81,7 @@ private: Value value; }; -#ifdef __clang__ - #pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop struct MergeTreeDataPartFormat { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index dc6c1f0019d..018b8a35534 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -31,6 +31,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -40,10 +41,16 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( { auto read_info = std::make_shared(shared_from_this(), alter_conversions); return std::make_unique( - read_info, columns_to_read, - storage_snapshot, uncompressed_cache, - mark_cache, mark_ranges, reader_settings, - avg_value_size_hints, profile_callback); + read_info, + columns_to_read, + virtual_fields, + storage_snapshot, + uncompressed_cache, + mark_cache, + mark_ranges, + reader_settings, + avg_value_size_hints, + profile_callback); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 14147c4ad56..a8710dad679 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -27,6 +27,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index b05b4584259..1721fd15b8d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -1,12 +1,9 @@ #include #include -#include namespace DB { - 
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -55,14 +52,10 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } - const auto & storage_columns = metadata_snapshot->getColumns(); + auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - ASTPtr compression; - if (column.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec->getFullCodecDesc(); - else - compression = storage_columns.getCodecDescOrDefault(column.name, default_codec); + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); addStreams(column, compression); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 9d373504473..d79590ded21 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -6,12 +6,10 @@ #include #include #include -#include #include namespace DB { - CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); namespace ErrorCodes { @@ -91,15 +89,11 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { - const auto & columns = metadata_snapshot->getColumns(); - for (const auto & it : columns_list) + auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); + for (const auto & column : columns_list) { - ASTPtr compression; - if (it.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec->getFullCodecDesc(); - else - compression = columns.getCodecDescOrDefault(it.name, default_codec); - addStreams(it, compression); + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + addStreams(column, compression); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index ae40eb03649..f5ff323563d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -1,5 +1,8 @@ #pragma once + #include +#include + namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a76d370d057..ef679b61a79 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -46,7 +46,6 @@ #include #include -#include #include namespace CurrentMetrics @@ -69,7 +68,6 @@ namespace ErrorCodes extern const int CANNOT_PARSE_TEXT; extern const int TOO_MANY_PARTITIONS; extern const int DUPLICATED_PART_UUIDS; - extern const int NO_SUCH_COLUMN_IN_TABLE; } @@ -166,7 +164,6 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - bool sample_factor_column_queried, LoggerPtr log) { const Settings & settings = context->getSettingsRef(); @@ -296,7 +293,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (sampling.use_sampling) { - if (sample_factor_column_queried && relative_sample_size != RelativeSize(0)) + if (relative_sample_size != RelativeSize(0)) sampling.used_sample_factor 
= 1.0 / boost::rational_cast(relative_sample_size); RelativeSize size_of_universum = 0; @@ -483,12 +480,13 @@ std::optional> MergeTreeDataSelectExecutor::filterPar { if (!filter_dag) return {}; - auto sample = data.getSampleBlockWithVirtualColumns(); + + auto sample = data.getHeaderWithVirtualsForFilter(); auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample); if (!dag) return {}; - auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, false /* one_part */); + auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts); VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context); return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); } @@ -868,76 +866,11 @@ std::shared_ptr MergeTreeDataSelectExecutor::checkLimits( return nullptr; } -static void selectColumnNames( - const Names & column_names_to_return, - const MergeTreeData & data, - Names & real_column_names, - Names & virt_column_names, - bool & sample_factor_column_queried) -{ - sample_factor_column_queried = false; - - for (const String & name : column_names_to_return) - { - if (name == "_part") - { - virt_column_names.push_back(name); - } - else if (name == "_part_index") - { - virt_column_names.push_back(name); - } - else if (name == "_partition_id") - { - virt_column_names.push_back(name); - } - else if (name == "_part_offset") - { - virt_column_names.push_back(name); - } - else if (name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - virt_column_names.push_back(name); - } - else if (name == BlockNumberColumn::name) - { - virt_column_names.push_back(name); - } - else if (name == "_part_uuid") - { - virt_column_names.push_back(name); - } - else if (name == "_partition_value") - { - if (!typeid_cast(data.getPartitionValueType().get())) - { - throw Exception( - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, - "Missing column `_partition_value` because there is no partition column in table {}", - data.getStorageID().getTableName()); - } - - virt_column_names.push_back(name); - } - else if (name == "_sample_factor") - { - sample_factor_column_queried = true; - virt_column_names.push_back(name); - } - else - { - real_column_names.push_back(name); - } - } -} - ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, - const PrewhereInfoPtr & prewhere_info, const Names & column_names_to_return, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, - const ActionDAGNodes & added_filter_nodes, ContextPtr context, size_t num_streams, std::shared_ptr max_block_numbers_to_read) const @@ -946,30 +879,19 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar if (total_parts == 0) return std::make_shared(); - Names real_column_names; - Names virt_column_names; - /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. - /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query. - bool sample_factor_column_queried = false; - - selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried); - std::optional indexes; /// NOTE: We don't need alter_conversions because the returned analysis_result is only used for: /// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions. 
return ReadFromMergeTree::selectRangesToRead( std::move(parts), /*alter_conversions=*/ {}, - prewhere_info, - added_filter_nodes, metadata_snapshot, query_info, context, num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + column_names_to_return, log, indexes); } @@ -996,30 +918,16 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( else if (parts.empty()) return {}; - Names real_column_names; - Names virt_column_names; - /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. - /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query. - bool sample_factor_column_queried = false; - - selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried); - - /// Do not keep data parts in snapshot. - /// They are stored separately, and some could be released after PK analysis. - auto storage_snapshot_copy = storage_snapshot->clone(std::make_unique()); - return std::make_unique( std::move(parts), std::move(alter_conversions), - real_column_names, - virt_column_names, + column_names_to_return, data, query_info, - storage_snapshot_copy, + storage_snapshot, context, max_block_size, num_streams, - sample_factor_column_queried, max_block_numbers_to_read, log, merge_tree_select_result_ptr, @@ -1087,7 +995,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( MarkRanges res; size_t marks_count = part->index_granularity.getMarksCount(); - const auto & index = part->index; + const auto & index = part->getIndex(); if (marks_count == 0) return res; @@ -1110,7 +1018,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( DataTypes key_types; for (size_t i : key_indices) { - index_columns->emplace_back(ColumnWithTypeAndName{index[i], primary_key.data_types[i], primary_key.column_names[i]}); + if (i < index.size()) + index_columns->emplace_back(index[i], primary_key.data_types[i], primary_key.column_names[i]); + else + index_columns->emplace_back(); /// The column of the primary key was not loaded in memory - we'll skip it. + key_types.emplace_back(primary_key.data_types[i]); } @@ -1119,7 +1031,6 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( std::function create_field_ref; if (key_condition.hasMonotonicFunctionsChain()) { - create_field_ref = [index_columns](size_t row, size_t column, FieldRef & field) { field = {index_columns.get(), row, column}; @@ -1159,7 +1070,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( { for (size_t i = 0; i < used_key_size; ++i) { - create_field_ref(range.begin, i, index_left[i]); + if ((*index_columns)[i].column) + create_field_ref(range.begin, i, index_left[i]); + else + index_left[i] = NEGATIVE_INFINITY; + index_right[i] = POSITIVE_INFINITY; } } @@ -1170,8 +1085,17 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( for (size_t i = 0; i < used_key_size; ++i) { - create_field_ref(range.begin, i, index_left[i]); - create_field_ref(range.end, i, index_right[i]); + if ((*index_columns)[i].column) + { + create_field_ref(range.begin, i, index_left[i]); + create_field_ref(range.end, i, index_right[i]); + } + else + { + /// If the PK column was not loaded in memory - exclude it from the analysis. 
+ index_left[i] = NEGATIVE_INFINITY; + index_right[i] = POSITIVE_INFINITY; + } } } key_condition_maybe_true = key_condition.mayBeTrueInRange(used_key_size, index_left.data(), index_right.data(), key_types); @@ -1206,6 +1130,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( bool part_offset_condition_exact_range = !part_offset_condition || part_offset_condition->alwaysUnknownOrTrue() || part_offset_condition->matchesExactContinuousRange(); const String & part_name = part->isProjectionPart() ? fmt::format("{}.{}", part->name, part->getParentPart()->name) : part->name; + if (!key_condition_exact_range || !part_offset_condition_exact_range) { // Do exclusion search, where we drop ranges that do not match @@ -1220,10 +1145,10 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( part->index_granularity_info.index_granularity_bytes); /** There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back). - * At each step, take the left segment and check if it fits. - * If fits, split it into smaller ones and put them on the stack. If not, discard it. - * If the segment is already of one mark length, add it to response and discard it. - */ + * At each step, take the left segment and check if it fits. + * If fits, split it into smaller ones and put them on the stack. If not, discard it. + * If the segment is already of one mark length, add it to response and discard it. + */ std::vector ranges_stack = { {0, marks_count} }; size_t steps = 0; @@ -1233,7 +1158,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( MarkRange range = ranges_stack.back(); ranges_stack.pop_back(); - steps++; + ++steps; if (!may_be_true_in_range(range)) continue; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 17975354187..b1afd7e6668 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -56,11 +56,9 @@ public: /// This method is used to select best projection for table. ReadFromMergeTree::AnalysisResultPtr estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, - const PrewhereInfoPtr & prewhere_info, const Names & column_names, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, - const ActionDAGNodes & added_filter_nodes, ContextPtr context, size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr) const; @@ -215,7 +213,6 @@ public: const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - bool sample_factor_column_queried, LoggerPtr log); /// Check query limits: max_partitions_to_read, max_concurrent_queries. diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c9c16b59f9e..fdac16ae19a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -384,13 +384,13 @@ Block MergeTreeDataWriter::mergeBlock( /// Check that after first merge merging_algorithm is waiting for data from input 0. if (status.required_source != 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: required source after the first merge is not 0. Chunk rows: {}, is_finished: {}, required_source: {}, algorithm: {}", status.chunk.getNumRows(), status.is_finished, status.required_source, merging_algorithm->getName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Required source after the first merge is not 0. 
Chunk rows: {}, is_finished: {}, required_source: {}, algorithm: {}", status.chunk.getNumRows(), status.is_finished, status.required_source, merging_algorithm->getName()); status = merging_algorithm->merge(); /// Check that merge is finished. if (!status.is_finished) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: merge is not finished after the second merge."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge is not finished after the second merge."); /// Merged Block is sorted and we don't need to use permutation anymore permutation = nullptr; @@ -439,7 +439,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( auto max_month = date_lut.toNumYYYYMM(max_date); if (min_month != max_month) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: part spans more than one month."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part spans more than one month."); part_name = new_part_info.getPartNameV0(min_date, max_date); } @@ -648,18 +648,11 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( const auto & metadata_snapshot = projection.metadata; MergeTreeDataPartType part_type; - if (parent_part->getType() == MergeTreeDataPartType::InMemory) - { - part_type = MergeTreeDataPartType::InMemory; - } - else - { - /// Size of part would not be greater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - // just check if there is enough space on parent volume - data.reserveSpace(expected_size, parent_part->getDataPartStorage()); - part_type = data.choosePartFormatOnDisk(expected_size, block.rows()).part_type; - } + /// Size of part would not be greater than block.bytes() + epsilon + size_t expected_size = block.bytes(); + // just check if there is enough space on parent volume + data.reserveSpace(expected_size, parent_part->getDataPartStorage()); + part_type = data.choosePartFormatOnDisk(expected_size, block.rows()).part_type; auto new_data_part = parent_part->getProjectionPartBuilder(part_name, is_temp).withPartType(part_type).build(); auto projection_part_storage = new_data_part->getDataPartStoragePtr(); diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index da49814b83a..f506230b5ea 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -59,7 +59,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & const auto * non_const_column = typeid_cast(hash_column); if (!const_column && !non_const_column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: hash column must be Const Column or UInt64 Column."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Hash column must be Const or UInt64."); if (const_column) { diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index db85c804d8d..8029d6d405b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -53,7 +53,7 @@ public: if (const auto & bf_granule = typeid_cast(granule.get())) return mayBeTrueOnGranule(bf_granule); - throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: require bloom filter index granule."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Requires bloom filter index granule."); } private: diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp 
b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 4e339964de3..031a0c08c26 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -54,9 +54,9 @@ MarkType::MarkType(bool adaptive_, bool compressed_, MergeTreeDataPartType::Valu : adaptive(adaptive_), compressed(compressed_), part_type(part_type_) { if (!adaptive && part_type != MergeTreeDataPartType::Wide) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: non-Wide data part type with non-adaptive granularity"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-Wide data part type with non-adaptive granularity"); if (part_type == MergeTreeDataPartType::Unknown) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unknown data part type"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown data part type"); } bool MarkType::isMarkFileExtension(std::string_view extension) @@ -71,7 +71,7 @@ std::string MarkType::getFileExtension() const if (!adaptive) { if (part_type != MergeTreeDataPartType::Wide) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: non-Wide data part type with non-adaptive granularity"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-Wide data part type with non-adaptive granularity"); return res; } @@ -81,10 +81,8 @@ std::string MarkType::getFileExtension() const return res + "2"; case MergeTreeDataPartType::Compact: return res + "3"; - case MergeTreeDataPartType::InMemory: - return ""; case MergeTreeDataPartType::Unknown: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unknown data part type"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown data part type"); } } @@ -126,8 +124,6 @@ size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) con return mark_type.adaptive ? 
getAdaptiveMrkSizeWide() : getNonAdaptiveMrkSizeWide(); else if (mark_type.part_type == MergeTreeDataPartType::Compact) return getAdaptiveMrkSizeCompact(columns_num); - else if (mark_type.part_type == MergeTreeDataPartType::InMemory) - return 0; else throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unknown part type"); } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h index 4cb35ee64b1..af008866919 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h @@ -18,7 +18,7 @@ class MergeTreeData; */ struct MarkType { - MarkType(std::string_view extension); + explicit MarkType(std::string_view extension); MarkType(bool adaptive_, bool compressed_, MergeTreeDataPartType::Value part_type_); static bool isMarkFileExtension(std::string_view extension); diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index 2296e1b717d..130e708d76f 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 1e2abe6983f..dca26fb7b28 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index ea9f7ddef3d..7c66ba1a867 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -5,9 +5,6 @@ #include -#include -#include - namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 4749470bedd..8fdadb4e5eb 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -1,12 +1,9 @@ #pragma once #include -#include #include #include #include -#include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 7531c03a011..8250050412f 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -1,12 +1,11 @@ #include #include -#include +#include #include #include #include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 47c2fe07bb4..c19b4ddd8a2 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -108,22 +109,22 @@ MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get() MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const 
ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , WithContext(context_) @@ -375,7 +376,7 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics() update_stat_for_column(column.name); if (reader_settings.apply_deleted_mask && read_info.data_part->hasLightweightDelete()) - update_stat_for_column(LightweightDeleteDescription::FILTER_COLUMN.name); + update_stat_for_column(RowExistsColumn::name); for (const auto & pre_columns : read_info.task_columns.pre_columns) for (const auto & column : pre_columns) diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 378034c5eae..0c8a6716d40 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -18,12 +18,12 @@ class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithCo public: MergeTreePrefetchedReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); @@ -67,7 +67,7 @@ private: struct ThreadTask { - using InfoPtr = MergeTreeReadTask::InfoPtr; + using InfoPtr = MergeTreeReadTaskInfoPtr; ThreadTask(InfoPtr read_info_, MarkRanges ranges_, Priority priority_) : read_info(std::move(read_info_)), ranges(std::move(ranges_)), priority(priority_) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index cce7e56dda9..6932762f58b 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -362,7 +362,7 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRo } } -/// The main invariant of the data in the read result is that he number of rows is +/// The main invariant of the data in the read result is that the number of rows is /// either equal to total_rows_per_granule (if filter has not been applied) or to the number of /// 1s in the filter (if filter has been applied). void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const @@ -448,21 +448,16 @@ static ColumnPtr andFilters(ColumnPtr c1, ColumnPtr c2) throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of filters don't match: {} and {}", c1->size(), c2->size()); - // TODO: use proper vectorized implementation of AND? auto res = ColumnUInt8::create(c1->size()); auto & res_data = res->getData(); const auto & c1_data = typeid_cast(*c1).getData(); const auto & c2_data = typeid_cast(*c2).getData(); const size_t size = c1->size(); - const size_t step = 16; - size_t i = 0; - /// NOTE: '&&' must be used instead of '&' for 'AND' operation because UInt8 columns might contain any non-zero - /// value for true and we cannot bitwise AND them to get the correct result. - for (; i + step < size; i += step) - for (size_t j = 0; j < step; ++j) - res_data[i+j] = (c1_data[i+j] && c2_data[i+j]); - for (; i < size; ++i) - res_data[i] = (c1_data[i] && c2_data[i]); + /// The double NOT operators (!!) convert the non-zeros to the bool value of true (0x01) and zeros to false (0x00). 
+ /// After casting them to UInt8, '&' could replace '&&' for the 'AND' operation implementation and at the same + /// time enable the auto vectorization. + for (size_t i = 0; i < size; ++i) + res_data[i] = (static_cast(!!c1_data[i]) & static_cast(!!c2_data[i])); return res; } @@ -808,8 +803,7 @@ MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, const PrewhereExprStep * prewhere_info_, - bool last_reader_in_chain_, - const Names & non_const_virtual_column_names_) + bool last_reader_in_chain_) : merge_tree_reader(merge_tree_reader_) , index_granularity(&(merge_tree_reader->data_part_info_for_read->getIndexGranularity())) , prev_reader(prev_reader_) @@ -826,21 +820,6 @@ MergeTreeRangeReader::MergeTreeRangeReader( result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); } - for (const auto & column_name : non_const_virtual_column_names_) - { - if (result_sample_block.has(column_name)) - continue; - - non_const_virtual_column_names.push_back(column_name); - - if (column_name == "_part_offset" && !prev_reader) - { - /// _part_offset column is filled by the first reader. - read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); - result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); - } - } - if (prewhere_info) { const auto & step = *prewhere_info; @@ -1006,6 +985,8 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (num_read_rows == 0) num_read_rows = read_result.num_rows; + merge_tree_reader->fillVirtualColumns(columns, num_read_rows); + /// fillMissingColumns() must be called after reading but befoe any filterings because /// some columns (e.g. arrays) might be only partially filled and thus not be valid and /// fillMissingColumns() fixes this. 
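For illustration, a minimal standalone sketch of the idea behind the andFilters() change above: normalizing each UInt8 filter value with '!!' lets a plain bitwise '&' replace the short-circuiting '&&', so the compiler can auto-vectorize the loop. Plain std::vector stands in for ClickHouse's ColumnUInt8 here, and the function names are illustrative, not the actual API; both functions assume the two filters have equal size.

#include <cstddef>
#include <cstdint>
#include <vector>

/// Short-circuiting form: '&&' implies a branch per element, which typically
/// prevents auto-vectorization of the loop.
std::vector<std::uint8_t> andFiltersScalar(const std::vector<std::uint8_t> & a, const std::vector<std::uint8_t> & b)
{
    std::vector<std::uint8_t> res(a.size());
    for (std::size_t i = 0; i < a.size(); ++i)
        res[i] = (a[i] && b[i]);
    return res;
}

/// Branch-free form: '!!' maps any non-zero value to 1 and zero to 0, so a
/// bitwise '&' yields the same result and the loop can be auto-vectorized.
std::vector<std::uint8_t> andFiltersVectorizable(const std::vector<std::uint8_t> & a, const std::vector<std::uint8_t> & b)
{
    std::vector<std::uint8_t> res(a.size());
    for (std::size_t i = 0; i < a.size(); ++i)
        res[i] = static_cast<std::uint8_t>(!!a[i]) & static_cast<std::uint8_t>(!!b[i]);
    return res;
}

With optimizations enabled, the second loop typically compiles to SIMD compares and bitwise ANDs, which is the motivation stated in the patch comment.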
@@ -1055,23 +1036,23 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar return read_result; { - /// Physical columns go first and then some virtual columns follow - size_t physical_columns_count = merge_tree_reader->getColumns().size(); - Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count); + size_t columns_count = merge_tree_reader->getColumns().size(); + Columns columns(read_result.columns.begin(), read_result.columns.begin() + columns_count); + merge_tree_reader->fillVirtualColumns(columns, read_result.num_rows); bool should_evaluate_missing_defaults; - merge_tree_reader->fillMissingColumns(physical_columns, should_evaluate_missing_defaults, read_result.num_rows); + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, read_result.num_rows); /// If some columns absent in part, then evaluate default values if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults({}, physical_columns); + merge_tree_reader->evaluateMissingDefaults({}, columns); /// If result not empty, then apply on-fly alter conversions if any required if (!prewhere_info || prewhere_info->perform_alter_conversions) - merge_tree_reader->performRequiredConversions(physical_columns); + merge_tree_reader->performRequiredConversions(columns); - for (size_t i = 0; i < physical_columns.size(); ++i) - read_result.columns[i] = std::move(physical_columns[i]); + for (size_t i = 0; i < columns.size(); ++i) + read_result.columns[i] = std::move(columns[i]); } size_t total_bytes = 0; @@ -1163,12 +1144,17 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar result.adjustLastGranule(); if (read_sample_block.has("_part_offset")) - fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); + { + size_t pos = read_sample_block.getPositionByName("_part_offset"); + chassert(pos < result.columns.size()); + chassert(result.columns[pos] == nullptr); + result.columns[pos] = createPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); + } return result; } -void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) +ColumnPtr MergeTreeRangeReader::createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) { size_t num_rows = result.numReadRows(); @@ -1194,7 +1180,7 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead *pos++ = start_part_offset++; } - result.columns.emplace_back(std::move(column)); + return column; } Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows) @@ -1208,7 +1194,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si if (result.rows_per_granule.empty()) { - /// If zero rows were read on prev step, than there is no more rows to read. + /// If zero rows were read on prev step, there are no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually.
stream.finish(); return columns; diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 79ed18f4d1f..688a6b0922b 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -101,8 +101,7 @@ public: IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, const PrewhereExprStep * prewhere_info_, - bool last_reader_in_chain_, - const Names & non_const_virtual_column_names); + bool last_reader_in_chain_); MergeTreeRangeReader() = default; @@ -309,7 +308,7 @@ private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result) const; - void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); + ColumnPtr createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; @@ -323,7 +322,6 @@ private: bool last_reader_in_chain = false; bool is_initialized = false; - Names non_const_virtual_column_names; LoggerPtr log = getLogger("MergeTreeRangeReader"); }; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 8ed7a9d8707..e525f7f5f65 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -16,14 +16,15 @@ namespace ProfileEvents extern const Event ReadBackoff; } -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { +namespace ErrorCodes +{ +extern const int CANNOT_SCHEDULE_TASK; +extern const int LOGICAL_ERROR; +} + size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & columns_to_read) { ColumnSize columns_size{}; @@ -34,22 +35,22 @@ size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & column MergeTreeReadPool::MergeTreeReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read) @@ -217,6 +218,9 @@ void MergeTreeReadPool::profileFeedback(ReadBufferFromFileBase::ProfileInfo info void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) { + if (threads > 1000000ull) + throw Exception(ErrorCodes::CANNOT_SCHEDULE_TASK, "Too many threads ({}) requested", threads); + threads_tasks.resize(threads); if (parts_ranges.empty()) return; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index e45ccad912f..cb0e8a9657f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -26,12 +26,12 @@ public: MergeTreeReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & 
storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 446baccd961..0cbb0a86b2f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -2,26 +2,27 @@ #include #include + namespace DB { MergeTreeReadPoolBase::MergeTreeReadPoolBase( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & pool_settings_, const ContextPtr & context_) : parts_ranges(std::move(parts_)) + , shared_virtual_fields(std::move(shared_virtual_fields_)) , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , reader_settings(reader_settings_) , column_names(column_names_) - , virtual_column_names(virtual_column_names_) , pool_settings(pool_settings_) , owned_mark_cache(context_->getGlobalContext()->getMarkCache()) , owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? context_->getGlobalContext()->getUncompressedCache() : nullptr) @@ -44,7 +45,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos() assertSortedAndNonIntersecting(part_with_ranges.ranges); #endif - MergeTreeReadTask::Info read_task_info; + MergeTreeReadTaskInfo read_task_info; read_task_info.data_part = part_with_ranges.data_part; read_task_info.part_index_in_query = part_with_ranges.part_index_in_query; @@ -53,9 +54,16 @@ void MergeTreeReadPoolBase::fillPerPartInfos() LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions); read_task_info.task_columns = getReadTaskColumns( - part_info, storage_snapshot, column_names, virtual_column_names, - prewhere_info, actions_settings, - reader_settings, /*with_subcolumns=*/ true); + part_info, + storage_snapshot, + column_names, + prewhere_info, + actions_settings, + reader_settings, + /*with_subcolumns=*/true); + + read_task_info.const_virtual_fields = shared_virtual_fields; + read_task_info.const_virtual_fields.emplace("_part_index", read_task_info.part_index_in_query); if (pool_settings.preferred_block_size_bytes > 0) { @@ -75,7 +83,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos() } is_part_on_remote_disk.push_back(part_with_ranges.data_part->isStoredOnRemoteDisk()); - per_part_infos.push_back(std::make_shared(std::move(read_task_info))); + per_part_infos.push_back(std::make_shared(std::move(read_task_info))); } } @@ -97,7 +105,7 @@ std::vector MergeTreeReadPoolBase::getPerPartSumMarks() const } MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask( - MergeTreeReadTask::InfoPtr read_info, + MergeTreeReadTaskInfoPtr read_info, MarkRanges ranges, MergeTreeReadTask * previous_task) const { diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 0081063cd37..1b5bfec5898 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -23,12 +23,12 @@ public: MergeTreeReadPoolBase( 
RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); @@ -37,12 +37,12 @@ public: protected: /// Initialized in constructor const RangesInDataParts parts_ranges; + const VirtualFields shared_virtual_fields; const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; const MergeTreeReaderSettings reader_settings; const Names column_names; - const Names virtual_column_names; const PoolSettings pool_settings; const MarkCachePtr owned_mark_cache; const UncompressedCachePtr owned_uncompressed_cache; @@ -52,13 +52,13 @@ protected: std::vector getPerPartSumMarks() const; MergeTreeReadTaskPtr createTask( - MergeTreeReadTask::InfoPtr read_info, + MergeTreeReadTaskInfoPtr read_info, MarkRanges ranges, MergeTreeReadTask * previous_task) const; MergeTreeReadTask::Extras getExtras() const; - std::vector per_part_infos; + std::vector per_part_infos; std::vector is_part_on_remote_disk; ReadBufferFromFileBase::ProfileCallback profile_callback; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp index 1b621ad5055..4c0391ffa57 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -12,22 +12,22 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( bool has_limit_below_one_block_, MergeTreeReadType read_type_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , has_limit_below_one_block(has_limit_below_one_block_) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h index d9cc1ba4984..9fedf396a6b 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -11,12 +11,12 @@ public: bool has_limit_below_one_block_, MergeTreeReadType read_type_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 47436ed1407..38035d97f56 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -13,22 +13,22 @@ namespace ErrorCodes 
MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( ParallelReadingExtension extension_, RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index 6a548dffe37..ca159edb91c 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -11,12 +11,12 @@ public: MergeTreeReadPoolParallelReplicas( ParallelReadingExtension extension_, RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index a822a517933..01c0a9f91be 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -12,22 +12,22 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd ParallelReadingExtension extension_, CoordinationMode mode_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 3e5f8f5dfba..4fe3f7a699c 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -12,12 +12,12 @@ public: ParallelReadingExtension extension_, CoordinationMode mode_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp 
b/src/Storages/MergeTree/MergeTreeReadTask.cpp index 41c7531b6a6..08b30e445e2 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace DB @@ -10,7 +11,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -String MergeTreeReadTask::Columns::dump() const +String MergeTreeReadTaskColumns::dump() const { WriteBufferFromOwnString s; for (size_t i = 0; i < pre_columns.size(); ++i) @@ -22,7 +23,7 @@ String MergeTreeReadTask::Columns::dump() const } MergeTreeReadTask::MergeTreeReadTask( - InfoPtr info_, + MergeTreeReadTaskInfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_) @@ -34,23 +35,30 @@ MergeTreeReadTask::MergeTreeReadTask( } MergeTreeReadTask::Readers MergeTreeReadTask::createReaders( - const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges) + const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges) { Readers new_readers; auto create_reader = [&](const NamesAndTypesList & columns_to_read) { return read_info->data_part->getReader( - columns_to_read, extras.storage_snapshot, ranges, - extras.uncompressed_cache, extras.mark_cache, - read_info->alter_conversions, extras.reader_settings, extras.value_size_map, extras.profile_callback); + columns_to_read, + extras.storage_snapshot, + ranges, + read_info->const_virtual_fields, + extras.uncompressed_cache, + extras.mark_cache, + read_info->alter_conversions, + extras.reader_settings, + extras.value_size_map, + extras.profile_callback); }; new_readers.main = create_reader(read_info->task_columns.columns); /// Add lightweight delete filtering step if (extras.reader_settings.apply_deleted_mask && read_info->data_part->hasLightweightDelete()) - new_readers.prewhere.push_back(create_reader({LightweightDeleteDescription::FILTER_COLUMN})); + new_readers.prewhere.push_back(create_reader({{RowExistsColumn::name, RowExistsColumn::type}})); for (const auto & pre_columns_per_step : read_info->task_columns.pre_columns) new_readers.prewhere.push_back(create_reader(pre_columns_per_step)); @@ -58,10 +66,8 @@ MergeTreeReadTask::Readers MergeTreeReadTask::createReaders( return new_readers; } -MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( - const Readers & task_readers, - const PrewhereExprInfo & prewhere_actions, - const Names & non_const_virtual_column_names) +MergeTreeReadTask::RangeReaders +MergeTreeReadTask::createRangeReaders(const Readers & task_readers, const PrewhereExprInfo & prewhere_actions) { MergeTreeReadTask::RangeReaders new_range_readers; if (prewhere_actions.steps.size() != task_readers.prewhere.size()) @@ -77,10 +83,7 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( { last_reader = task_readers.main->getColumns().empty() && (i + 1 == prewhere_actions.steps.size()); - MergeTreeRangeReader current_reader( - task_readers.prewhere[i].get(), - prev_reader, prewhere_actions.steps[i].get(), - last_reader, non_const_virtual_column_names); + MergeTreeRangeReader current_reader(task_readers.prewhere[i].get(), prev_reader, prewhere_actions.steps[i].get(), last_reader); new_range_readers.prewhere.push_back(std::move(current_reader)); prev_reader = &new_range_readers.prewhere.back(); @@ -88,11 +91,11 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( if (!last_reader) { - new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, 
nullptr, true, non_const_virtual_column_names); + new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true); } else { - /// If all columns are read by prewhere range readers than move last prewhere range reader to main. + /// If all columns are read by prewhere range readers, move last prewhere range reader to main. new_range_readers.main = std::move(new_range_readers.prewhere.back()); new_range_readers.prewhere.pop_back(); } @@ -100,14 +103,12 @@ return new_range_readers; } -void MergeTreeReadTask::initializeRangeReaders( - const PrewhereExprInfo & prewhere_actions, - const Names & non_const_virtual_column_names) +void MergeTreeReadTask::initializeRangeReaders(const PrewhereExprInfo & prewhere_actions) { if (range_readers.main.isInitialized()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Range reader is already initialized"); - range_readers = createRangeReaders(readers, prewhere_actions, non_const_virtual_column_names); + range_readers = createRangeReaders(readers, prewhere_actions); } UInt64 MergeTreeReadTask::estimateNumRows(const BlockSizeParams & params) const diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index 8d2f0657fd1..c8bb501c0e8 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -20,6 +20,8 @@ using MergeTreeBlockSizePredictorPtr = std::shared_ptr; using MergeTreeReaderPtr = std::unique_ptr; +using VirtualFields = std::unordered_map; + enum class MergeTreeReadType @@ -40,45 +42,47 @@ enum class MergeTreeReadType ParallelReplicas, }; +struct MergeTreeReadTaskColumns +{ + /// Column names to read during WHERE + NamesAndTypesList columns; + /// Column names to read during each PREWHERE step + std::vector pre_columns; + + String dump() const; +}; + +struct MergeTreeReadTaskInfo +{ + /// Data part which should be read while performing this task + DataPartPtr data_part; + /// For `part_index` virtual column + size_t part_index_in_query; + /// Alter conversions that should be applied on the fly for the part. + AlterConversionsPtr alter_conversions; + /// Column names to read during PREWHERE and WHERE + MergeTreeReadTaskColumns task_columns; + /// Shared initialized size predictor. It is copied for each new task. + MergeTreeBlockSizePredictorPtr shared_size_predictor; + /// TODO: comment + VirtualFields const_virtual_fields; +}; + +using MergeTreeReadTaskInfoPtr = std::shared_ptr; + /// A batch of work for MergeTreeSelectProcessor struct MergeTreeReadTask : private boost::noncopyable { public: - struct Columns - { - /// Column names to read during WHERE - NamesAndTypesList columns; - /// Column names to read during each PREWHERE step - std::vector pre_columns; - - String dump() const; - }; - - struct Info - { - /// Data part which should be read while performing this task - DataPartPtr data_part; - /// For virtual `part_index` virtual column - size_t part_index_in_query; - /// Alter converversionss that should be applied on-fly for part. - AlterConversionsPtr alter_conversions; - /// Column names to read during PREWHERE and WHERE - Columns task_columns; - /// Shared initialized size predictor. It is copied for each new task. - MergeTreeBlockSizePredictorPtr shared_size_predictor; - }; - - using InfoPtr = std::shared_ptr; - /// Extra params that required for creation of reader.
struct Extras { UncompressedCache * uncompressed_cache = nullptr; MarkCache * mark_cache = nullptr; - MergeTreeReaderSettings reader_settings; - StorageSnapshotPtr storage_snapshot; - IMergeTreeReader::ValueSizeMap value_size_map; - ReadBufferFromFileBase::ProfileCallback profile_callback; + MergeTreeReaderSettings reader_settings{}; + StorageSnapshotPtr storage_snapshot{}; + IMergeTreeReader::ValueSizeMap value_size_map{}; + ReadBufferFromFileBase::ProfileCallback profile_callback{}; }; struct Readers @@ -115,27 +119,32 @@ public: size_t num_read_bytes = 0; }; - MergeTreeReadTask(InfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_); + MergeTreeReadTask( + MergeTreeReadTaskInfoPtr info_, + Readers readers_, + MarkRanges mark_ranges_, - void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names); + MergeTreeBlockSizePredictorPtr size_predictor_); + + void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions); BlockAndProgress read(const BlockSizeParams & params); bool isFinished() const { return mark_ranges.empty() && range_readers.main.isCurrentRangeFinished(); } - const Info & getInfo() const { return *info; } + const MergeTreeReadTaskInfo & getInfo() const { return *info; } const MergeTreeRangeReader & getMainRangeReader() const { return range_readers.main; } const IMergeTreeReader & getMainReader() const { return *readers.main; } Readers releaseReaders() { return std::move(readers); } - static Readers createReaders(const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges); - static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names); + static Readers createReaders(const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges); + static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions); private: UInt64 estimateNumRows(const BlockSizeParams & params) const; /// Shared information required for reading. - InfoPtr info; + MergeTreeReadTaskInfoPtr info; /// Readers for data_part of this task. /// May be reused and released to the next task. 
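The hunks above replace the nested MergeTreeReadTask::Info and MergeTreeReadTask::Columns types with standalone MergeTreeReadTaskInfo and MergeTreeReadTaskColumns, and thread a VirtualFields map of per-part constant values (for columns such as _part, _partition_id or _block_number, cf. getFieldForConstVirtualColumn() in MergeTreeVirtualColumns.cpp later in this patch) into the part readers instead of injecting those columns into the result block after reading. The following is a minimal standalone sketch of that pattern only; Field, VirtualFields, PartInfo, Reader and fieldForConstVirtualColumn here are simplified hypothetical stand-ins, not ClickHouse's actual types or APIs.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <variant>

// Simplified, hypothetical stand-ins: a field value and the map of constant
// virtual column values, loosely mirroring VirtualFields from the hunks above.
using Field = std::variant<std::uint64_t, std::string>;
using VirtualFields = std::map<std::string, Field>;

// Hypothetical per-part metadata; the real MergeTreeReadTaskInfo also carries
// alter conversions, the task columns and a shared size predictor.
struct PartInfo
{
    std::string name;
    std::string partition_id;
    std::uint64_t min_block = 0;
};

// Rough analogue of getFieldForConstVirtualColumn(): resolve the constant
// value of a virtual column for a given part.
Field fieldForConstVirtualColumn(const std::string & column_name, const PartInfo & part)
{
    if (column_name == "_part")
        return part.name;
    if (column_name == "_partition_id")
        return part.partition_id;
    if (column_name == "_block_number")
        return part.min_block;
    if (column_name == "_row_exists")
        return std::uint64_t{1};
    throw std::runtime_error("Unexpected const virtual column: " + column_name);
}

// Toy reader: it receives the precomputed constants up front and materializes
// them next to the physical rows, instead of having them injected afterwards.
struct Reader
{
    VirtualFields const_virtual_fields;

    void readRows(std::size_t rows) const
    {
        for (const auto & [column_name, value] : const_virtual_fields)
        {
            std::cout << column_name << " = ";
            std::visit([](const auto & v) { std::cout << v; }, value);
            std::cout << " (constant for " << rows << " rows)\n";
        }
    }
};

int main()
{
    PartInfo part{"all_1_1_0", "all", 1};

    // Build the constant virtual fields once per part...
    VirtualFields fields;
    for (const std::string column : {"_part", "_partition_id", "_block_number"})
        fields[column] = fieldForConstVirtualColumn(column, part);

    // ...and hand them to the reader, similar to how const_virtual_fields is
    // forwarded to data_part->getReader() in the hunks above.
    Reader reader{std::move(fields)};
    reader.readRows(3);
    return 0;
}

The point the sketch mirrors is that the constant values are resolved once per part when the task info is built, so every reader created from that info can materialize them for any number of rows without a separate virtual-column injection pass over the result block.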
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 02048009296..63824366722 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -17,6 +17,7 @@ namespace ErrorCodes MergeTreeReaderCompact::MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -29,6 +30,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( : IMergeTreeReader( data_part_info_for_read_, columns_, + virtual_fields_, storage_snapshot_, uncompressed_cache_, mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index dace4ec468e..769e6a08be4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -21,6 +21,7 @@ public: MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp deleted file mode 100644 index bacd86511f5..00000000000 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_READ_ALL_DATA; - extern const int ARGUMENT_OUT_OF_BOUND; -} - - -MergeTreeReaderInMemory::MergeTreeReaderInMemory( - MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, - DataPartInMemoryPtr data_part_, - NamesAndTypesList columns_, - const StorageSnapshotPtr & storage_snapshot_, - MarkRanges mark_ranges_, - MergeTreeReaderSettings settings_) - : IMergeTreeReader( - data_part_info_for_read_, - columns_, - storage_snapshot_, - nullptr, - nullptr, - mark_ranges_, - settings_, - {}) - , part_in_memory(std::move(data_part_)) -{ - for (const auto & column_to_read : columns_to_read) - { - /// If array of Nested column is missing in part, - /// we have to read its offsets if they exist. - if (typeid_cast(column_to_read.type.get()) - && !tryGetColumnFromBlock(part_in_memory->block, column_to_read)) - { - if (auto offsets_position = findColumnForOffsets(column_to_read)) - { - positions_for_offsets[column_to_read.name] = *data_part_info_for_read->getColumnPosition(offsets_position->first); - partially_read_columns.insert(column_to_read.name); - } - } - } -} - -size_t MergeTreeReaderInMemory::readRows( - size_t from_mark, size_t /* current_task_last_mark */, bool continue_reading, size_t max_rows_to_read, Columns & res_columns) -{ - if (!continue_reading) - total_rows_read = 0; - - size_t total_marks = data_part_info_for_read->getIndexGranularity().getMarksCount(); - if (from_mark >= total_marks) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Mark {} is out of bound. 
Max mark: {}", - toString(from_mark), toString(total_marks)); - - size_t num_columns = res_columns.size(); - checkNumberOfColumns(num_columns); - - size_t part_rows = part_in_memory->block.rows(); - if (total_rows_read >= part_rows) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data in MergeTreeReaderInMemory. " - "Rows already read: {}. Rows in part: {}", total_rows_read, part_rows); - - size_t rows_to_read = std::min(max_rows_to_read, part_rows - total_rows_read); - for (size_t i = 0; i < num_columns; ++i) - { - const auto & column_to_read = columns_to_read[i]; - - /// Copy offsets, if array of Nested column is missing in part. - auto offsets_it = positions_for_offsets.find(column_to_read.name); - if (offsets_it != positions_for_offsets.end() && !column_to_read.isSubcolumn()) - { - const auto & source_offsets = assert_cast( - *part_in_memory->block.getByPosition(offsets_it->second).column).getOffsets(); - - if (res_columns[i] == nullptr) - res_columns[i] = column_to_read.type->createColumn(); - - auto mutable_column = res_columns[i]->assumeMutable(); - auto & res_offstes = assert_cast(*mutable_column).getOffsets(); - size_t start_offset = total_rows_read ? source_offsets[total_rows_read - 1] : 0; - for (size_t row = 0; row < rows_to_read; ++row) - res_offstes.push_back(source_offsets[total_rows_read + row] - start_offset); - - res_columns[i] = std::move(mutable_column); - } - else if (part_in_memory->hasColumnFiles(column_to_read)) - { - auto block_column = getColumnFromBlock(part_in_memory->block, column_to_read); - if (rows_to_read == part_rows) - { - res_columns[i] = block_column; - } - else - { - if (res_columns[i] == nullptr) - res_columns[i] = column_to_read.type->createColumn(); - - auto mutable_column = res_columns[i]->assumeMutable(); - mutable_column->insertRangeFrom(*block_column, total_rows_read, rows_to_read); - res_columns[i] = std::move(mutable_column); - } - } - } - - total_rows_read += rows_to_read; - return rows_to_read; -} - -} diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h deleted file mode 100644 index e26a98f0916..00000000000 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -class MergeTreeDataPartInMemory; -using DataPartInMemoryPtr = std::shared_ptr; - -/// Reader for InMemory parts -class MergeTreeReaderInMemory : public IMergeTreeReader -{ -public: - MergeTreeReaderInMemory( - MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, - DataPartInMemoryPtr data_part_, - NamesAndTypesList columns_, - const StorageSnapshotPtr & storage_snapshot_, - MarkRanges mark_ranges_, - MergeTreeReaderSettings settings_); - - /// Return the number of rows has been read or zero if there is no columns to read. 
- /// If continue_reading is true, continue reading from last state, otherwise seek to from_mark - size_t readRows(size_t from_mark, size_t current_tasl_last_mark, - bool continue_reading, size_t max_rows_to_read, Columns & res_columns) override; - - bool canReadIncompleteGranules() const override { return true; } - -private: - size_t total_rows_read = 0; - DataPartInMemoryPtr part_in_memory; - - std::unordered_map positions_for_offsets; -}; - -} diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index baf8ec713f9..49ce3103434 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -1,5 +1,4 @@ #pragma once -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 640432ef755..d34a58a25b0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -24,6 +24,7 @@ namespace MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -35,6 +36,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( : IMergeTreeReader( data_part_info_, columns_, + virtual_fields_, storage_snapshot_, uncompressed_cache_, mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 2a850cc2814..a9a5526dd65 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -17,6 +17,7 @@ public: MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 4e93bd267ec..fce733d47b7 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -10,8 +10,9 @@ #include #include #include +#include #include -#include +#include #include namespace DB @@ -19,42 +20,26 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; - extern const int LOGICAL_ERROR; extern const int QUERY_WAS_CANCELLED; } -static void injectNonConstVirtualColumns( - size_t rows, - Block & block, - const Names & virtual_columns, - MergeTreeReadTask * task = nullptr); - -static void injectPartConstVirtualColumns( - size_t rows, - Block & block, - MergeTreeReadTask * task, - const DataTypePtr & partition_value_type, - const Names & virtual_columns); - MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, - const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_) + const MergeTreeReaderSettings & reader_settings_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) + , storage_snapshot(storage_snapshot_) , 
prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) , reader_settings(reader_settings_) , block_size_params(block_size_params_) - , virt_column_names(virt_column_names_) - , partition_value_type(storage_.getPartitionValueType()) + , result_header(transformHeader(pool->getHeader(), prewhere_info)) { if (reader_settings.apply_deleted_mask) { @@ -62,7 +47,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( { .type = PrewhereExprStep::Filter, .actions = nullptr, - .filter_column_name = LightweightDeleteDescription::FILTER_COLUMN.name, + .filter_column_name = RowExistsColumn::name, .remove_filter_column = true, .need_filter = true, .perform_alter_conversions = true, @@ -71,16 +56,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( lightweight_delete_filter_step = std::make_shared(std::move(step)); } - header_without_const_virtual_columns = applyPrewhereActions(pool->getHeader(), prewhere_info); - size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); - injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); - - for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) - non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); - - result_header = header_without_const_virtual_columns; - injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); - if (!prewhere_actions.steps.empty()) LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions.steps.size(), prewhere_actions.dumpConditions()); @@ -163,8 +138,6 @@ ChunkAndProgress MergeTreeSelectProcessor::read() if (res.row_count) { - injectVirtualColumns(res.block, res.row_count, task.get(), partition_value_type, virt_column_names); - /// Reorder the columns according to result_header Columns ordered_columns; ordered_columns.reserve(result_header.columns()); @@ -198,260 +171,12 @@ void MergeTreeSelectProcessor::initializeRangeReaders() for (const auto & step : prewhere_actions.steps) all_prewhere_actions.steps.push_back(step); - task->initializeRangeReaders(all_prewhere_actions, non_const_virtual_column_names); + task->initializeRangeReaders(all_prewhere_actions); } - -namespace +Block MergeTreeSelectProcessor::transformHeader(Block block, const PrewhereInfoPtr & prewhere_info) { - struct VirtualColumnsInserter - { - explicit VirtualColumnsInserter(Block & block_) : block(block_) {} - - bool columnExists(const String & name) const { return block.has(name); } - - void insertUInt8Column(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertUInt64Column(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertUUIDColumn(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertLowCardinalityColumn(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(std::make_shared()), name}); - } - - void insertPartitionValueColumn( - size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name) - { - ColumnPtr column; - if (rows) - column = partition_value_type->createColumnConst(rows, 
Tuple(partition_value.begin(), partition_value.end())) - ->convertToFullColumnIfConst(); - else - column = partition_value_type->createColumn(); - - block.insert({column, partition_value_type, name}); - } - - Block & block; - }; -} - -/// Adds virtual columns that are not const for all rows -static void injectNonConstVirtualColumns( - size_t rows, - Block & block, - const Names & virtual_columns, - MergeTreeReadTask * task) -{ - VirtualColumnsInserter inserter(block); - for (const auto & virtual_column_name : virtual_columns) - { - if (virtual_column_name == "_part_offset") - { - if (!rows) - { - inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name); - } - else - { - if (!inserter.columnExists(virtual_column_name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column {} must have been filled part reader", - virtual_column_name); - } - } - - if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - /// If _row_exists column isn't present in the part then fill it here with 1s - ColumnPtr column; - if (rows) - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst(); - else - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn(); - - inserter.insertUInt8Column(column, virtual_column_name); - } - - if (virtual_column_name == BlockNumberColumn::name) - { - ColumnPtr column; - if (rows) - { - size_t value = 0; - if (task) - { - value = task->getInfo().data_part ? task->getInfo().data_part->info.min_block : 0; - } - column = BlockNumberColumn::type->createColumnConst(rows, value)->convertToFullColumnIfConst(); - } - else - column = BlockNumberColumn::type->createColumn(); - - inserter.insertUInt64Column(column, virtual_column_name); - } - } -} - -/// Adds virtual columns that are const for the whole part -static void injectPartConstVirtualColumns( - size_t rows, - Block & block, - MergeTreeReadTask * task, - const DataTypePtr & partition_value_type, - const Names & virtual_columns) -{ - VirtualColumnsInserter inserter(block); - /// add virtual columns - /// Except _sample_factor, which is added from the outside. 
- if (!virtual_columns.empty()) - { - if (unlikely(rows && !task)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task."); - - const IMergeTreeDataPart * part = nullptr; - - if (rows) - { - part = task->getInfo().data_part.get(); - if (part->isProjectionPart()) - part = part->getParentPart(); - } - - for (const auto & virtual_column_name : virtual_columns) - { - if (virtual_column_name == "_part") - { - ColumnPtr column; - if (rows) - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->name) - ->convertToFullColumnIfConst(); - else - column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_part_index") - { - ColumnPtr column; - if (rows) - column = DataTypeUInt64().createColumnConst(rows, task->getInfo().part_index_in_query)->convertToFullColumnIfConst(); - else - column = DataTypeUInt64().createColumn(); - - inserter.insertUInt64Column(column, virtual_column_name); - } - else if (virtual_column_name == "_part_uuid") - { - ColumnPtr column; - if (rows) - column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); - else - column = DataTypeUUID().createColumn(); - - inserter.insertUUIDColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_id") - { - ColumnPtr column; - if (rows) - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->info.partition_id) - ->convertToFullColumnIfConst(); - else - column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_value") - { - if (rows) - inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name); - else - inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name); - } - } - } -} - -void MergeTreeSelectProcessor::injectVirtualColumns( - Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns) -{ - /// First add non-const columns that are filled by the range reader and then const columns that we will fill ourselves. 
- /// Note that the order is important: virtual columns filled by the range reader must go first - injectNonConstVirtualColumns(row_count, block, virtual_columns,task); - injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns); -} - -Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info) -{ - if (prewhere_info) - { - if (prewhere_info->row_level_filter) - { - block = prewhere_info->row_level_filter->updateHeader(std::move(block)); - auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); - if (!row_level_column.type->canBeUsedInBooleanContext()) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", - row_level_column.type->getName()); - } - - block.erase(prewhere_info->row_level_column_name); - } - - if (prewhere_info->prewhere_actions) - { - block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); - - auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); - if (!prewhere_column.type->canBeUsedInBooleanContext()) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", - prewhere_column.type->getName()); - } - - if (prewhere_info->remove_prewhere_column) - { - block.erase(prewhere_info->prewhere_column_name); - } - else if (prewhere_info->need_filter) - { - WhichDataType which(removeNullable(recursiveRemoveLowCardinality(prewhere_column.type))); - - if (which.isNativeInt() || which.isNativeUInt()) - prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst(); - else if (which.isFloat()) - prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, - "Illegal type {} of column for filter", - prewhere_column.type->getName()); - } - } - } - - return block; -} - -Block MergeTreeSelectProcessor::transformHeader( - Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns) -{ - injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns); - auto transformed = applyPrewhereActions(std::move(block), prewhere_info); - return transformed; + return SourceStepWithFilter::applyPrewhereActions(std::move(block), prewhere_info); } } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index b06ae788e91..01bb3851e04 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -41,21 +41,15 @@ public: MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, - const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_); + const MergeTreeReaderSettings & reader_settings_); String getName() const; - static Block transformHeader( - Block block, - const PrewhereInfoPtr & prewhere_info, - const DataTypePtr & partition_value_type, - const Names & virtual_columns); - + static Block transformHeader(Block block, const PrewhereInfoPtr & prewhere_info); Block getHeader() const { 
return result_header; } ChunkAndProgress read(); @@ -81,15 +75,12 @@ private: size_t num_read_bytes = 0; }; - /// Used for filling header with no rows as well as block with data - static void injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns); - static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); - /// Sets up range readers corresponding to data readers void initializeRangeReaders(); const MergeTreeReadPoolPtr pool; const MergeTreeSelectAlgorithmPtr algorithm; + const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; @@ -97,17 +88,11 @@ private: const MergeTreeReaderSettings reader_settings; const MergeTreeReadTask::BlockSizeParams block_size_params; - const Names virt_column_names; - const DataTypePtr partition_value_type; /// Current task to read from. MergeTreeReadTaskPtr task; /// This step is added when the part has lightweight delete mask PrewhereExprStepPtr lightweight_delete_filter_step; - /// These columns will be filled by the merge tree range reader - Names non_const_virtual_column_names; - /// This header is used for chunks from readFromPart(). - Block header_without_const_virtual_columns; /// A result of getHeader(). A chunk which this header is returned from read(). Block result_header; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index d0fbc316024..81eb166b300 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ namespace DB { + namespace ErrorCodes { extern const int MEMORY_LIMIT_EXCEEDED; @@ -55,7 +57,6 @@ protected: Chunk generate() override; private: - const MergeTreeData & storage; StorageSnapshotPtr storage_snapshot; @@ -86,7 +87,6 @@ private: void finish(); }; - MergeTreeSequentialSource::MergeTreeSequentialSource( MergeTreeSequentialSourceType type, const MergeTreeData & storage_, @@ -136,10 +136,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( { auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() - .withSystemColumns(); - - if (storage.supportsSubcolumns()) - options.withSubcolumns(); + .withVirtuals() + .withSubcolumns(storage.supportsSubcolumns()); columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); } @@ -151,7 +149,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( const auto & context = storage.getContext(); ReadSettings read_settings = context->getReadSettings(); - read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = !storage.getSettings()->force_read_through_cache_for_merges; + /// It does not make sense to use pthread_threadpool for background merges/mutations /// And also to preserve backward compatibility read_settings.local_fs_method = LocalFSReadMethod::pread; @@ -181,9 +180,37 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); reader = data_part->getReader( - columns_for_reader, storage_snapshot, - *mark_ranges, /* uncompressed_cache = */ nullptr, - mark_cache.get(), alter_conversions, reader_settings, {}, {}); + columns_for_reader, + 
storage_snapshot, + *mark_ranges, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, + mark_cache.get(), + alter_conversions, + reader_settings, + {}, + {}); +} + +static void fillBlockNumberColumns( + Columns & res_columns, + const NamesAndTypesList & columns_list, + UInt64 block_number, + UInt64 num_rows) +{ + chassert(res_columns.size() == columns_list.size()); + + auto it = columns_list.begin(); + for (size_t i = 0; i < res_columns.size(); ++i, ++it) + { + if (res_columns[i]) + continue; + + if (it->name == BlockNumberColumn::name) + { + res_columns[i] = BlockNumberColumn::type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst(); + } + } } Chunk MergeTreeSequentialSource::generate() @@ -204,16 +231,17 @@ try if (rows_read) { + fillBlockNumberColumns(columns, sample, data_part->info.min_block, rows_read); + reader->fillVirtualColumns(columns, rows_read); + current_row += rows_read; current_mark += (rows_to_read == rows_read); bool should_evaluate_missing_defaults = false; - reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read, data_part->info.min_block); + reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read); if (should_evaluate_missing_defaults) - { reader->evaluateMissingDefaults({}, columns); - } reader->performRequiredConversions(columns); @@ -278,14 +306,13 @@ Pipe createMergeTreeSequentialSource( bool quiet, std::shared_ptr> filtered_rows_count) { - const auto & filter_column = LightweightDeleteDescription::FILTER_COLUMN; /// The part might have some rows masked by lightweight deletes const bool need_to_filter_deleted_rows = apply_deleted_mask && data_part->hasLightweightDelete(); - const bool has_filter_column = std::ranges::find(columns_to_read, filter_column.name) != columns_to_read.end(); + const bool has_filter_column = std::ranges::find(columns_to_read, RowExistsColumn::name) != columns_to_read.end(); if (need_to_filter_deleted_rows && !has_filter_column) - columns_to_read.emplace_back(filter_column.name); + columns_to_read.emplace_back(RowExistsColumn::name); auto column_part_source = std::make_shared(type, storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), @@ -299,7 +326,7 @@ Pipe createMergeTreeSequentialSource( pipe.addSimpleTransform([filtered_rows_count, has_filter_column](const Block & header) { return std::make_shared( - header, nullptr, filter_column.name, !has_filter_column, false, filtered_rows_count); + header, nullptr, RowExistsColumn::name, !has_filter_column, false, filtered_rows_count); }); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 96cab9c0293..48bf3eacd88 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -146,6 +146,7 @@ struct Settings; M(UInt64, vertical_merge_algorithm_min_rows_to_activate, 16 * 8192, "Minimal (approximate) sum of rows in merging parts to activate Vertical merge algorithm.", 0) \ M(UInt64, vertical_merge_algorithm_min_bytes_to_activate, 0, "Minimal (approximate) uncompressed size in bytes in merging parts to activate Vertical merge algorithm.", 0) \ M(UInt64, vertical_merge_algorithm_min_columns_to_activate, 11, "Minimal amount of non-PK columns to activate Vertical merge algorithm.", 0) \ + M(UInt64, max_postpone_time_for_failed_mutations_ms, 5ULL * 60 * 1000, "The maximum postpone time for failed mutations.", 0) \ \ /** Compatibility settings */ \ M(Bool, allow_suspicious_indices, false, "Reject 
primary/secondary indexes and sorting keys with identical expressions", 0) \ @@ -191,6 +192,7 @@ struct Settings; M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \ M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ + M(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ M(Bool, allow_experimental_block_number_column, false, "Enable persisting column _block_number for each row.", 0) \ M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ \ @@ -201,7 +203,8 @@ struct Settings; M(String, primary_key_compression_codec, "ZSTD(3)", "Compression encoding used by primary, primary key is small enough and cached, so the default compression is ZSTD(3).", 0) \ M(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \ M(UInt64, primary_key_compress_block_size, 65536, "Primary compress block size, the actual size of the block to compress.", 0) \ - \ + M(Bool, primary_key_lazy_load, true, "Load primary key in memory on first use instead of on table initialization. This can save memory in the presence of a large number of tables.", 0) \ + M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in a data part changes at least in this ratio of times, skip loading next columns in memory. This allows saving memory by not loading useless columns of the primary key.", 0) \ /** Projection settings.
*/ \ M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 36816904a81..87b0a04d244 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index a450505f7a8..e1d1d0951e4 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp new file mode 100644 index 00000000000..b87dccc2b18 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + +static ASTPtr getCompressionCodecDeltaLZ4() +{ + return makeASTFunction("CODEC", + std::make_shared("Delta"), + std::make_shared("LZ4")); +} + +const String RowExistsColumn::name = "_row_exists"; +const DataTypePtr RowExistsColumn::type = std::make_shared(); + +const String BlockNumberColumn::name = "_block_number"; +const DataTypePtr BlockNumberColumn::type = std::make_shared(); +const ASTPtr BlockNumberColumn::codec = getCompressionCodecDeltaLZ4(); + +Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part) +{ + if (column_name == RowExistsColumn::name) + return 1ULL; + + if (column_name == BlockNumberColumn::name) + return part.info.min_block; + + if (column_name == "_part") + return part.name; + + if (column_name == "_part_uuid") + return part.uuid; + + if (column_name == "_partition_id") + return part.info.partition_id; + + if (column_name == "_partition_value") + return Tuple(part.partition.value.begin(), part.partition.value.end()); + + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Unexpected const virtual column: {}", column_name); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeVirtualColumns.h b/src/Storages/MergeTree/MergeTreeVirtualColumns.h new file mode 100644 index 00000000000..24721bf1ad1 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeVirtualColumns.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class IMergeTreeDataPart; + +struct RowExistsColumn +{ + static const String name; + static const DataTypePtr type; +}; + +struct BlockNumberColumn +{ + static const String name; + static const DataTypePtr type; + static const ASTPtr codec; +}; + +Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part); + +} diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index d9a89b9d4ef..6f1c5302b0e 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -133,10 +133,18 @@ MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::op return {}; std::unordered_set prewhere_conditions; + std::list prewhere_conditions_list; for (const auto & condition : optimize_result->prewhere_conditions) - prewhere_conditions.insert(condition.node.getDAGNode()); + { + const ActionsDAG::Node * condition_node = condition.node.getDAGNode(); + if 
(prewhere_conditions.insert(condition_node).second) + prewhere_conditions_list.push_back(condition_node); + } - return {.prewhere_nodes = std::move(prewhere_conditions), .fully_moved_to_prewhere = optimize_result->where_conditions.empty()}; + return { + .prewhere_nodes = std::move(prewhere_conditions), + .prewhere_nodes_list = std::move(prewhere_conditions_list), + .fully_moved_to_prewhere = optimize_result->where_conditions.empty()}; } static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & columns_names, NameSet & result_set, bool & has_invalid_column) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 84afa4cda17..6c5ff29bc76 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -48,6 +48,7 @@ public: struct FilterActionsOptimizeResult { std::unordered_set prewhere_nodes; + std::list prewhere_nodes_list; /// Keep insertion order of moved prewhere_nodes bool fully_moved_to_prewhere = false; }; diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp deleted file mode 100644 index 2236c1a9380..00000000000 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ /dev/null @@ -1,336 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int UNKNOWN_FORMAT_VERSION; - extern const int CANNOT_READ_ALL_DATA; - extern const int BAD_DATA_PART_NAME; - extern const int CORRUPTED_DATA; -} - -MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( - MergeTreeData & storage_, - const DiskPtr & disk_, - const String & name_) - : storage(storage_) - , disk(disk_) - , name(name_) - , path(storage.getRelativeDataPath() + name_) - , pool(storage.getContext()->getSchedulePool()) - , log(getLogger(storage.getLogName() + " (WriteAheadLog)")) -{ - init(); - sync_task = pool.createTask("MergeTreeWriteAheadLog::sync", [this] - { - std::lock_guard lock(write_mutex); - out->sync(); - sync_scheduled = false; - sync_cv.notify_all(); - }); -} - -MergeTreeWriteAheadLog::~MergeTreeWriteAheadLog() -{ - try - { - shutdown(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - - -void MergeTreeWriteAheadLog::dropAllWriteAheadLogs(DiskPtr disk_to_drop, std::string relative_data_path) -{ - std::vector files; - disk_to_drop->listFiles(relative_data_path, files); - for (const auto & file : files) - { - if (file.starts_with(WAL_FILE_NAME)) - disk_to_drop->removeFile(fs::path(relative_data_path) / file); - } -} - -void MergeTreeWriteAheadLog::init() -{ - out = disk->writeFile(path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); - - /// Small hack: in NativeWriter header is used only in `getHeader` method. - /// To avoid complex logic of changing it during ALTERs we leave it empty. 
- block_out = std::make_unique(*out, 0, Block{}); - min_block_number = std::numeric_limits::max(); - max_block_number = -1; - bytes_at_last_sync = 0; -} - -void MergeTreeWriteAheadLog::dropPart(const String & part_name) -{ - std::unique_lock lock(write_mutex); - - writeIntBinary(WAL_VERSION, *out); - - ActionMetadata metadata{}; - metadata.write(*out); - - writeIntBinary(static_cast(ActionType::DROP_PART), *out); - writeStringBinary(part_name, *out); - out->next(); -} - -void MergeTreeWriteAheadLog::rotate(const std::unique_lock &) -{ - String new_name = String(WAL_FILE_NAME) + "_" - + toString(min_block_number) + "_" - + toString(max_block_number) + WAL_FILE_EXTENSION; - - /// Finalize stream before file rename - out->finalize(); - disk->replaceFile(path, storage.getRelativeDataPath() + new_name); - init(); -} - -MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( - const StorageMetadataPtr & metadata_snapshot, - ContextPtr context, - DataPartsLock & parts_lock, - bool readonly) -{ - std::unique_lock lock(write_mutex); - - MergeTreeData::MutableDataPartsVector parts; - auto in = disk->readFile(path); - NativeReader block_in(*in, 0); - NameSet dropped_parts; - - while (!in->eof()) - { - MergeTreeData::MutableDataPartPtr part; - UInt8 version; - String part_name; - Block block; - ActionType action_type; - - try - { - ActionMetadata metadata; - - readIntBinary(version, *in); - if (version > 0) - { - metadata.read(*in); - } - - readIntBinary(action_type, *in); - readStringBinary(part_name, *in); - - if (action_type == ActionType::DROP_PART) - { - dropped_parts.insert(part_name); - } - else if (action_type == ActionType::ADD_PART) - { - auto single_disk_volume = std::make_shared("volume_" + part_name, disk, 0); - - part = storage.getDataPartBuilder(part_name, single_disk_volume, part_name) - .withPartType(MergeTreeDataPartType::InMemory) - .withPartStorageType(MergeTreeDataPartStorageType::Full) - .build(); - - part->uuid = metadata.part_uuid; - block = block_in.read(); - - if (storage.getActiveContainingPart(part->info, MergeTreeDataPartState::Active, parts_lock)) - continue; - } - else - { - throw Exception(ErrorCodes::CORRUPTED_DATA, "Unknown action type: {}", toString(static_cast(action_type))); - } - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::CANNOT_READ_ALL_DATA - || e.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION - || e.code() == ErrorCodes::BAD_DATA_PART_NAME - || e.code() == ErrorCodes::CORRUPTED_DATA) - { - LOG_WARNING(log, "WAL file '{}' is broken. {}", path, e.displayText()); - - /// If file is broken, do not write new parts to it. - /// But if it contains any part rotate and save them. 
- if (max_block_number == -1) - { - if (!readonly) - disk->removeFile(path); - } - else if (name == DEFAULT_WAL_FILE_NAME) - rotate(lock); - - break; - } - throw; - } - - if (action_type == ActionType::ADD_PART) - { - MergedBlockOutputStream part_out( - part, - metadata_snapshot, - block.getNamesAndTypesList(), - {}, {}, - CompressionCodecFactory::instance().get("NONE", {}), - NO_TRANSACTION_PTR); - - part->minmax_idx->update(block, storage.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); - part->partition.create(metadata_snapshot, block, 0, context); - part->setColumns(block.getNamesAndTypesList(), {}, metadata_snapshot->getMetadataVersion()); - if (metadata_snapshot->hasSortingKey()) - metadata_snapshot->getSortingKey().expression->execute(block); - - part_out.write(block); - - for (const auto & projection : metadata_snapshot->getProjections()) - { - auto projection_block = projection.calculate(block, context); - auto temp_part = MergeTreeDataWriter::writeProjectionPart(storage, log, projection_block, projection, part.get()); - temp_part.finalize(); - if (projection_block.rows()) - part->addProjectionPart(projection.name, std::move(temp_part.part)); - } - - part_out.finalizePart(part, false); - - min_block_number = std::min(min_block_number, part->info.min_block); - max_block_number = std::max(max_block_number, part->info.max_block); - parts.push_back(std::move(part)); - } - } - - MergeTreeData::MutableDataPartsVector result; - std::copy_if(parts.begin(), parts.end(), std::back_inserter(result), - [&dropped_parts](const auto & part) { return dropped_parts.count(part->name) == 0; }); - - /// All parts in WAL had been already committed into the disk -> clear the WAL - if (!readonly && result.empty()) - { - LOG_DEBUG(log, "WAL file '{}' had been completely processed. 
Removing.", path); - disk->removeFile(path); - init(); - return {}; - } - - return result; -} - -void MergeTreeWriteAheadLog::shutdown() -{ - { - std::unique_lock lock(write_mutex); - if (shutdown_called) - return; - - if (sync_scheduled) - sync_cv.wait(lock, [this] { return !sync_scheduled; }); - - shutdown_called = true; - out->finalize(); - out.reset(); - } - - /// Do it without lock, otherwise inversion between pool lock and write_mutex is possible - sync_task->deactivate(); -} - -std::optional -MergeTreeWriteAheadLog::tryParseMinMaxBlockNumber(const String & filename) -{ - Int64 min_block; - Int64 max_block; - ReadBufferFromString in(filename); - if (!checkString(WAL_FILE_NAME, in) - || !checkChar('_', in) - || !tryReadIntText(min_block, in) - || !checkChar('_', in) - || !tryReadIntText(max_block, in)) - { - return {}; - } - - return std::make_pair(min_block, max_block); -} - -String MergeTreeWriteAheadLog::ActionMetadata::toJSON() const -{ - Poco::JSON::Object json; - - if (part_uuid != UUIDHelpers::Nil) - json.set(JSON_KEY_PART_UUID, toString(part_uuid)); - - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss.exceptions(std::ios::failbit); - json.stringify(oss); - - return oss.str(); -} - -void MergeTreeWriteAheadLog::ActionMetadata::fromJSON(const String & buf) -{ - Poco::JSON::Parser parser; - auto json = parser.parse(buf).extract(); - - if (json->has(JSON_KEY_PART_UUID)) - part_uuid = parseFromString(json->getValue(JSON_KEY_PART_UUID)); -} - -void MergeTreeWriteAheadLog::ActionMetadata::read(ReadBuffer & meta_in) -{ - readIntBinary(min_compatible_version, meta_in); - if (min_compatible_version > WAL_VERSION) - throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, - "WAL metadata version {} is not compatible with this ClickHouse version", - toString(min_compatible_version)); - - size_t metadata_size; - readVarUInt(metadata_size, meta_in); - - if (metadata_size == 0) - return; - - String buf(metadata_size, ' '); - meta_in.readStrict(buf.data(), metadata_size); - - fromJSON(buf); -} - -void MergeTreeWriteAheadLog::ActionMetadata::write(WriteBuffer & meta_out) const -{ - writeIntBinary(min_compatible_version, meta_out); - - String ser_meta = toJSON(); - - writeVarUInt(static_cast(ser_meta.length()), meta_out); - writeString(ser_meta, meta_out); -} - -} diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h deleted file mode 100644 index 9550fa6ecee..00000000000 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ /dev/null @@ -1,105 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class MergeTreeData; -struct DataPartsLock; - -/** WAL stores addditions and removals of data parts in in-memory format. - * Format of data in WAL: - * - version - * - type of action (ADD or DROP) - * - part name - * - part's block in Native format. (for ADD action) - */ -class MergeTreeWriteAheadLog -{ -public: - /// Append-only enum. It is serialized to WAL - enum class ActionType : UInt8 - { - ADD_PART = 0, - DROP_PART = 1, - }; - - struct ActionMetadata - { - /// The minimum version of WAL reader that can understand metadata written by current ClickHouse version. - /// This field must be increased when making backwards incompatible changes. - /// - /// The same approach can be used recursively inside metadata. - UInt8 min_compatible_version = 0; - - /// Actual metadata. 
- UUID part_uuid = UUIDHelpers::Nil; - - void write(WriteBuffer & meta_out) const; - void read(ReadBuffer & meta_in); - - private: - static constexpr auto JSON_KEY_PART_UUID = "part_uuid"; - - String toJSON() const; - void fromJSON(const String & buf); - }; - - constexpr static UInt8 WAL_VERSION = 1; - constexpr static auto WAL_FILE_NAME = "wal"; - constexpr static auto WAL_FILE_EXTENSION = ".bin"; - constexpr static auto DEFAULT_WAL_FILE_NAME = "wal.bin"; - - MergeTreeWriteAheadLog(MergeTreeData & storage_, const DiskPtr & disk_, - const String & name = DEFAULT_WAL_FILE_NAME); - - ~MergeTreeWriteAheadLog(); - - void dropPart(const String & part_name); - std::vector restore( - const StorageMetadataPtr & metadata_snapshot, - ContextPtr context, - DataPartsLock & parts_lock, - bool readonly); - - using MinMaxBlockNumber = std::pair; - static std::optional tryParseMinMaxBlockNumber(const String & filename); - void shutdown(); - - /// Drop all write ahead logs from disk. Useful during table drop. - static void dropAllWriteAheadLogs(DiskPtr disk_to_drop, std::string relative_data_path); -private: - void init(); - void rotate(const std::unique_lock & lock); - - const MergeTreeData & storage; - DiskPtr disk; - String name; - String path; - - std::unique_ptr out; - std::unique_ptr block_out; - - Int64 min_block_number = std::numeric_limits::max(); - Int64 max_block_number = -1; - - BackgroundSchedulePool & pool; - BackgroundSchedulePoolTaskHolder sync_task; - std::condition_variable sync_cv; - - size_t bytes_at_last_sync = 0; - bool sync_scheduled = false; - bool shutdown_called = false; - - mutable std::mutex write_mutex; - - LoggerPtr log; -}; - -} diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 1d10a1433ef..f2fe2e0f255 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -181,7 +181,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->rows_count = rows_count; new_part->modification_time = time(nullptr); - new_part->index = writer->releaseIndexColumns(); + new_part->setIndex(writer->releaseIndexColumns()); new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9959688d889..150cc27c369 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -73,6 +73,7 @@ static void splitAndModifyMutationCommands( LoggerPtr log) { auto part_columns = part->getColumnsDescription(); + const auto & table_columns = metadata_snapshot->getColumns(); if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { @@ -81,9 +82,19 @@ static void splitAndModifyMutationCommands( for (const auto & command : commands) { + if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) + { + /// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values + /// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file + auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); + if (!column_ordinary || 
!part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) + { + for_interpreter.push_back(command); + mutated_columns.emplace(command.column_name); + } + } if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC - || command.type == MutationCommand::Type::MATERIALIZE_COLUMN || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE @@ -93,9 +104,6 @@ static void splitAndModifyMutationCommands( for_interpreter.push_back(command); for (const auto & [column_name, expr] : command.column_to_update_expression) mutated_columns.emplace(column_name); - - if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) - mutated_columns.emplace(command.column_name); } else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION @@ -160,7 +168,7 @@ static void splitAndModifyMutationCommands( { if (!mutated_columns.contains(column.name)) { - if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtuals().contains(column.name)) + if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtualsPtr()->has(column.name)) { /// We cannot add the column because there's no such column in table. /// It's okay if the column was dropped. It may also absent in dropped_columns @@ -205,8 +213,15 @@ static void splitAndModifyMutationCommands( { for (const auto & command : commands) { - if (command.type == MutationCommand::Type::MATERIALIZE_INDEX - || command.type == MutationCommand::Type::MATERIALIZE_COLUMN + if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) + { + /// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values + /// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file + auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); + if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) + for_interpreter.push_back(command); + } + else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL @@ -268,7 +283,6 @@ getColumnsForNewDataPart( ColumnsDescription part_columns(source_part->getColumns()); NamesAndTypesList system_columns; - const auto & deleted_mask_column = LightweightDeleteDescription::FILTER_COLUMN; bool supports_lightweight_deletes = source_part->supportLightweightDeleteMutate(); bool deleted_mask_updated = false; @@ -284,9 +298,9 @@ getColumnsForNewDataPart( { for (const auto & [column_name, _] : command.column_to_update_expression) { - if (column_name == deleted_mask_column.name + if (column_name == RowExistsColumn::name && supports_lightweight_deletes - && !storage_columns_set.contains(deleted_mask_column.name)) + && !storage_columns_set.contains(RowExistsColumn::name)) deleted_mask_updated = true; } } @@ -308,12 +322,12 @@ getColumnsForNewDataPart( } } - if (!storage_columns_set.contains(deleted_mask_column.name)) + if (!storage_columns_set.contains(RowExistsColumn::name)) { - if (deleted_mask_updated || (part_columns.has(deleted_mask_column.name) && !has_delete_command)) + if 
(deleted_mask_updated || (part_columns.has(RowExistsColumn::name) && !has_delete_command)) { - storage_columns.push_back(deleted_mask_column); - storage_columns_set.insert(deleted_mask_column.name); + storage_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); + storage_columns_set.insert(RowExistsColumn::name); } } @@ -558,9 +572,7 @@ static std::set getProjectionsToRecalculate( { bool need_recalculate = materialized_projections.contains(projection.name) - || (!is_full_part_storage - && source_part->hasProjection(projection.name) - && !source_part->hasBrokenProjection(projection.name)); + || (!is_full_part_storage && source_part->hasProjection(projection.name)); if (need_recalculate) projections_to_recalc.insert(&projection); @@ -899,13 +911,12 @@ void finalizeMutatedPart( new_data_part->rows_count = source_part->rows_count; new_data_part->index_granularity = source_part->index_granularity; - new_data_part->index = source_part->index; + new_data_part->setIndex(source_part->getIndex()); new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->modification_time = time(nullptr); /// Load rest projections which are hardlinked - bool noop; - new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */); + new_data_part->loadProjections(false, false, true /* if_not_loaded */); /// All information about sizes is stored in checksums. /// It doesn't make sense to touch filesystem for sizes. @@ -1224,14 +1235,8 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { - // If the parent part is an in-memory part, squash projection output into one block and - // build in-memory projection because we don't support merging into a new in-memory part. - // Otherwise we split the materialization into multiple stages similar to the process of - // INSERT SELECT query. - if (ctx->new_data_part->getType() == MergeTreeDataPartType::InMemory) - projection_squashes.emplace_back(0, 0); - else - projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
+ projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } } @@ -1488,9 +1493,7 @@ private: bool need_recalculate = ctx->materialized_projections.contains(projection.name) - || (!is_full_part_storage - && ctx->source_part->hasProjection(projection.name) - && !ctx->source_part->hasBrokenProjection(projection.name)); + || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name)); if (need_recalculate) { @@ -1627,9 +1630,8 @@ private: void finalize() { - bool noop; ctx->new_data_part->minmax_idx = std::move(ctx->minmax_idx); - ctx->new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */); + ctx->new_data_part->loadProjections(false, false, true /* if_not_loaded */); ctx->mutating_executor.reset(); ctx->mutating_pipeline.reset(); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 2fe237efdc7..7d9691b847d 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -1,28 +1,19 @@ #include #include -#include -#include #include -#include -#include #include #include #include -#include #include #include #include #include -#include -#include #include #include #include #include -#include -#include #include #include #include @@ -31,7 +22,7 @@ #include #include #include -#include + using namespace DB; diff --git a/src/Storages/MergeTree/RangesInDataPart.h b/src/Storages/MergeTree/RangesInDataPart.h index 82152ee3906..0bc5829c82e 100644 --- a/src/Storages/MergeTree/RangesInDataPart.h +++ b/src/Storages/MergeTree/RangesInDataPart.h @@ -19,8 +19,8 @@ using DataPartPtr = std::shared_ptr; /// they look natural here because we can fully serialize and then deserialize original DataPart class. 
struct RangesInDataPartDescription { - MergeTreePartInfo info; - MarkRanges ranges; + MergeTreePartInfo info{}; + MarkRanges ranges{}; size_t rows = 0; void serialize(WriteBuffer & out) const; diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index 94c069d789b..37d848ad095 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -110,11 +111,14 @@ bool ReplicatedMergeMutateTaskBase::executeStep() auto mutations_end_it = in_partition->second.upper_bound(result_data_version); for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { + auto & src_part = log_entry->source_parts.at(0); ReplicatedMergeTreeQueue::MutationStatus & status = *it->second; - status.latest_failed_part = log_entry->source_parts.at(0); + status.latest_failed_part = src_part; status.latest_failed_part_info = source_part_info; status.latest_fail_time = time(nullptr); status.latest_fail_reason = getExceptionMessage(saved_exception, false); + if (result_data_version == it->first) + storage.mutation_backoff_policy.addPartMutationFailure(src_part, storage.getSettings()->max_postpone_time_for_failed_mutations_ms); } } } @@ -142,6 +146,12 @@ bool ReplicatedMergeMutateTaskBase::executeImpl() { storage.queue.removeProcessedEntry(storage.getZooKeeper(), selected_entry->log_entry); state = State::SUCCESS; + + auto & log_entry = selected_entry->log_entry; + if (log_entry->type == ReplicatedMergeTreeLogEntryData::MUTATE_PART) + { + storage.mutation_backoff_policy.removePartFromFailed(log_entry->source_parts.at(0)); + } } catch (...) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index bc0b4f73a31..156c41563ec 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -63,7 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t if (parts_set.contains(name)) return; - LOG_TRACE(log, "Enqueueing {} for check after after {}s", name, delay_to_check_seconds); + LOG_TRACE(log, "Enqueueing {} for check after {}s", name, delay_to_check_seconds); parts_queue.emplace_back(name, std::chrono::steady_clock::now() + std::chrono::seconds(delay_to_check_seconds)); parts_set.insert(name); task->schedule(); @@ -274,7 +274,7 @@ std::pair ReplicatedMergeTreePartCheckThread::findLo return std::make_pair(exists_in_zookeeper, part); } -ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name, bool throw_on_broken_projection) +ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name) { ReplicatedCheckResult result; auto [exists_in_zookeeper, part] = findLocalPart(part_name); @@ -341,7 +341,6 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St /// before the ReplicatedMergeTreePartHeader was introduced.
String part_path = storage.replica_path + "/parts/" + part_name; String part_znode = zookeeper->get(part_path); - bool is_broken_projection = false; try { @@ -363,10 +362,8 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St checkDataPart( part, - /* require_checksums */true, - is_broken_projection, - [this] { return need_stop.load(); }, - throw_on_broken_projection); + true, + [this] { return need_stop.load(); }); if (need_stop) { @@ -385,27 +382,14 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St if (isRetryableException(std::current_exception())) throw; - PreformattedMessage message; - if (is_broken_projection) - { - WriteBufferFromOwnString wb; - message = PreformattedMessage::create( - "Part {} has a broken projections. It will be ignored. Broken projections info: {}", - part_name, getCurrentExceptionMessage(false)); - LOG_DEBUG(log, message); - result.action = ReplicatedCheckResult::DoNothing; - } - else - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); - message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); - LOG_ERROR(log, message); - result.action = ReplicatedCheckResult::TryFetchMissing; - } + auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); + LOG_ERROR(log, message); /// Part is broken, let's try to find it and fetch. result.status = {part_name, false, message}; + result.action = ReplicatedCheckResult::TryFetchMissing; return result; } @@ -435,12 +419,12 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St } -CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional * recheck_after, bool throw_on_broken_projection) +CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional * recheck_after) { LOG_INFO(log, "Checking part {}", part_name); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); - ReplicatedCheckResult result = checkPartImpl(part_name, throw_on_broken_projection); + ReplicatedCheckResult result = checkPartImpl(part_name); switch (result.action) { case ReplicatedCheckResult::None: UNREACHABLE(); @@ -593,7 +577,7 @@ void ReplicatedMergeTreePartCheckThread::run() } std::optional recheck_after; - checkPartAndFix(selected->name, &recheck_after, /* throw_on_broken_projection */false); + checkPartAndFix(selected->name, &recheck_after); if (need_stop) return; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 9091f698546..f2e26b3d324 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -65,9 +65,9 @@ public: size_t size() const; /// Check part by name - CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr, bool throw_on_broken_projection = true); + CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr); - ReplicatedCheckResult checkPartImpl(const String & part_name, bool throw_on_broken_projection); + ReplicatedCheckResult checkPartImpl(const String & part_name); std::unique_lock pausePartsCheck(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 8d921bdcb1c..858eae4afd9 
100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -13,6 +13,7 @@ #include #include +#include namespace DB { @@ -682,7 +683,7 @@ std::pair ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::Zo ops.emplace_back(zkutil::makeSetRequest( fs::path(replica_path) / "min_unprocessed_insert_time", toString(*min_unprocessed_insert_time_changed), -1)); - auto responses = zookeeper->multi(ops); + auto responses = zookeeper->multi(ops, /* check_session_valid */ true); /// Now we have successfully updated the queue in ZooKeeper. Update it in RAM. @@ -860,6 +861,9 @@ ActiveDataPartSet getPartNamesToMutate( int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback) { + if (pull_log_blocker.isCancelled()) + throw Exception(ErrorCodes::ABORTED, "Log pulling is cancelled"); + std::lock_guard lock(update_mutations_mutex); Coordination::Stat mutations_stat; @@ -1346,13 +1350,18 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto part = data.getPartIfExists(name, {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); if (part) { - if (auto part_in_memory = asInMemoryPart(part)) - sum_parts_size_in_bytes += part_in_memory->block.bytes(); - else - sum_parts_size_in_bytes += part->getBytesOnDisk(); + sum_parts_size_in_bytes += part->getBytesOnDisk(); + + if (entry.type == LogEntry::MUTATE_PART && !storage.mutation_backoff_policy.partCanBeMutated(part->name)) + { + constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} " + "because recently it has failed. According to exponential backoff policy, put aside this log entry."; + + LOG_DEBUG(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.typeToString(), entry.new_part_name); + return false; + } } } - if (merger_mutator.merges_blocker.isCancelled()) { constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} because merges and mutations are cancelled now."; @@ -1786,7 +1795,7 @@ ReplicatedMergeTreeMergePredicate ReplicatedMergeTreeQueue::getMergePredicate(zk } -std::map ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const +MutationCommands ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const { std::unique_lock lock(state_mutex); @@ -1796,9 +1805,8 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo Int64 part_data_version = part->info.getDataVersion(); Int64 part_metadata_version = part->getMetadataVersion(); - LOG_TEST(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); - std::map result; + MutationCommands result; bool seen_all_data_mutations = false; bool seen_all_metadata_mutations = false; @@ -1811,7 +1819,15 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo if (seen_all_data_mutations && seen_all_metadata_mutations) break; - auto alter_version = mutation_status->entry->alter_version; + auto & entry = mutation_status->entry; + + auto add_to_result = [&] { + for (const auto & command : entry->commands | std::views::reverse) + if (AlterConversions::supportsMutationCommandType(command.type)) + result.emplace_back(command); + }; + + auto alter_version = entry->alter_version; if (alter_version != -1) { if (alter_version > storage.getInMemoryMetadataPtr()->getMetadataVersion()) @@ -1819,22 +1835,19 
@@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo /// We take commands with bigger metadata version if (alter_version > part_metadata_version) - result[mutation_version] = mutation_status->entry->commands; + add_to_result(); else seen_all_metadata_mutations = true; } else { if (mutation_version > part_data_version) - result[mutation_version] = mutation_status->entry->commands; + add_to_result(); else seen_all_data_mutations = true; } } - LOG_TEST(log, "Got {} commands for part {} (part data version {}, part metadata version {})", - result.size(), part->name, part_data_version, part_metadata_version); - return result; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 84106565dff..b17e7819946 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -401,7 +401,7 @@ public: /// Return mutation commands for part which could be not applied to /// it according to part mutation version. Used when we apply alter commands on fly, /// without actual data modification on disk. - std::map getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const; + MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const; /// Mark finished mutations as done. If the function needs to be called again at some later time /// (because some mutations are probably done but we are not sure yet), returns true. @@ -499,7 +499,7 @@ class BaseMergePredicate { public: BaseMergePredicate() = default; - BaseMergePredicate(std::optional && partition_ids_hint_) : partition_ids_hint(std::move(partition_ids_hint_)) {} + explicit BaseMergePredicate(std::optional && partition_ids_hint_) : partition_ids_hint(std::move(partition_ids_hint_)) {} /// Depending on the existence of left part checks a merge predicate for two parts or for single part. 
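The ReplicatedMergeMutateTaskBase and ReplicatedMergeTreeQueue hunks above wire failed part mutations into `storage.mutation_backoff_policy`: a failure registers the source part together with `max_postpone_time_for_failed_mutations_ms`, a successful MUTATE_PART removes it, and `shouldExecuteLogEntry` postpones the log entry while `partCanBeMutated` still returns false. The policy itself is defined elsewhere in the tree; the following is only a minimal sketch of such an exponential-backoff tracker, with an invented class name and a simplified interface (the real one receives the maximum postpone time per call rather than in the constructor).

```cpp
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <mutex>
#include <string>
#include <unordered_map>

/// Illustrative only: the real policy has a different interface. This sketch shows
/// the exponential-backoff idea behind "put aside this log entry".
class PartMutationBackoffSketch
{
public:
    explicit PartMutationBackoffSketch(uint64_t max_postpone_ms_) : max_postpone_ms(max_postpone_ms_) {}

    /// Called when a mutation of the part fails: the postpone interval doubles.
    void addPartMutationFailure(const std::string & part_name)
    {
        std::lock_guard lock(mutex);
        auto & info = failures[part_name];
        ++info.retries;
        info.last_fail = Clock::now();
    }

    /// Called when a mutation of the part finally succeeds.
    void removePartFromFailed(const std::string & part_name)
    {
        std::lock_guard lock(mutex);
        failures.erase(part_name);
    }

    /// The queue consults this before executing a MUTATE_PART log entry.
    bool partCanBeMutated(const std::string & part_name) const
    {
        std::lock_guard lock(mutex);
        auto it = failures.find(part_name);
        if (it == failures.end())
            return true;

        /// Postpone time grows as base * 2^retries, capped by max_postpone_ms.
        uint64_t postpone_ms = std::min<uint64_t>(max_postpone_ms, base_postpone_ms << std::min<uint64_t>(it->second.retries, 20));
        return Clock::now() - it->second.last_fail >= std::chrono::milliseconds(postpone_ms);
    }

private:
    using Clock = std::chrono::steady_clock;
    struct FailureInfo { uint64_t retries = 0; Clock::time_point last_fail; };

    const uint64_t base_postpone_ms = 1000;
    uint64_t max_postpone_ms;
    mutable std::mutex mutex;
    std::unordered_map<std::string, FailureInfo> failures;
};

int main()
{
    PartMutationBackoffSketch policy(/* max_postpone_ms */ 60000);
    policy.addPartMutationFailure("all_0_0_0");
    assert(!policy.partCanBeMutated("all_0_0_0"));   /// postponed right after a failure
    policy.removePartFromFailed("all_0_0_0");
    assert(policy.partCanBeMutated("all_0_0_0"));    /// eligible again once it succeeded
    return 0;
}
```

Doubling the postpone interval per failure keeps a repeatedly failing mutation from monopolizing the replication queue while still retrying it eventually.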
bool operator()(const MergeTreeData::DataPartPtr & left, @@ -550,7 +550,7 @@ protected: class LocalMergePredicate : public BaseMergePredicate { public: - LocalMergePredicate(ReplicatedMergeTreeQueue & queue_); + explicit LocalMergePredicate(ReplicatedMergeTreeQueue & queue_); }; class ReplicatedMergeTreeMergePredicate : public BaseMergePredicate diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQuorumAddedParts.h b/src/Storages/MergeTree/ReplicatedMergeTreeQuorumAddedParts.h index a0b0d026693..f0f737cb1e6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQuorumAddedParts.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQuorumAddedParts.h @@ -22,7 +22,7 @@ struct ReplicatedMergeTreeQuorumAddedParts MergeTreeDataFormatVersion format_version; - ReplicatedMergeTreeQuorumAddedParts(const MergeTreeDataFormatVersion format_version_) + explicit ReplicatedMergeTreeQuorumAddedParts(const MergeTreeDataFormatVersion format_version_) : format_version(format_version_) {} diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 1fb2393948a..e50b4007d64 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -879,7 +879,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: fiu_do_on(FailPoints::replicated_merge_tree_commit_zk_fail_after_op, { zookeeper->forceFailureAfterOperation(); }); Coordination::Responses responses; - Coordination::Error multi_code = zookeeper->tryMultiNoThrow(ops, responses); /// 1 RTT + Coordination::Error multi_code = zookeeper->tryMultiNoThrow(ops, responses, /* check_session_valid */ true); /// 1 RTT if (multi_code == Coordination::Error::ZOK) { diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index bbb38346f38..ca8ed9abdb5 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -34,6 +34,7 @@ public: , partition_id(part_->info.partition_id) { setInMemoryMetadata(storage.getInMemoryMetadata()); + setVirtuals(*storage.getVirtualsPtr()); } /// Used in queries with projection. 
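A pattern that repeats through the storage changes in this diff (StorageFromMergeTreeDataPart above, and the NATS, RabbitMQ and MaterializedPostgreSQL engines further below) is dropping the per-storage `getVirtuals()` override in favour of building a `VirtualColumnsDescription` once and installing it with `setVirtuals()` in the constructor. Below is a hedged sketch of the shape of that pattern using simplified stand-in types; the real `IStorage`/`VirtualColumnsDescription` interfaces are considerably richer, and the class names here are illustrative only.

```cpp
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <utility>

/// Simplified stand-ins; the real IDataType / VirtualColumnsDescription / IStorage
/// interfaces in ClickHouse are much richer than this.
struct IDataType { virtual ~IDataType() = default; virtual std::string getName() const = 0; };
struct DataTypeString : IDataType { std::string getName() const override { return "String"; } };
struct DataTypeUInt64 : IDataType { std::string getName() const override { return "UInt64"; } };

class VirtualColumnsDescriptionSketch
{
public:
    void addEphemeral(const std::string & name, std::shared_ptr<const IDataType> type, const std::string & comment)
    {
        columns.emplace(name, Column{std::move(type), comment});
    }

    bool has(const std::string & name) const { return columns.contains(name); }

private:
    struct Column { std::shared_ptr<const IDataType> type; std::string comment; };
    std::map<std::string, Column> columns;
};

class StorageSketch
{
public:
    StorageSketch()
    {
        /// Virtual columns are fixed at construction time instead of being
        /// recomputed by a getVirtuals() override on every call.
        setVirtuals(createVirtuals());
    }

    bool hasVirtualColumn(const std::string & name) const { return virtuals.has(name); }

private:
    static VirtualColumnsDescriptionSketch createVirtuals()
    {
        VirtualColumnsDescriptionSketch desc;
        desc.addEphemeral("_subject", std::make_shared<DataTypeString>(), "");
        desc.addEphemeral("_delivery_tag", std::make_shared<DataTypeUInt64>(), "");
        return desc;
    }

    void setVirtuals(VirtualColumnsDescriptionSketch desc) { virtuals = std::move(desc); }

    VirtualColumnsDescriptionSketch virtuals;
};

int main()
{
    StorageSketch storage;
    assert(storage.hasVirtualColumn("_subject"));
    return 0;
}
```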
@@ -90,11 +91,6 @@ public: bool supportsSubcolumns() const override { return true; } - NamesAndTypesList getVirtuals() const override - { - return storage.getVirtuals(); - } - String getPartitionId() const { return partition_id; diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 0b545beb116..aea0020f89d 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -43,7 +42,6 @@ namespace ErrorCodes extern const int NO_FILE_IN_DATA_PART; extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; - extern const int BROKEN_PROJECTION; } @@ -118,9 +116,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( const NameSet & files_without_checksums, const ReadSettings & read_settings, bool require_checksums, - std::function is_cancelled, - bool & is_broken_projection, - bool throw_on_broken_projection) + std::function is_cancelled) { /** Responsibility: * - read list of columns from columns.txt; @@ -129,7 +125,6 @@ static IMergeTreeDataPart::Checksums checkDataPart( */ CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedChecks}; - Poco::Logger * log = &Poco::Logger::get("checkDataPart"); NamesAndTypesList columns_txt; @@ -258,7 +253,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( } /// Exclude files written by inverted index from check. No correct checksums are available for them currently. - if (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid")) + if (isGinFile(file_name)) continue; auto checksum_it = checksums_data.files.find(file_name); @@ -279,55 +274,17 @@ static IMergeTreeDataPart::Checksums checkDataPart( } } - std::string broken_projections_message; for (const auto & [name, projection] : data_part->getProjectionParts()) { if (is_cancelled()) return {}; auto projection_file = name + ".proj"; - if (!throw_on_broken_projection && projection->is_broken) - { - projections_on_disk.erase(projection_file); - checksums_txt.remove(projection_file); - } - - IMergeTreeDataPart::Checksums projection_checksums; - try - { - bool noop; - projection_checksums = checkDataPart( - projection, *data_part_storage.getProjection(projection_file), - projection->getColumns(), projection->getType(), - projection->getFileNamesWithoutChecksums(), - read_settings, require_checksums, is_cancelled, noop, /* throw_on_broken_projection */false); - } - catch (...) 
- { - if (isRetryableException(std::current_exception())) - throw; - - if (!projection->is_broken) - { - LOG_TEST(log, "Marking projection {} as broken ({})", name, projection_file); - projection->setBrokenReason(getCurrentExceptionMessage(false), getCurrentExceptionCode()); - } - - is_broken_projection = true; - if (throw_on_broken_projection) - { - if (!broken_projections_message.empty()) - broken_projections_message += "\n"; - - broken_projections_message += fmt::format( - "Part {} has a broken projection {} (error: {})", - data_part->name, name, getCurrentExceptionMessage(false)); - continue; - } - - projections_on_disk.erase(projection_file); - checksums_txt.remove(projection_file); - } + auto projection_checksums = checkDataPart( + projection, *data_part_storage.getProjection(projection_file), + projection->getColumns(), projection->getType(), + projection->getFileNamesWithoutChecksums(), + read_settings, require_checksums, is_cancelled); checksums_data.files[projection_file] = IMergeTreeDataPart::Checksums::Checksum( projection_checksums.getTotalSizeOnDisk(), @@ -336,11 +293,6 @@ static IMergeTreeDataPart::Checksums checkDataPart( projections_on_disk.erase(projection_file); } - if (throw_on_broken_projection && !broken_projections_message.empty()) - { - throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message); - } - if (require_checksums && !projections_on_disk.empty()) { throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, @@ -357,24 +309,11 @@ static IMergeTreeDataPart::Checksums checkDataPart( return checksums_data; } -IMergeTreeDataPart::Checksums checkDataPartInMemory(const DataPartInMemoryPtr & data_part) -{ - IMergeTreeDataPart::Checksums data_checksums; - data_checksums.files["data.bin"] = data_part->calculateBlockChecksum(); - data_part->checksums.checkEqual(data_checksums, true); - return data_checksums; -} - IMergeTreeDataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, bool require_checksums, - bool & is_broken_projection, - std::function is_cancelled, - bool throw_on_broken_projection) + std::function is_cancelled) { - if (auto part_in_memory = asInMemoryPart(data_part)) - return checkDataPartInMemory(part_in_memory); - /// If check of part has failed and it is stored on disk with cache /// try to drop cache and check it once again because maybe the cache /// is broken not the part itself. @@ -412,9 +351,7 @@ IMergeTreeDataPart::Checksums checkDataPart( data_part->getFileNamesWithoutChecksums(), read_settings, require_checksums, - is_cancelled, - is_broken_projection, - throw_on_broken_projection); + is_cancelled); }; try @@ -428,9 +365,7 @@ IMergeTreeDataPart::Checksums checkDataPart( data_part->getFileNamesWithoutChecksums(), read_settings, require_checksums, - is_cancelled, - is_broken_projection, - throw_on_broken_projection); + is_cancelled); } catch (...) 
{ diff --git a/src/Storages/MergeTree/checkDataPart.h b/src/Storages/MergeTree/checkDataPart.h index a01978f4efe..d0e48b6f80a 100644 --- a/src/Storages/MergeTree/checkDataPart.h +++ b/src/Storages/MergeTree/checkDataPart.h @@ -10,9 +10,7 @@ namespace DB IMergeTreeDataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, bool require_checksums, - bool & is_broken_projection, - std::function is_cancelled = []{ return false; }, - bool throw_on_broken_projection = false); + std::function is_cancelled = []{ return false; }); bool isNotEnoughMemoryErrorCode(int code); bool isRetryableException(const std::exception_ptr exception_ptr); diff --git a/src/Storages/MergeTree/examples/CMakeLists.txt b/src/Storages/MergeTree/examples/CMakeLists.txt deleted file mode 100644 index 25bba7ae0b4..00000000000 --- a/src/Storages/MergeTree/examples/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -clickhouse_add_executable (wal_action_metadata wal_action_metadata.cpp) -target_link_libraries (wal_action_metadata PRIVATE dbms) diff --git a/src/Storages/MergeTree/examples/wal_action_metadata.cpp b/src/Storages/MergeTree/examples/wal_action_metadata.cpp deleted file mode 100644 index 03c38c7a186..00000000000 --- a/src/Storages/MergeTree/examples/wal_action_metadata.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include - -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int UNKNOWN_FORMAT_VERSION; -} -} - -int main(int, char **) -{ - try - { - { - std::cout << "test: dummy test" << std::endl; - - DB::MergeTreeWriteAheadLog::ActionMetadata metadata_out; - DB::MemoryWriteBuffer buf{}; - - metadata_out.write(buf); - buf.finalize(); - - metadata_out.read(*buf.tryGetReadBuffer()); - } - - { - std::cout << "test: min compatibility" << std::endl; - - DB::MergeTreeWriteAheadLog::ActionMetadata metadata_out; - metadata_out.min_compatible_version = DB::MergeTreeWriteAheadLog::WAL_VERSION + 1; - DB::MemoryWriteBuffer buf{}; - - metadata_out.write(buf); - buf.finalize(); - - try - { - metadata_out.read(*buf.tryGetReadBuffer()); - } - catch (const DB::Exception & e) - { - if (e.code() != DB::ErrorCodes::UNKNOWN_FORMAT_VERSION) - { - std::cerr << "Expected UNKNOWN_FORMAT_VERSION exception but got: " - << e.what() << ", " << e.displayText() << std::endl; - } - } - } - } - catch (const DB::Exception & e) - { - std::cerr << e.what() << ", " << e.displayText() << std::endl; - return 1; - } - - return 0; -} diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 0a723e7adb4..2b24a56e994 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -9,16 +8,12 @@ #include #include #include -#include -#include -#include #include #include #include #include -#include #include #include @@ -298,7 +293,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) arg_idx, e.message(), verbose_help_message); } } - else if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_deprecated_syntax_for_merge_tree) + else if (args.mode <= LoadingStrictnessLevel::CREATE && !args.getLocalContext()->getSettingsRef().allow_deprecated_syntax_for_merge_tree) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "This syntax for *MergeTree engine is deprecated. " "Use extended storage definition syntax with ORDER BY/PRIMARY KEY clause. 
" @@ -315,12 +310,13 @@ static StoragePtr create(const StorageFactory::Arguments & args) DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated"; /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries - bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach; + /// and if UUID was explicitly passed in CREATE TABLE (like for ATTACH) + bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach || args.query.has_uuid; auto expand_macro = [&] (ASTLiteral * ast_zk_path, ASTLiteral * ast_replica_name) { /// Unfold {database} and {table} macro on table creation, so table can be renamed. - if (!args.attach) + if (args.mode < LoadingStrictnessLevel::ATTACH) { Macros::MacroExpansionInfo info; /// NOTE: it's not recursive @@ -581,7 +577,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->sample_by) metadata.sampling_key = KeyDescription::getKeyFromAST(args.storage_def->sample_by->ptr(), metadata.columns, context); - bool allow_suspicious_ttl = args.attach || args.getLocalContext()->getSettingsRef().allow_suspicious_ttl_expressions; + bool allow_suspicious_ttl = LoadingStrictnessLevel::SECONDARY_CREATE <= args.mode || args.getLocalContext()->getSettingsRef().allow_suspicious_ttl_expressions; if (args.storage_def->ttl_table) { @@ -608,12 +604,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.column_ttls_by_name[name] = new_ttl_entry; } - storage_settings->loadFromQuery(*args.storage_def, context, args.attach); + storage_settings->loadFromQuery(*args.storage_def, context, LoadingStrictnessLevel::ATTACH <= args.mode); // updates the default storage_settings with settings specified via SETTINGS arg in a query if (args.storage_def->settings) { - if (!args.attach) + if (args.mode <= LoadingStrictnessLevel::CREATE) args.getLocalContext()->checkMergeTreeSettingsConstraints(initial_storage_settings, storage_settings->changes()); metadata.settings_changes = args.storage_def->settings->ptr(); } @@ -689,7 +685,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (ast && ast->value.getType() == Field::Types::UInt64) { storage_settings->index_granularity = ast->value.safeGet(); - if (!args.attach) + if (args.mode <= LoadingStrictnessLevel::CREATE) { SettingsChanges changes; changes.emplace_back("index_granularity", Field(storage_settings->index_granularity)); @@ -700,12 +696,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Index granularity must be a positive integer{}", verbose_help_message); ++arg_num; - if (args.storage_def->ttl_table && !args.attach) + if (args.storage_def->ttl_table && args.mode <= LoadingStrictnessLevel::CREATE) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table TTL is not allowed for MergeTree in old syntax"); } DataTypes data_types = metadata.partition_key.data_types; - if (!args.attach && !storage_settings->allow_floating_point_partition_key) + if (args.mode <= LoadingStrictnessLevel::CREATE && !storage_settings->allow_floating_point_partition_key) { for (size_t i = 0; i < data_types.size(); ++i) if (isFloat(data_types[i])) @@ -725,7 +721,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) return std::make_shared( zookeeper_path, replica_name, - args.attach, + args.mode, args.table_id, args.relative_data_path, metadata, @@ -733,7 +729,6 @@ static StoragePtr create(const StorageFactory::Arguments & 
args) date_column_name, merging_params, std::move(storage_settings), - args.has_force_restore_data_flag, renaming_restrictions, need_check_table_structure); } @@ -742,12 +737,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) args.table_id, args.relative_data_path, metadata, - args.attach, + args.mode, context, date_column_name, merging_params, - std::move(storage_settings), - args.has_force_restore_data_flag); + std::move(storage_settings)); } diff --git a/src/Storages/MergeTree/tests/gtest_combine_filters.cpp b/src/Storages/MergeTree/tests/gtest_combine_filters.cpp index 53696474eb8..9a9e6caad81 100644 --- a/src/Storages/MergeTree/tests/gtest_combine_filters.cpp +++ b/src/Storages/MergeTree/tests/gtest_combine_filters.cpp @@ -138,6 +138,57 @@ bool testCombineColumns(size_t size) return true; } +/* To ensure the vectorized DB::andFilters works as its scalar implementation, this test validates the AND (&&) + * of any combinations of the UInt8 values. + */ +bool testAndFilters(size_t size) +{ + auto generateFastIncrementColumn = [](size_t len)->ColumnPtr + { + auto filter = ColumnUInt8::create(len); + auto & filter_data = filter->getData(); + + for (size_t i = 0; i < len; ++i) + filter_data[i] = static_cast(i & 0xFF); + + return filter; + }; + + auto generateSlowIncrementColumn = [](size_t len)->ColumnPtr + { + auto filter = ColumnUInt8::create(len); + auto & filter_data = filter->getData(); + + for (size_t i = 0; i < len; ++i) + filter_data[i] = static_cast((i >> 8) & 0xFF); + + return filter; + }; + + auto first_filter = generateFastIncrementColumn(size); + auto second_filter = generateSlowIncrementColumn(size); + + auto result = andFilters(first_filter, second_filter); + + const auto & first_filter_data = typeid_cast(first_filter.get())->getData(); + const auto & second_filter_data = typeid_cast(second_filter.get())->getData(); + const auto & result_data = typeid_cast(result.get())->getData(); + + if (result->size() != size) + { + return false; + } + + for (size_t i = 0; i < size; i++) + { + UInt8 expected = first_filter_data[i] && second_filter_data[i]; + if (result_data[i] != expected) + return false; + } + + return true; +} + TEST(MergeTree, CombineFilters) { /// Tests with only 0/1 and fixed intervals. 
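The new `testAndFilters` above states its intent in the comment: the vectorized `DB::andFilters` must agree with a plain element-wise logical AND for every combination of UInt8 values, which the fast/slow incrementing filter columns enumerate. As a standalone illustration of the reference behaviour the test compares against, here is a sketch using `std::vector<uint8_t>` instead of ClickHouse columns (the function names are illustrative, not part of the codebase).

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

/// Scalar reference: result[i] is 1 iff both inputs are non-zero, mirroring
/// `first_filter_data[i] && second_filter_data[i]` in the test above.
std::vector<uint8_t> andFiltersScalar(const std::vector<uint8_t> & a, const std::vector<uint8_t> & b)
{
    assert(a.size() == b.size());
    std::vector<uint8_t> result(a.size());
    for (size_t i = 0; i < a.size(); ++i)
        result[i] = (a[i] && b[i]) ? 1 : 0;
    return result;
}

int main()
{
    /// Same data pattern as the test: one filter cycles fast through 0..255,
    /// the other cycles slowly, so every pair of byte values is eventually covered.
    const size_t size = 65536;
    std::vector<uint8_t> fast(size), slow(size);
    for (size_t i = 0; i < size; ++i)
    {
        fast[i] = static_cast<uint8_t>(i & 0xFF);
        slow[i] = static_cast<uint8_t>((i >> 8) & 0xFF);
    }

    auto result = andFiltersScalar(fast, slow);
    for (size_t i = 0; i < size; ++i)
        assert(result[i] == static_cast<uint8_t>(fast[i] && slow[i]));
    return 0;
}
```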
@@ -159,3 +210,18 @@ TEST(MergeTree, CombineFilters) EXPECT_TRUE(testCombineColumns(2000)); EXPECT_TRUE(testCombineColumns(200000)); } + +TEST(MergeTree, AndFilters) +{ + EXPECT_TRUE(testAndFilters(1)); + EXPECT_TRUE(testAndFilters(2)); + EXPECT_TRUE(testAndFilters(15)); + EXPECT_TRUE(testAndFilters(16)); + EXPECT_TRUE(testAndFilters(200)); + EXPECT_TRUE(testAndFilters(201)); + EXPECT_TRUE(testAndFilters(2000)); + EXPECT_TRUE(testAndFilters(65535)); + EXPECT_TRUE(testAndFilters(65536)); + EXPECT_TRUE(testAndFilters(65537)); + EXPECT_TRUE(testAndFilters(200000)); +} diff --git a/src/Storages/NATS/NATSSource.cpp b/src/Storages/NATS/NATSSource.cpp index 3fc01eacb22..54f479faacc 100644 --- a/src/Storages/NATS/NATSSource.cpp +++ b/src/Storages/NATS/NATSSource.cpp @@ -9,10 +9,10 @@ namespace DB { -static std::pair getHeaders(StorageNATS & storage, const StorageSnapshotPtr & storage_snapshot) +static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot) { auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized(); - auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames()); + auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock(); return {non_virtual_header, virtual_header}; } @@ -33,7 +33,7 @@ NATSSource::NATSSource( const Names & columns, size_t max_block_size_, StreamingHandleErrorMode handle_error_mode_) - : NATSSource(storage_, storage_snapshot_, getHeaders(storage_, storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_) + : NATSSource(storage_, storage_snapshot_, getHeaders(storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_) { } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 4b6ff1d8f2a..0b88a9e8929 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -50,7 +50,7 @@ StorageNATS::StorageNATS( ContextPtr context_, const ColumnsDescription & columns_, std::unique_ptr nats_settings_, - bool is_attach_) + LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , nats_settings(std::move(nats_settings_)) @@ -62,7 +62,7 @@ StorageNATS::StorageNATS( , log(getLogger("StorageNATS (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) - , is_attach(is_attach_) + , throw_on_startup_failure(mode <= LoadingStrictnessLevel::CREATE) { auto nats_username = getContext()->getMacros()->expand(nats_settings->nats_username); auto nats_password = getContext()->getMacros()->expand(nats_settings->nats_password); @@ -88,6 +88,7 @@ StorageNATS::StorageNATS( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals(nats_settings->nats_handle_error_mode)); nats_context = addSettings(getContext()); nats_context->makeQueryContext(); @@ -116,7 +117,7 @@ StorageNATS::StorageNATS( catch (...) 
{ tryLogCurrentException(log); - if (!is_attach) + if (throw_on_startup_failure) throw; } @@ -131,6 +132,19 @@ StorageNATS::StorageNATS( connection_task->deactivate(); } +VirtualColumnsDescription StorageNATS::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + desc.addEphemeral("_subject", std::make_shared(), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} Names StorageNATS::parseList(const String & list, char delim) { @@ -399,7 +413,6 @@ SinkToStoragePtr StorageNATS::write(const ASTPtr &, const StorageMetadataPtr & m void StorageNATS::startup() { - (void) is_attach; for (size_t i = 0; i < num_consumers; ++i) { try @@ -410,7 +423,7 @@ void StorageNATS::startup() } catch (...) { - if (!is_attach) + if (throw_on_startup_failure) throw; tryLogCurrentException(log); } @@ -741,26 +754,10 @@ void registerStorageNATS(StorageFactory & factory) if (!nats_settings->nats_subjects.changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_subjects` setting"); - return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(nats_settings), args.attach); + return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(nats_settings), args.mode); }; factory.registerStorage("NATS", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } - -NamesAndTypesList StorageNATS::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_subject", std::make_shared()} - }; - - if (nats_settings->nats_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - virtuals.push_back({"_raw_message", std::make_shared(std::make_shared())}); - virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - return virtuals; -} - } diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 882119f5cdb..41d77acfde6 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -24,7 +24,7 @@ public: ContextPtr context_, const ColumnsDescription & columns_, std::unique_ptr nats_settings_, - bool is_attach_); + LoadingStrictnessLevel mode); std::string getName() const override { return "NATS"; } @@ -61,7 +61,6 @@ public: NATSConsumerPtr popConsumer(std::chrono::milliseconds timeout); const String & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; void incrementReader(); void decrementReader(); @@ -117,7 +116,7 @@ private: std::mutex loop_mutex; mutable bool drop_table = false; - bool is_attach; + bool throw_on_startup_failure; NATSConsumerPtr createConsumer(); @@ -137,6 +136,7 @@ private: static Names parseList(const String & list, char delim); static String getTableBasedName(String name, const StorageID & table_id); + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); ContextMutablePtr addSettings(ContextPtr context) const; size_t getMaxBlockSize() const; diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index f832c7f53f1..c1e744e8d79 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -61,8 +62,31 @@ namespace auto value = literal_value->as()->value; return 
std::pair{key, Field(value)}; } + + std::pair getKeyValueFromAST(ASTPtr ast, ContextPtr context) + { + auto res = getKeyValueFromAST(ast, true, context); + + if (!res || !std::holds_alternative(res->second)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to get key value from ast '{}'", queryToString(ast)); + + return {res->first, std::get(res->second)}; + } } +std::map getParamsMapFromAST(ASTs asts, ContextPtr context) +{ + std::map params; + for (const auto & ast : asts) + { + auto [key, value] = getKeyValueFromAST(ast, context); + bool inserted = params.emplace(key, value).second; + if (!inserted) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Duplicated key '{}' in params", key); + } + + return params; +} MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( ASTs asts, ContextPtr context, bool throw_unknown_collection, std::vector> * complex_args) diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index 657ad91e825..a1909f514ea 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -21,10 +21,16 @@ namespace DB /// Table engines have collection name as first argument of ast and other arguments are key-value overrides. MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( ASTs asts, ContextPtr context, bool throw_unknown_collection = true, std::vector> * complex_args = nullptr); + /// Helper function to get named collection for dictionary source. /// Dictionaries have collection name as name argument of dict configuration and other arguments are overrides. MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); +/// Parses asts as key value pairs and returns a map of them. +/// If key or value cannot be parsed as literal or interpreted +/// as constant expression throws an exception. 
+std::map getParamsMapFromAST(ASTs asts, ContextPtr context); + HTTPHeaderEntries getHeadersFromNamedCollection(const NamedCollection & collection); struct ExternalDatabaseEqualKeysSet @@ -45,9 +51,9 @@ struct RedisEqualKeysSet template struct NamedCollectionValidateKey { NamedCollectionValidateKey() = default; - NamedCollectionValidateKey(const char * value_) : value(value_) {} - NamedCollectionValidateKey(std::string_view value_) : value(value_) {} - NamedCollectionValidateKey(const String & value_) : value(value_) {} + NamedCollectionValidateKey(const char * value_) : value(value_) {} /// NOLINT(google-explicit-constructor) + NamedCollectionValidateKey(std::string_view value_) : value(value_) {} /// NOLINT(google-explicit-constructor) + NamedCollectionValidateKey(const String & value_) : value(value_) {} /// NOLINT(google-explicit-constructor) std::string_view value; diff --git a/src/Storages/PartitionCommands.cpp b/src/Storages/PartitionCommands.cpp index 6ce66d85ddc..fd5964c7034 100644 --- a/src/Storages/PartitionCommands.cpp +++ b/src/Storages/PartitionCommands.cpp @@ -36,6 +36,13 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.part = command_ast->part; return res; } + else if (command_ast->type == ASTAlterCommand::FORGET_PARTITION) + { + PartitionCommand res; + res.type = FORGET_PARTITION; + res.partition = command_ast->partition->clone(); + return res; + } else if (command_ast->type == ASTAlterCommand::ATTACH_PARTITION) { PartitionCommand res; @@ -147,6 +154,8 @@ std::string PartitionCommand::typeToString() const return "DROP DETACHED PART"; else return "DROP DETACHED PARTITION"; + case PartitionCommand::Type::FORGET_PARTITION: + return "FORGET PARTITION"; case PartitionCommand::Type::FETCH_PARTITION: if (part) return "FETCH PART"; diff --git a/src/Storages/PartitionCommands.h b/src/Storages/PartitionCommands.h index b8b2ec47e71..f0ecf91f567 100644 --- a/src/Storages/PartitionCommands.h +++ b/src/Storages/PartitionCommands.h @@ -26,6 +26,7 @@ struct PartitionCommand MOVE_PARTITION, DROP_PARTITION, DROP_DETACHED_PARTITION, + FORGET_PARTITION, FETCH_PARTITION, FREEZE_ALL_PARTITIONS, FREEZE_PARTITION, diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index f13cb820ec3..64d329f74b2 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -51,7 +51,7 @@ namespace ErrorCodes /// For the case of single storage. 
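The new `getParamsMapFromAST` helper above folds key-value arguments into a map and rejects duplicates by checking the `bool` returned from `std::map::emplace`. A small standalone sketch of that idiom, with plain strings standing in for ASTs and `Field`s (the function name below is illustrative only):

```cpp
#include <cassert>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/// Mirrors the duplicate-key check in getParamsMapFromAST: std::map::emplace
/// returns {iterator, false} when the key is already present, which turns a
/// repeated parameter into an explicit error instead of a silent overwrite.
std::map<std::string, std::string> buildParamsMap(const std::vector<std::pair<std::string, std::string>> & pairs)
{
    std::map<std::string, std::string> params;
    for (const auto & [key, value] : pairs)
    {
        bool inserted = params.emplace(key, value).second;
        if (!inserted)
            throw std::runtime_error("Duplicated key '" + key + "' in params");
    }
    return params;
}

int main()
{
    auto params = buildParamsMap({{"host", "localhost"}, {"port", "5672"}});
    assert(params.size() == 2);

    bool thrown = false;
    try { buildParamsMap({{"host", "a"}, {"host", "b"}}); }
    catch (const std::runtime_error &) { thrown = true; }
    assert(thrown);
    return 0;
}
```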
StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( const StorageID & table_id_, - bool is_attach_, + LoadingStrictnessLevel mode, const String & remote_database_name, const String & remote_table_name_, const postgres::ConnectionInfo & connection_info, @@ -66,12 +66,13 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( , nested_context(makeNestedTableContext(context_->getGlobalContext())) , nested_table_id(StorageID(table_id_.database_name, getNestedTableName())) , remote_table_name(remote_table_name_) - , is_attach(is_attach_) + , is_attach(mode >= LoadingStrictnessLevel::ATTACH) { if (table_id_.uuid == UUIDHelpers::Nil) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL is allowed only for Atomic database"); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); replication_settings->materialized_postgresql_tables_list = remote_table_name_; @@ -127,8 +128,16 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( , nested_table_id(nested_storage_->getStorageID()) { setInMemoryMetadata(nested_storage_->getInMemoryMetadata()); + setVirtuals(*nested_storage_->getVirtualsPtr()); } +VirtualColumnsDescription StorageMaterializedPostgreSQL::createVirtuals() +{ + VirtualColumnsDescription desc; + desc.addEphemeral("_sign", std::make_shared(), ""); + desc.addEphemeral("_version", std::make_shared(), ""); + return desc; +} /// A temporary clone table might be created for current table in order to update its schema and reload /// all data in the background while current table will still handle read requests. @@ -254,15 +263,6 @@ void StorageMaterializedPostgreSQL::dropInnerTableIfAny(bool sync, ContextPtr lo } -NamesAndTypesList StorageMaterializedPostgreSQL::getVirtuals() const -{ - return NamesAndTypesList{ - {"_sign", std::make_shared()}, - {"_version", std::make_shared()} - }; -} - - bool StorageMaterializedPostgreSQL::needRewriteQueryWithFinal(const Names & column_names) const { return needRewriteQueryWithFinalForStorage(column_names, getNested()); @@ -573,7 +573,8 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) metadata.setColumns(args.columns); metadata.setConstraints(args.constraints); - if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_materialized_postgresql_table) + if (args.mode <= LoadingStrictnessLevel::CREATE + && !args.getLocalContext()->getSettingsRef().allow_experimental_materialized_postgresql_table) throw Exception(ErrorCodes::BAD_ARGUMENTS, "MaterializedPostgreSQL is an experimental table engine." 
" You can enable it with the `allow_experimental_materialized_postgresql_table` setting"); @@ -600,7 +601,7 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) postgresql_replication_settings->loadFromQuery(*args.storage_def); return std::make_shared( - args.table_id, args.attach, configuration.database, configuration.table, connection_info, + args.table_id, args.mode, configuration.database, configuration.table, connection_info, metadata, args.getContext(), std::move(postgresql_replication_settings)); }; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index 9c9418a8caa..af2f13bb880 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -74,7 +74,7 @@ public: StorageMaterializedPostgreSQL( const StorageID & table_id_, - bool is_attach_, + LoadingStrictnessLevel mode, const String & remote_database_name, const String & remote_table_name, const postgres::ConnectionInfo & connection_info, @@ -89,8 +89,6 @@ public: /// Used only for single MaterializedPostgreSQL storage. void dropInnerTableIfAny(bool sync, ContextPtr local_context) override; - NamesAndTypesList getVirtuals() const override; - bool needRewriteQueryWithFinal(const Names & column_names) const override; void read( @@ -138,6 +136,8 @@ private: static std::shared_ptr getMaterializedColumnsDeclaration( String name, String type, UInt64 default_value); + static VirtualColumnsDescription createVirtuals(); + ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const; String getNestedTableName() const; diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp index 1843bebe3c7..28dc239ae37 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp @@ -128,6 +128,32 @@ bool RabbitMQConsumer::ackMessages(const CommitInfo & commit_info) return false; } +bool RabbitMQConsumer::nackMessages(const CommitInfo & commit_info) +{ + if (state != State::OK) + return false; + + /// Nothing to nack. 
+ if (!commit_info.delivery_tag || commit_info.delivery_tag <= last_commited_delivery_tag) + return false; + + if (consumer_channel->reject(commit_info.delivery_tag, AMQP::multiple)) + { + LOG_TRACE( + log, "Consumer rejected messages with deliveryTags from {} to {} on channel {}", + last_commited_delivery_tag, commit_info.delivery_tag, channel_id); + + return true; + } + + LOG_ERROR( + log, + "Failed to reject messages for {}:{}, (current commit point {}:{})", + commit_info.channel_id, commit_info.delivery_tag, + channel_id, last_commited_delivery_tag); + + return false; +} void RabbitMQConsumer::updateChannel(RabbitMQConnection & connection) { @@ -161,7 +187,7 @@ void RabbitMQConsumer::updateChannel(RabbitMQConnection & connection) consumer_channel->onError([&](const char * message) { - LOG_ERROR(log, "Channel {} in in error state: {}", channel_id, message); + LOG_ERROR(log, "Channel {} in an error state: {}", channel_id, message); state = State::ERROR; }); } diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.h b/src/Storages/RabbitMQ/RabbitMQConsumer.h index c78b33bfc7c..9dad175dda3 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.h @@ -50,7 +50,9 @@ public: UInt64 delivery_tag = 0; String channel_id; }; + const MessageData & currentMessage() { return current; } + const String & getChannelID() const { return channel_id; } /// Return read buffer containing next available message /// or nullptr if there are no messages to process. @@ -63,6 +65,7 @@ public: bool isConsumerStopped() const { return stopped.load(); } bool ackMessages(const CommitInfo & commit_info); + bool nackMessages(const CommitInfo & commit_info); bool hasPendingMessages() { return !received.empty(); } diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 3cec448fc11..09c1bf1b2e7 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -11,10 +11,20 @@ namespace DB { -static std::pair getHeaders(StorageRabbitMQ & storage_, const StorageSnapshotPtr & storage_snapshot) +static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot, const Names & column_names) { + auto all_columns_header = storage_snapshot->metadata->getSampleBlock(); + auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized(); - auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage_.getVirtuals().getNames()); + auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock(); + + for (const auto & column_name : column_names) + { + if (non_virtual_header.has(column_name) || virtual_header.has(column_name)) + continue; + const auto & column = all_columns_header.getByName(column_name); + non_virtual_header.insert(column); + } return {non_virtual_header, virtual_header}; } @@ -40,7 +50,7 @@ RabbitMQSource::RabbitMQSource( : RabbitMQSource( storage_, storage_snapshot_, - getHeaders(storage_, storage_snapshot_), + getHeaders(storage_snapshot_, columns), context_, columns, max_block_size_, @@ -123,7 +133,11 @@ Chunk RabbitMQSource::generateImpl() } if (is_finished || !consumer || consumer->isConsumerStopped()) + { + LOG_TRACE(log, "RabbitMQSource is stopped (is_finished: {}, consumer_stopped: {})", + is_finished, consumer ? toString(consumer->isConsumerStopped()) : "No consumer"); return {}; + } /// Currently it is one time usage source: to make sure data is flushed /// strictly by timeout or by block size.
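`RabbitMQConsumer::nackMessages` above mirrors `ackMessages` but rejects the batch so RabbitMQ redelivers it; `StorageRabbitMQ::tryStreamToViews` (further below) then chooses between `sendNack` and `sendAck` depending on whether pushing the block to the dependent views failed. A minimal sketch of that decision flow, assuming a simplified consumer interface with invented names:

```cpp
#include <cstdint>
#include <iostream>

/// Simplified stand-in for RabbitMQConsumer: ack confirms everything up to a
/// delivery tag, nack/reject returns those messages to the queue for redelivery.
struct ConsumerSketch
{
    bool ackMessages(uint64_t delivery_tag) { std::cout << "ack up to " << delivery_tag << '\n'; return true; }
    bool nackMessages(uint64_t delivery_tag) { std::cout << "reject up to " << delivery_tag << '\n'; return true; }
};

/// Returns true when the batch was written and acknowledged; false means the
/// caller should reschedule (the batch was either rejected or the send failed).
bool flushBatch(ConsumerSketch & consumer, uint64_t delivery_tag, bool write_failed)
{
    /// Same shape as tryStreamToViews: a failed write rejects the batch so the
    /// broker redelivers it, a successful write acknowledges it.
    bool sent = write_failed ? consumer.nackMessages(delivery_tag) : consumer.ackMessages(delivery_tag);
    if (!sent)
    {
        /// The real code iterates the connection's event loop here so that
        /// channel error callbacks get a chance to run.
        return false;
    }
    return !write_failed;
}

int main()
{
    ConsumerSketch consumer;
    flushBatch(consumer, 42, /* write_failed */ false);
    flushBatch(consumer, 43, /* write_failed */ true);
    return 0;
}
```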
@@ -254,13 +268,12 @@ Chunk RabbitMQSource::generateImpl() bool RabbitMQSource::sendAck() { - if (!consumer) - return false; + return consumer && consumer->ackMessages(commit_info); +} - if (!consumer->ackMessages(commit_info)) - return false; - - return true; +bool RabbitMQSource::sendNack() +{ + return consumer && consumer->nackMessages(commit_info); } } diff --git a/src/Storages/RabbitMQ/RabbitMQSource.h b/src/Storages/RabbitMQ/RabbitMQSource.h index 21d059bfae2..0d6fad97054 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.h +++ b/src/Storages/RabbitMQ/RabbitMQSource.h @@ -33,6 +33,7 @@ public: bool needChannelUpdate(); void updateChannel(); bool sendAck(); + bool sendNack(); private: StorageRabbitMQ & storage; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 868f48d0b7d..b882fd2728c 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -69,7 +70,7 @@ StorageRabbitMQ::StorageRabbitMQ( ContextPtr context_, const ColumnsDescription & columns_, std::unique_ptr rabbitmq_settings_, - bool is_attach) + LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , rabbitmq_settings(std::move(rabbitmq_settings_)) @@ -133,9 +134,13 @@ StorageRabbitMQ::StorageRabbitMQ( if (configuration.secure) SSL_library_init(); + if (!columns_.getMaterialized().empty() || !columns_.getAliases().empty() || !columns_.getDefaults().empty() || !columns_.getEphemeral().empty()) + context_->addWarningMessage("RabbitMQ table engine doesn't support ALIAS, DEFAULT or MATERIALIZED columns. They will be ignored and filled with default values"); + StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode)); rabbitmq_context = addSettings(getContext()); rabbitmq_context->makeQueryContext(); @@ -170,13 +175,13 @@ StorageRabbitMQ::StorageRabbitMQ( connection = std::make_unique(configuration, log); if (connection->connect()) initRabbitMQ(); - else if (!is_attach) + else if (mode <= LoadingStrictnessLevel::CREATE) throw Exception(ErrorCodes::CANNOT_CONNECT_RABBITMQ, "Cannot connect to {}", connection->connectionInfoForLog()); } catch (...) 
{ tryLogCurrentException(log); - if (!is_attach) + if (mode <= LoadingStrictnessLevel::CREATE) throw; } @@ -191,6 +196,26 @@ StorageRabbitMQ::StorageRabbitMQ( init_task->deactivate(); } +VirtualColumnsDescription StorageRabbitMQ::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_exchange_name", std::make_shared(), ""); + desc.addEphemeral("_channel_id", std::make_shared(), ""); + desc.addEphemeral("_delivery_tag", std::make_shared(), ""); + desc.addEphemeral("_redelivered", std::make_shared(), ""); + desc.addEphemeral("_message_id", std::make_shared(), ""); + desc.addEphemeral("_timestamp", std::make_shared(), ""); + + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} Names StorageRabbitMQ::parseSettings(String settings_list) { @@ -1034,18 +1059,7 @@ bool StorageRabbitMQ::tryStreamToViews() if (!table) throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); - // Create an INSERT query for streaming data - auto insert = std::make_shared(); - insert->table_id = table_id; - - // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, false, true, true); - auto block_io = interpreter.execute(); - auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); - auto column_names = block_io.pipeline.getHeader().getNames(); - auto sample_block = storage_snapshot->getSampleBlockForColumns(column_names); - auto block_size = getMaxBlockSize(); // Create a stream for each consumer and join them in a union stream @@ -1061,21 +1075,50 @@ bool StorageRabbitMQ::tryStreamToViews() for (size_t i = 0; i < num_created_consumers; ++i) { auto source = std::make_shared( - *this, storage_snapshot, rabbitmq_context, column_names, block_size, max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode, false); + *this, storage_snapshot, rabbitmq_context, Names{}, block_size, + max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode); sources.emplace_back(source); pipes.emplace_back(source); } + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + if (!sources.empty()) + { + auto column_list = std::make_shared(); + const auto & header = sources[0]->getPort().getHeader(); + for (const auto & column : header) + column_list->children.emplace_back(std::make_shared(column.name)); + insert->columns = std::move(column_list); + } + + // Only insert into dependent views and expect that input blocks contain virtual columns + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); + auto block_io = interpreter.execute(); + block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); + std::atomic_size_t rows = 0; + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + if (!connection->getHandler().loopRunning()) startLoop(); + bool write_failed = false; + try { CompletedPipelineExecutor executor(block_io.pipeline); executor.execute(); } + catch (...) + { + LOG_ERROR(log, "Failed to push to views. 
Error: {}", getCurrentExceptionMessage(true)); + write_failed = true; + } + + LOG_TRACE(log, "Processed {} rows", rows); /* Note: sending ack() with loop running in another thread will lead to a lot of data races inside the library, but only in case * error occurs or connection is lost while ack is being sent @@ -1083,13 +1126,6 @@ bool StorageRabbitMQ::tryStreamToViews() deactivateTask(looping_task, false, true); size_t queue_empty = 0; - if (!hasDependencies(getStorageID())) - { - /// Do not commit to rabbitmq if the dependency was removed. - LOG_TRACE(log, "No dependencies, reschedule"); - return false; - } - if (!connection->isConnected()) { if (shutdown_called) @@ -1130,7 +1166,7 @@ bool StorageRabbitMQ::tryStreamToViews() * the same channel will also commit all previously not-committed messages. Anyway I do not think that for ack frame this * will ever happen. */ - if (!source->sendAck()) + if (write_failed ? source->sendNack() : source->sendAck()) { /// Iterate loop to activate error callbacks if they happened connection->getHandler().iterateLoop(); @@ -1142,6 +1178,19 @@ bool StorageRabbitMQ::tryStreamToViews() } } + if (write_failed) + { + LOG_TRACE(log, "Write failed, reschedule"); + return false; + } + + if (!hasDependencies(getStorageID())) + { + /// Do not commit to rabbitmq if the dependency was removed. + LOG_TRACE(log, "No dependencies, reschedule"); + return false; + } + if ((queue_empty == num_created_consumers) && (++read_attempts == MAX_FAILED_READ_ATTEMPTS)) { connection->heartbeat(); @@ -1188,31 +1237,10 @@ void registerStorageRabbitMQ(StorageFactory & factory) if (!rabbitmq_settings->rabbitmq_format.changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `rabbitmq_format` setting"); - return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(rabbitmq_settings), args.attach); + return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(rabbitmq_settings), args.mode); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } - -NamesAndTypesList StorageRabbitMQ::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_exchange_name", std::make_shared()}, - {"_channel_id", std::make_shared()}, - {"_delivery_tag", std::make_shared()}, - {"_redelivered", std::make_shared()}, - {"_message_id", std::make_shared()}, - {"_timestamp", std::make_shared()} - }; - - if (rabbitmq_settings->rabbitmq_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - virtuals.push_back({"_raw_message", std::make_shared(std::make_shared())}); - virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - return virtuals; -} - } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 696734617be..e14741d9636 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -27,7 +27,7 @@ public: ContextPtr context_, const ColumnsDescription & columns_, std::unique_ptr rabbitmq_settings_, - bool is_attach); + LoadingStrictnessLevel mode); std::string getName() const override { return "RabbitMQ"; } @@ -68,7 +68,6 @@ public: RabbitMQConsumerPtr popConsumer(std::chrono::milliseconds timeout); const String & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; String getExchange() const { return exchange_name; } void unbindExchange(); @@ -191,6 +190,8 @@ private: bool tryStreamToViews(); bool hasDependencies(const 
StorageID & table_id); + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); + static String getRandomName() { std::uniform_int_distribution distribution('a', 'z'); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 4ead714c740..8725ab172ac 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -172,7 +172,7 @@ private: StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, - bool attach, + LoadingStrictnessLevel mode, ContextPtr context_, const String & primary_key_, Int32 ttl_, @@ -190,7 +190,7 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, { rocksdb_dir = context_->getPath() + relative_data_path_; } - if (!attach) + if (mode < LoadingStrictnessLevel::ATTACH) { fs::create_directories(rocksdb_dir); } @@ -479,31 +479,26 @@ class ReadFromEmbeddedRocksDB : public SourceStepWithFilter public: std::string getName() const override { return "ReadFromEmbeddedRocksDB"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; ReadFromEmbeddedRocksDB( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, - StorageSnapshotPtr storage_snapshot_, const StorageEmbeddedRocksDB & storage_, - SelectQueryInfo query_info_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , storage_snapshot(std::move(storage_snapshot_)) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , storage(storage_) - , query_info(std::move(query_info_)) - , context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) { } private: - StorageSnapshotPtr storage_snapshot; const StorageEmbeddedRocksDB & storage; - SelectQueryInfo query_info; - ContextPtr context; size_t max_block_size; size_t num_streams; @@ -526,13 +521,7 @@ void StorageEmbeddedRocksDB::read( Block sample_block = storage_snapshot->metadata->getSampleBlock(); auto reading = std::make_unique( - std::move(sample_block), - storage_snapshot, - *this, - query_info, - context_, - max_block_size, - num_streams); + column_names, query_info, storage_snapshot, context_, std::move(sample_block), *this, max_block_size, num_streams); query_plan.addStep(std::move(reading)); } @@ -582,11 +571,12 @@ void ReadFromEmbeddedRocksDB::initializePipeline(QueryPipelineBuilder & pipeline } } -void ReadFromEmbeddedRocksDB::applyFilters() +void ReadFromEmbeddedRocksDB::applyFilters(ActionDAGNodes added_filter_nodes) { + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const auto & sample_block = getOutputStream().header; auto primary_key_data_type = sample_block.getByName(storage.primary_key).type; - std::tie(keys, all_scan) = getFilterKeys(storage.primary_key, primary_key_data_type, filter_nodes, context); + std::tie(keys, all_scan) = getFilterKeys(storage.primary_key, primary_key_data_type, filter_actions_dag, context); } SinkToStoragePtr StorageEmbeddedRocksDB::write( @@ -630,7 +620,7 @@ static StoragePtr 
create(const StorageFactory::Arguments & args) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageEmbeddedRocksDB must require one column in primary key"); } - return std::make_shared(args.table_id, args.relative_data_path, metadata, args.attach, args.getContext(), primary_key_names[0], ttl, std::move(rocksdb_dir), read_only); + return std::make_shared(args.table_id, args.relative_data_path, metadata, args.mode, args.getContext(), primary_key_names[0], ttl, std::move(rocksdb_dir), read_only); } std::shared_ptr StorageEmbeddedRocksDB::getRocksDBStatistics() const diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index b09dfca7338..113e1f12b65 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -32,7 +32,7 @@ public: StorageEmbeddedRocksDB(const StorageID & table_id_, const String & relative_data_path_, const StorageInMemoryMetadata & metadata, - bool attach, + LoadingStrictnessLevel mode, ContextPtr context_, const String & primary_key_, Int32 ttl_ = 0, diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index d0533b5ba0c..eec2f53381f 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemRocksDB::getColumnsDescription() } -void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -87,7 +87,7 @@ void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr con { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.h b/src/Storages/RocksDB/StorageSystemRocksDB.h index c1f10a7722d..ec351c75446 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.h +++ b/src/Storages/RocksDB/StorageSystemRocksDB.h @@ -11,7 +11,7 @@ class Context; /** Implements the `rocksdb` system table, which expose various rocksdb metrics. 
*/ -class StorageSystemRocksDB final : public IStorageSystemOneBlock<StorageSystemRocksDB> +class StorageSystemRocksDB final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemRocksDB"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override; }; } diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp index ac80ded5792..7b4438e1387 100644 --- a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp @@ -214,7 +214,7 @@ size_t S3QueueFilesMetadata::registerNewShard() } const auto zk_client = getZooKeeper(); - zk_client->createAncestors(zookeeper_shards_path / ""); + zk_client->createIfNotExists(zookeeper_shards_path, ""); std::string shard_node_path; size_t shard_id = 0; @@ -287,7 +287,10 @@ void S3QueueFilesMetadata::unregisterShard(size_t shard_id) const auto zk_client = getZooKeeper(); const auto node_path = getZooKeeperPathForShard(shard_id); - zk_client->remove(node_path); + auto error_code = zk_client->tryRemove(node_path); + if (error_code != Coordination::Error::ZOK + && error_code != Coordination::Error::ZNONODE) + throw zkutil::KeeperException::fromPath(error_code, node_path); } size_t S3QueueFilesMetadata::getProcessingIdsNum() const @@ -696,7 +699,10 @@ void S3QueueFilesMetadata::setFileProcessedForOrderedModeImpl( { auto code = zk_client->tryMulti(requests, responses); if (code == Coordination::Error::ZOK) + { + LOG_TEST(log, "Moved file `{}` to processed", path); return; + } } /// Failed to update max processed node, retry.
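Illustrative sketch (not part of the patch): the registerNewShard()/unregisterShard() hunks above switch to idempotent Keeper calls, so creating a shards node that already exists or removing a shard node that is already gone is treated as success instead of throwing. The helper names below are hypothetical; the sketch only assumes the zkutil client API already used in the hunks (zkutil::ZooKeeperPtr, createIfNotExists, tryRemove, Coordination::Error, zkutil::KeeperException::fromPath).

#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <string>

/// Create the shards node if it is not there yet.
/// createIfNotExists() swallows ZNODEEXISTS, unlike a plain create().
static void ensureShardsNodeExists(const zkutil::ZooKeeperPtr & zk_client, const std::string & shards_path)
{
    zk_client->createIfNotExists(shards_path, "");
}

/// Remove a shard node, tolerating the case where it was never created
/// or was already cleaned up.
static void removeShardNodeIfExists(const zkutil::ZooKeeperPtr & zk_client, const std::string & node_path)
{
    const auto error_code = zk_client->tryRemove(node_path);
    /// ZOK: removed now; ZNONODE: it was already gone; both count as success.
    if (error_code != Coordination::Error::ZOK && error_code != Coordination::Error::ZNONODE)
        throw zkutil::KeeperException::fromPath(error_code, node_path);
}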
diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index b4f5f957f76..19c69d5c589 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -80,6 +80,7 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si { val = keys.front(); keys.pop_front(); + chassert(idx == metadata->getProcessingIdForPath(val->key)); } } else @@ -103,7 +104,7 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si LOG_TEST(log, "Putting key {} into queue of processor {} (total: {})", val->key, processing_id_for_key, sharded_keys.size()); - if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) + if (auto it = sharded_keys.find(processing_id_for_key); it != sharded_keys.end()) { it->second.push_back(val); } @@ -111,7 +112,7 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si { throw Exception(ErrorCodes::LOGICAL_ERROR, "Processing id {} does not exist (Expected ids: {})", - idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); + processing_id_for_key, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); } } continue; @@ -352,7 +353,11 @@ void StorageS3QueueSource::applyActionAfterProcessing(const String & path) } } -void StorageS3QueueSource::appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed) +void StorageS3QueueSource::appendLogElement( + const std::string & filename, + S3QueueFilesMetadata::FileStatus & file_status_, + size_t processed_rows, + bool processed) { if (!s3_queue_log) return; @@ -363,6 +368,9 @@ void StorageS3QueueSource::appendLogElement(const std::string & filename, S3Queu elem = S3QueueLogElement { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), + .database = storage_id.database_name, + .table = storage_id.table_name, + .uuid = toString(storage_id.uuid), .file_name = filename, .rows_processed = processed_rows, .status = processed ? 
S3QueueLogElement::S3QueueStatus::Processed : S3QueueLogElement::S3QueueStatus::Failed, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 0723205b544..6e7ac2b47b8 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -138,19 +138,24 @@ StorageS3Queue::StorageS3Queue( StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = StorageS3::getTableStructureFromDataImpl(configuration, format_settings, context_); + ColumnsDescription columns; + if (configuration.format == "auto") + std::tie(columns, configuration.format) = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_); + else + columns = StorageS3::getTableStructureFromData(configuration, format_settings, context_); storage_metadata.setColumns(columns); } else { + if (configuration.format == "auto") + configuration.format = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_).second; storage_metadata.setColumns(columns_); } storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); @@ -234,18 +239,25 @@ class ReadFromS3Queue : public SourceStepWithFilter public: std::string getName() const override { return "ReadFromS3Queue"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; ReadFromS3Queue( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, ReadFromFormatInfo info_, std::shared_ptr storage_, - ContextPtr context_, size_t max_block_size_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , info(std::move(info_)) , storage(std::move(storage_)) - , context(std::move(context_)) , max_block_size(max_block_size_) { } @@ -253,7 +265,6 @@ public: private: ReadFromFormatInfo info; std::shared_ptr storage; - ContextPtr context; size_t max_block_size; std::shared_ptr iterator; @@ -270,9 +281,9 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) } -void ReadFromS3Queue::applyFilters() +void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -284,7 +295,7 @@ void StorageS3Queue::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, @@ -303,13 
+314,16 @@ void StorageS3Queue::read( } auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, read_from_format_info.source_header, read_from_format_info, std::move(this_ptr), - local_context, max_block_size); query_plan.addStep(std::move(reading)); @@ -478,7 +492,7 @@ bool StorageS3Queue::streamToViews() auto block_io = interpreter.execute(); auto file_iterator = createFileIterator(s3queue_context, nullptr); - auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context)); Pipes pipes; pipes.reserve(s3queue_settings->s3queue_processing_threads_num); @@ -587,8 +601,9 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { auto glob_iterator = std::make_unique( - *configuration.client, configuration.url, predicate, virtual_columns, local_context, + *configuration.client, configuration.url, predicate, getVirtualsList(), local_context, /* read_keys */nullptr, configuration.request_settings); + return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index fd3b4bb4914..bdd3ab7b687 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -51,8 +51,6 @@ public: size_t max_block_size, size_t num_streams) override; - NamesAndTypesList getVirtuals() const override { return virtual_columns; } - const auto & getFormatName() const { return configuration.format; } const fs::path & getZooKeeperPath() const { return zk_path; } @@ -71,7 +69,6 @@ private: Configuration configuration; const std::optional format_settings; - NamesAndTypesList virtual_columns; BackgroundSchedulePool::TaskHolder task; std::atomic stream_cancelled{false}; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 4cb88a6d3fc..22d30674b97 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -53,6 +53,7 @@ struct PrewhereInfo String prewhere_column_name; bool remove_prewhere_column = false; bool need_filter = false; + bool generated_by_optimizer = false; PrewhereInfo() = default; explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) @@ -74,6 +75,7 @@ struct PrewhereInfo prewhere_info->prewhere_column_name = prewhere_column_name; prewhere_info->remove_prewhere_column = remove_prewhere_column; prewhere_info->need_filter = need_filter; + prewhere_info->generated_by_optimizer = generated_by_optimizer; return prewhere_info; } @@ -206,6 +208,10 @@ struct SelectQueryInfo bool need_aggregate = false; PrewhereInfoPtr prewhere_info; + /// Generated by pre-run optimization with StorageDummy. + /// Currently it's used to support StorageMerge PREWHERE optimization. 
+ PrewhereInfoPtr optimized_prewhere_info; + /// If query has aggregate functions bool has_aggregates = false; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index c09db0bfb7b..bac9aa1cbdf 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -65,9 +65,9 @@ namespace ErrorCodes extern const int DATABASE_ACCESS_DENIED; extern const int CANNOT_COMPILE_REGEXP; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - } namespace @@ -127,7 +127,7 @@ void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configurat } -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, ContextPtr local_context) +StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) { StorageAzureBlob::Configuration configuration; @@ -143,7 +143,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); return configuration; } @@ -166,7 +166,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine auto is_format_arg = [] (const std::string & s) -> bool { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); + return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) @@ -199,7 +199,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine else if (engine_args.size() == 6) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); } @@ -217,7 +217,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine else if (engine_args.size() == 7) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); } @@ -236,13 +236,13 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); return configuration; } -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(ContextPtr local_context) +AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) { const auto & context_settings = local_context->getSettingsRef(); auto settings_ptr = std::make_unique(); @@ -447,7 +447,7 @@ Poco::URI 
StorageAzureBlob::Configuration::getConnectionURL() const StorageAzureBlob::StorageAzureBlob( const Configuration & configuration_, std::unique_ptr && object_storage_, - ContextPtr context, + const ContextPtr & context, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -463,17 +463,25 @@ StorageAzureBlob::StorageAzureBlob( , format_settings(format_settings_) , partition_by(partition_by_) { - FormatFactory::instance().checkFormatName(configuration.format); + if (configuration.format != "auto") + FormatFactory::instance().checkFormatName(configuration.format); context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing); + ColumnsDescription columns; + if (configuration.format == "auto") + std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context); + else + columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); storage_metadata.setColumns(columns); } else { + if (configuration.format == "auto") + configuration.format = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context).second; + /// We don't allow special columns in File storage. if (!columns_.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -483,12 +491,11 @@ StorageAzureBlob::StorageAzureBlob( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); StoredObjects objects; for (const auto & key : configuration.blobs_paths) objects.emplace_back(key); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) @@ -517,7 +524,7 @@ public: StorageAzureBlobSink( const String & format, const Block & sample_block_, - ContextPtr context, + const ContextPtr & context, std::optional format_settings_, const CompressionMethod compression_method, AzureObjectStorage * object_storage, @@ -607,22 +614,21 @@ private: std::mutex cancel_mutex; }; -class PartitionedStorageAzureBlobSink : public PartitionedSink +class PartitionedStorageAzureBlobSink : public PartitionedSink, WithContext { public: PartitionedStorageAzureBlobSink( const ASTPtr & partition_by, const String & format_, const Block & sample_block_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, const CompressionMethod compression_method_, AzureObjectStorage * object_storage_, const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_) + : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) , format(format_) , sample_block(sample_block_) - , context(context_) , compression_method(compression_method_) , object_storage(object_storage_) , blob(blob_) @@ -638,7 +644,7 @@ public: return std::make_shared( format, sample_block, - context, + getContext(), format_settings, compression_method, 
object_storage, @@ -649,7 +655,6 @@ public: private: const String format; const Block sample_block; - const ContextPtr context; const CompressionMethod compression_method; AzureObjectStorage * object_storage; const String blob; @@ -670,21 +675,23 @@ class ReadFromAzureBlob : public SourceStepWithFilter public: std::string getName() const override { return "ReadFromAzureBlob"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; ReadFromAzureBlob( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, std::shared_ptr storage_, ReadFromFormatInfo info_, const bool need_only_count_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , storage(std::move(storage_)) , info(std::move(info_)) , need_only_count(need_only_count_) - , context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) { @@ -695,8 +702,6 @@ private: ReadFromFormatInfo info; const bool need_only_count; - ContextPtr context; - size_t max_block_size; const size_t num_streams; @@ -705,9 +710,9 @@ private: void createIterator(const ActionsDAG::Node * predicate); }; -void ReadFromAzureBlob::applyFilters() +void ReadFromAzureBlob::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -730,16 +735,19 @@ void StorageAzureBlob::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, read_from_format_info.source_header, std::move(this_ptr), std::move(read_from_format_info), need_only_count, - local_context, max_block_size, num_streams); @@ -763,13 +771,13 @@ void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback()); + predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback()); + predicate, storage->getVirtualsList(), context, nullptr, 
context->getFileProgressCallback()); } } @@ -877,16 +885,6 @@ SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMeta } } -NamesAndTypesList StorageAzureBlob::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageAzureBlob::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - bool StorageAzureBlob::supportsPartitionBy() const { return true; @@ -913,7 +911,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator( String blob_path_with_globs_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context_, + const ContextPtr & context_, RelativePathsWithMetadata * outer_blobs_, std::function file_progress_callback_) : IIterator(context_) @@ -1028,7 +1026,7 @@ StorageAzureBlobSource::KeysIterator::KeysIterator( const Strings & keys_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context_, + const ContextPtr & context_, RelativePathsWithMetadata * outer_blobs, std::function file_progress_callback) : IIterator(context_) @@ -1147,7 +1145,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( const ReadFromFormatInfo & info, const String & format_, String name_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, UInt64 max_block_size_, String compression_hint_, @@ -1290,6 +1288,7 @@ namespace ReadBufferIterator( const std::shared_ptr & file_iterator_, AzureObjectStorage * object_storage_, + std::optional format_, const StorageAzureBlob::Configuration & configuration_, const std::optional & format_settings_, const RelativePathsWithMetadata & read_keys_, @@ -1298,19 +1297,38 @@ namespace , file_iterator(file_iterator_) , object_storage(object_storage_) , configuration(configuration_) + , format(std::move(format_)) , format_settings(format_settings_) , read_keys(read_keys_) , prev_read_keys_size(read_keys_.size()) { } - std::pair, std::optional> next() override + Data next() override { /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + if (first) { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. + if (!format) + { + for (const auto & key : read_keys) + { + if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(key.relative_path)) + { + format = format_from_path; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns, format}; + } } current_path_with_metadata = file_iterator->next(); @@ -1318,29 +1336,55 @@ namespace if (current_path_with_metadata.relative_path.empty()) { if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in AzureBlobStorage. 
You must specify table structure manually", configuration.format); + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "in AzureBlobStorage. You can specify table structure manually", *format); - return {nullptr, std::nullopt}; + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "in AzureBlobStorage. You can specify table structure manually"); + } + + return {nullptr, std::nullopt, format}; } first = false; - /// AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size) + /// AzureBlobStorage file iterator could get new keys after new iteration. + if (read_keys.size() > prev_read_keys_size) { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + /// If format is unknown we can try to determine it by new file names. + if (!format) + { + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it).relative_path)) + { + format = format_from_file_name; + break; + } + } + } + /// Check new files in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; + } + prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; } - else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) { RelativePathsWithMetadata paths = {current_path_with_metadata}; if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache}; + return {nullptr, columns_from_cache, format}; } first = false; @@ -1348,7 +1392,7 @@ namespace return {wrapReadBufferWithCompressionMethod( object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt}; + zstd_window_log_max), std::nullopt, format}; } void setNumRowsToLastFile(size_t num_rows) override @@ -1357,7 +1401,7 @@ namespace return; String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1368,7 +1412,7 @@ namespace return; String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = 
getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); } @@ -1382,16 +1426,36 @@ namespace Strings sources; sources.reserve(read_keys.size()); std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext()); + auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { return current_path_with_metadata.relative_path; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + return wrapReadBufferWithCompressionMethod( + object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), + chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), + zstd_window_log_max); + } + private: std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) { - auto & schema_cache = StorageAzureBlob::getSchemaCache(getContext()); + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_azure) + return std::nullopt; + + auto & schema_cache = StorageAzureBlob::getSchemaCache(context); for (auto it = begin; it < end; ++it) { auto get_last_mod_time = [&] -> std::optional @@ -1403,10 +1467,28 @@ namespace auto host_and_bucket = configuration.connection_url + '/' + configuration.container; String source = host_and_bucket + '/' + it->relative_path; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. 
+ format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -1415,6 +1497,7 @@ namespace std::shared_ptr file_iterator; AzureObjectStorage * object_storage; const StorageAzureBlob::Configuration & configuration; + std::optional format; const std::optional & format_settings; const RelativePathsWithMetadata & read_keys; size_t prev_read_keys_size; @@ -1423,21 +1506,16 @@ namespace }; } -ColumnsDescription StorageAzureBlob::getTableStructureFromData( +std::pair StorageAzureBlob::getTableStructureAndFormatFromDataImpl( + std::optional format, AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx, - bool distributed_processing) + const ContextPtr & ctx) { RelativePathsWithMetadata read_keys; std::shared_ptr file_iterator; - if (distributed_processing) - { - file_iterator = std::make_shared(ctx, - ctx->getReadTaskCallback()); - } - else if (configuration.withGlobs()) + if (configuration.withGlobs()) { file_iterator = std::make_shared( object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); @@ -1448,8 +1526,28 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData( object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); } - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, configuration, format_settings, read_keys, ctx); - return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); + ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); +} + +std::pair StorageAzureBlob::getTableStructureAndFormatFromData( + DB::AzureObjectStorage * object_storage, + const DB::StorageAzureBlob::Configuration & configuration, + const std::optional & format_settings, + const DB::ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx); +} + +ColumnsDescription StorageAzureBlob::getTableStructureFromData( + DB::AzureObjectStorage * object_storage, + const DB::StorageAzureBlob::Configuration & configuration, + const std::optional & format_settings, + const DB::ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(configuration.format, object_storage, configuration, format_settings, ctx).first; } SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 6fc3c5ce592..63fd489dcaf 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -31,9 +31,9 @@ public: String getPath() const { return blob_path; } - bool update(ContextPtr context); + bool update(const ContextPtr & context); - void connect(ContextPtr context); + void connect(const ContextPtr & context); bool withGlobs() const { return blob_path.find_first_of("*?{") != std::string::npos; } @@ -59,7 +59,7 @@ public: StorageAzureBlob( const Configuration & configuration_, std::unique_ptr && object_storage_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -68,10 +68,10 @@ 
public: bool distributed_processing_, ASTPtr partition_by_); - static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); - static AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); + static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); @@ -94,9 +94,6 @@ public: void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override; bool supportsSubcolumns() const override { return true; } @@ -115,16 +112,27 @@ public: AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx, - bool distributed_processing = false); + const ContextPtr & ctx); + + static std::pair getTableStructureAndFormatFromData( + AzureObjectStorage * object_storage, + const Configuration & configuration, + const std::optional & format_settings, + const ContextPtr & ctx); private: + static std::pair getTableStructureAndFormatFromDataImpl( + std::optional format, + AzureObjectStorage * object_storage, + const Configuration & configuration, + const std::optional & format_settings, + const ContextPtr & ctx); + friend class ReadFromAzureBlob; std::string name; Configuration configuration; std::unique_ptr object_storage; - NamesAndTypesList virtual_columns; const bool distributed_processing; std::optional format_settings; @@ -137,7 +145,7 @@ public: class IIterator : public WithContext { public: - IIterator(ContextPtr context_):WithContext(context_) {} + explicit IIterator(const ContextPtr & context_):WithContext(context_) {} virtual ~IIterator() = default; virtual RelativePathWithMetadata next() = 0; @@ -153,7 +161,7 @@ public: String blob_path_with_globs_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context_, + const ContextPtr & context_, RelativePathsWithMetadata * outer_blobs_, std::function file_progress_callback_ = {}); @@ -186,7 +194,7 @@ public: class ReadIterator : public IIterator { public: - explicit ReadIterator(ContextPtr context_, + explicit ReadIterator(const ContextPtr & context_, const ReadTaskCallback & callback_) : IIterator(context_), callback(callback_) { } RelativePathWithMetadata next() override @@ -207,7 +215,7 @@ public: const Strings & keys_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context_, + const ContextPtr & context_, RelativePathsWithMetadata * outer_blobs, std::function file_progress_callback = {}); @@ -229,7 +237,7 @@ public: const ReadFromFormatInfo & info, const String & format_, String name_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, UInt64 max_block_size_, String compression_hint_, diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index 1d587512f38..a80d121567a 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -36,54 +36,56 @@ 
StorageAzureBlobCluster::StorageAzureBlobCluster( const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + const ContextPtr & context) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageAzureBlobCluster (" + table_id_.table_name + ")")) , configuration{configuration_} , object_storage(std::move(object_storage_)) { - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); + context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { + ColumnsDescription columns; /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function - auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false); + if (configuration.format == "auto") + std::tie(columns, configuration.format) = StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context); + else + columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context); storage_metadata.setColumns(columns); } else + { + if (configuration.format == "auto") + configuration.format = StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context).second; storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } -void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionAzureBlobStorageCluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), configuration.format, context); } RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { auto iterator = std::make_shared( object_storage.get(), configuration.container, configuration.blob_path, - predicate, virtual_columns, context, nullptr); + predicate, getVirtualsList(), context, nullptr); + auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; 
} -NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const -{ - return virtual_columns; -} - - } #endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 2831b94f825..545e568a772 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -27,13 +27,10 @@ public: const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); + const ContextPtr & context); std::string getName() const override { return "AzureBlobStorageCluster"; } - NamesAndTypesList getVirtuals() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -43,10 +40,9 @@ public: private: void updateBeforeRead(const ContextPtr & /*context*/) override {} - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; StorageAzureBlob::Configuration configuration; - NamesAndTypesList virtual_columns; std::unique_ptr object_storage; }; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d5c135bb81d..2925038ec8e 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1,40 +1,41 @@ -#include -#include #include +#include +#include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include +#include +#include #include #include #include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace ProfileEvents @@ -56,6 +57,9 @@ namespace CurrentMetrics { extern const Metric StorageBufferRows; extern const Metric StorageBufferBytes; + extern const Metric StorageBufferFlushThreads; + extern const Metric StorageBufferFlushThreadsActive; + extern const Metric StorageBufferFlushThreadsScheduled; } @@ -153,6 +157,12 @@ StorageBuffer::StorageBuffer( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + if (num_shards > 1) + { + flush_pool = std::make_unique( + CurrentMetrics::StorageBufferFlushThreads, CurrentMetrics::StorageBufferFlushThreadsActive, CurrentMetrics::StorageBufferFlushThreadsScheduled, + num_shards, 0, num_shards); + } flush_handle = bg_pool.createTask(log->name() + "/Bg", [this]{ backgroundFlush(); }); } @@ -802,7 +812,22 @@ bool StorageBuffer::checkThresholdsImpl(bool direct, size_t rows, size_t bytes, void StorageBuffer::flushAllBuffers(bool check_thresholds) { for (auto & buf : buffers) - flushBuffer(buf, check_thresholds, false); + { + if (flush_pool) + { + scheduleFromThreadPool([&] () + { + flushBuffer(buf, check_thresholds, false); + }, *flush_pool, "BufferFlush"); + } + else + { + flushBuffer(buf, check_thresholds, false); + } + } + + if (flush_pool) + flush_pool->wait(); } diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 
47f6239b173..6c15c7e0238 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -149,6 +150,7 @@ private: /// There are `num_shards` of independent buffers. const size_t num_shards; + std::unique_ptr flush_pool; std::vector buffers; const Thresholds min_thresholds; diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 4c354371574..f716332dd24 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -316,7 +316,7 @@ void registerStorageDictionary(StorageFactory & factory) auto result_storage = std::make_shared(dictionary_id, abstract_dictionary_configuration, local_context); bool lazy_load = local_context->getConfigRef().getBool("dictionaries_lazy_load", true); - if (!args.attach && !lazy_load) + if (args.mode <= LoadingStrictnessLevel::CREATE && !lazy_load) { /// load() is called here to force loading the dictionary, wait until the loading is finished, /// and throw an exception if the loading is failed. @@ -335,7 +335,7 @@ void registerStorageDictionary(StorageFactory & factory) args.engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[0], local_context); String dictionary_name = checkAndGetLiteralArgument(args.engine_args[0], "dictionary_name"); - if (!args.attach) + if (args.mode <= LoadingStrictnessLevel::CREATE) { const auto & dictionary = args.getContext()->getExternalDictionariesLoader().getDictionary(dictionary_name, args.getContext()); const DictionaryStructure & dictionary_structure = dictionary->getStructure(); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 86ed1d03b94..3b766ac8d26 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -104,11 +105,8 @@ #include #include -#include - #include #include -#include #include @@ -290,22 +288,17 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus StorageDistributed::~StorageDistributed() = default; -NamesAndTypesList StorageDistributed::getVirtuals() const +VirtualColumnsDescription StorageDistributed::createVirtuals() { - /// NOTE This is weird. Most of these virtual columns are part of MergeTree + /// NOTE: This is weird. + /// Most of these virtual columns are part of MergeTree /// tables info. But Distributed is general-purpose engine. - return NamesAndTypesList{ - NameAndTypePair("_table", std::make_shared(std::make_shared())), - NameAndTypePair("_part", std::make_shared(std::make_shared())), - NameAndTypePair("_part_index", std::make_shared()), - NameAndTypePair("_part_uuid", std::make_shared()), - NameAndTypePair("_partition_id", std::make_shared(std::make_shared())), - NameAndTypePair("_sample_factor", std::make_shared()), - NameAndTypePair("_part_offset", std::make_shared()), - NameAndTypePair("_row_exists", std::make_shared()), - NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type), - NameAndTypePair("_shard_num", std::make_shared()), /// deprecated - }; + StorageInMemoryMetadata metadata; + auto desc = MergeTreeData::createVirtuals(metadata); + + desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. 
Use function shardNum instead"); + + return desc; } StorageDistributed::StorageDistributed( @@ -321,7 +314,7 @@ StorageDistributed::StorageDistributed( const String & storage_policy_name_, const String & relative_data_path_, const DistributedSettings & distributed_settings_, - bool attach_, + LoadingStrictnessLevel mode, ClusterPtr owned_cluster_, ASTPtr remote_table_function_ptr_) : IStorage(id_) @@ -354,6 +347,7 @@ StorageDistributed::StorageDistributed( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); if (sharding_key_) { @@ -372,7 +366,7 @@ StorageDistributed::StorageDistributed( } /// Sanity check. Skip check if the table is already created to allow the server to start. - if (!attach_) + if (mode <= LoadingStrictnessLevel::CREATE) { if (remote_database.empty() && !remote_table_function_ptr && !getCluster()->maybeCrossReplication()) LOG_WARNING(log, "Name of remote database is empty. Default database will be used implicitly."); @@ -397,7 +391,7 @@ StorageDistributed::StorageDistributed( const String & storage_policy_name_, const String & relative_data_path_, const DistributedSettings & distributed_settings_, - bool attach, + LoadingStrictnessLevel mode, ClusterPtr owned_cluster_) : StorageDistributed( id_, @@ -412,7 +406,7 @@ StorageDistributed::StorageDistributed( storage_policy_name_, relative_data_path_, distributed_settings_, - attach, + mode, std::move(owned_cluster_), remote_table_function_ptr_) { @@ -744,6 +738,32 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery( namespace { +class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitor +{ + static QueryTreeNodePtr getColumnNodeAliasExpression(const QueryTreeNodePtr & node) + { + const auto * column_node = node->as(); + if (!column_node || !column_node->hasExpression()) + return nullptr; + + const auto & column_source = column_node->getColumnSourceOrNull(); + if (!column_source || column_source->getNodeType() == QueryTreeNodeType::JOIN + || column_source->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) + return nullptr; + + auto column_expression = column_node->getExpression(); + column_expression->setAlias(column_node->getColumnName()); + return column_expression; + } + +public: + void visitImpl(QueryTreeNodePtr & node) + { + if (auto column_expression = getColumnNodeAliasExpression(node)) + node = column_expression; + } +}; + QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, const StorageSnapshotPtr & distributed_storage_snapshot, const StorageID & remote_storage_id, @@ -796,6 +816,8 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, replacement_table_expression->setAlias(query_info.table_expression->getAlias()); auto query_tree_to_modify = query_info.query_tree->cloneAndReplace(query_info.table_expression, std::move(replacement_table_expression)); + ReplaseAliasColumnsVisitor replase_alias_columns_visitor; + replase_alias_columns_visitor.visit(query_tree_to_modify); return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify); } @@ -941,7 +963,7 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata else columns_to_send = metadata_snapshot->getSampleBlockNonMaterialized().getNames(); - /// DistributedSink will not own cluster, but will own ConnectionPools of the cluster + /// DistributedSink will not own cluster return std::make_shared( local_context, *this, metadata_snapshot, cluster, insert_sync, 
timeout, StorageID{remote_database, remote_table}, columns_to_send); @@ -1097,7 +1119,7 @@ static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context) if (!source) return nullptr; - return ActionsDAG::buildFilterActionsDAG(source->getFilterNodes().nodes); + return source->getFilterActionsDAG(); } @@ -1566,32 +1588,9 @@ ClusterPtr StorageDistributed::skipUnusedShardsWithAnalyzer( [[maybe_unused]] const StorageSnapshotPtr & storage_snapshot, ContextPtr local_context) const { - - ActionsDAG::NodeRawConstPtrs nodes; - - const auto & prewhere_info = query_info.prewhere_info; - if (prewhere_info) - { - { - const auto & node = prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name); - nodes.push_back(&node); - } - - if (prewhere_info->row_level_filter) - { - const auto & node = prewhere_info->row_level_filter->findInOutputs(prewhere_info->row_level_column_name); - nodes.push_back(&node); - } - } - - if (query_info.filter_actions_dag) - nodes.push_back(query_info.filter_actions_dag->getOutputs().at(0)); - - if (nodes.empty()) + if (!query_info.filter_actions_dag) return nullptr; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes); - size_t limit = local_context->getSettingsRef().optimize_skip_unused_shards_limit; if (!limit || limit > SSIZE_MAX) { @@ -1605,7 +1604,7 @@ ClusterPtr StorageDistributed::skipUnusedShardsWithAnalyzer( ErrorCodes::LOGICAL_ERROR, "Cannot find sharding key column {} in expression {}", sharding_key_column_name, sharding_key_dag.dumpDAG()); - const auto * predicate = filter_actions_dag->getOutputs().at(0); + const auto * predicate = query_info.filter_actions_dag->getOutputs().at(0); const auto variants = evaluateExpressionOverConstantCondition(predicate, {expr_node}, local_context, limit); // Can't get a definite answer if we can skip any shards @@ -1915,7 +1914,7 @@ void registerStorageDistributed(StorageFactory & factory) } /// TODO: move some arguments from the arguments to the SETTINGS. 
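The skipUnusedShardsWithAnalyzer hunk above stops rebuilding a filter DAG from PREWHERE and filter nodes and instead relies on the planner-provided query_info.filter_actions_dag, giving up on pruning when it is absent. Below is a minimal sketch of that control flow; FilterDAG, QueryInfo and Cluster are simplified stand-ins, not the ClickHouse classes.

#include <cstddef>
#include <memory>
#include <vector>

struct FilterDAG                                     /// stand-in for ActionsDAG
{
    struct Node {};
    std::vector<const Node *> outputs;
    const std::vector<const Node *> & getOutputs() const { return outputs; }
};

struct QueryInfo { std::shared_ptr<FilterDAG> filter_actions_dag; };
struct Cluster {};
using ClusterPtr = std::shared_ptr<Cluster>;

ClusterPtr skipUnusedShards(const QueryInfo & query_info, const ClusterPtr & whole_cluster, size_t limit)
{
    /// No planner-provided filter DAG (or pruning disabled): query every shard.
    if (!query_info.filter_actions_dag || limit == 0)
        return nullptr;

    const auto * predicate = query_info.filter_actions_dag->getOutputs().at(0);
    (void)predicate;  /// ... evaluate the predicate over the sharding key and build the reduced cluster ...
    return whole_cluster;
}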
- DistributedSettings distributed_settings; + DistributedSettings distributed_settings = context->getDistributedSettings(); if (args.storage_def->settings) { distributed_settings.loadFromQuery(*args.storage_def); @@ -1955,7 +1954,7 @@ void registerStorageDistributed(StorageFactory & factory) storage_policy, args.relative_data_path, distributed_settings, - args.attach); + args.mode); }, { .supports_settings = true, diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 161a5983f94..323646ab911 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -58,7 +58,7 @@ public: const String & storage_policy_name_, const String & relative_data_path_, const DistributedSettings & distributed_settings_, - bool attach_, + LoadingStrictnessLevel mode, ClusterPtr owned_cluster_ = {}, ASTPtr remote_table_function_ptr_ = {}); @@ -73,7 +73,7 @@ public: const String & storage_policy_name_, const String & relative_data_path_, const DistributedSettings & distributed_settings_, - bool attach, + LoadingStrictnessLevel mode, ClusterPtr owned_cluster_ = {}); ~StorageDistributed() override; @@ -146,8 +146,6 @@ public: ActionLock getActionLock(StorageActionBlockType type) override; - NamesAndTypesList getVirtuals() const override; - /// Used by InterpreterInsertQuery std::string getRemoteDatabaseName() const { return remote_database; } std::string getRemoteTableName() const { return remote_table; } @@ -156,7 +154,6 @@ public: /// Used by InterpreterSystemQuery void flushClusterNodesAllData(ContextPtr context); - /// Used by ClusterCopier size_t getShardCount() const; bool initializeDiskOnConfigChange(const std::set & new_added_disks) override; @@ -234,6 +231,8 @@ private: std::optional distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context) const; std::optional distributedWriteBetweenDistributedTables(const StorageDistributed & src_distributed, const ASTInsertQuery & query, ContextPtr context) const; + static VirtualColumnsDescription createVirtuals(); + String remote_database; String remote_table; ASTPtr remote_table_function_ptr; diff --git a/src/Storages/StorageDummy.cpp b/src/Storages/StorageDummy.cpp index e2396a54acb..0525a004099 100644 --- a/src/Storages/StorageDummy.cpp +++ b/src/Storages/StorageDummy.cpp @@ -10,9 +10,9 @@ namespace DB { -StorageDummy::StorageDummy(const StorageID & table_id_, const ColumnsDescription & columns_, ColumnsDescription object_columns_) - : IStorage(table_id_) - , object_columns(std::move(object_columns_)) +StorageDummy::StorageDummy( + const StorageID & table_id_, const ColumnsDescription & columns_, const StorageSnapshotPtr & original_storage_snapshot_) + : IStorage(table_id_), original_storage_snapshot(original_storage_snapshot_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); @@ -31,23 +31,38 @@ QueryProcessingStage::Enum StorageDummy::getQueryProcessingStage( void StorageDummy::read(QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo &, - ContextPtr, + SelectQueryInfo & query_info, + ContextPtr local_context, QueryProcessingStage::Enum, size_t, size_t) { - query_plan.addStep(std::make_unique(*this, storage_snapshot, column_names)); + query_plan.addStep(std::make_unique( + column_names, + query_info, + original_storage_snapshot ? 
original_storage_snapshot : storage_snapshot, + local_context, + *this)); } -ReadFromDummy::ReadFromDummy(const StorageDummy & storage_, - StorageSnapshotPtr storage_snapshot_, - Names column_names_) - : SourceStepWithFilter(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}) +ReadFromDummy::ReadFromDummy( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + const StorageDummy & storage_) + : SourceStepWithFilter( + DataStream{ + .header = SourceStepWithFilter::applyPrewhereActions( + storage_snapshot_->getSampleBlockForColumns(column_names_), query_info_.prewhere_info)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , storage(storage_) - , storage_snapshot(std::move(storage_snapshot_)) - , column_names(std::move(column_names_)) -{} + , column_names(column_names_) +{ +} void ReadFromDummy::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h index aa2201a196b..e9d8f90f755 100644 --- a/src/Storages/StorageDummy.h +++ b/src/Storages/StorageDummy.h @@ -11,7 +11,8 @@ namespace DB class StorageDummy final : public IStorage { public: - StorageDummy(const StorageID & table_id_, const ColumnsDescription & columns_, ColumnsDescription object_columns_ = {}); + StorageDummy( + const StorageID & table_id_, const ColumnsDescription & columns_, const StorageSnapshotPtr & original_storage_snapshot_ = nullptr); std::string getName() const override { return "StorageDummy"; } @@ -20,7 +21,15 @@ public: bool supportsPrewhere() const override { return true; } bool supportsSubcolumns() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } - bool canMoveConditionsToPrewhere() const override { return false; } + bool canMoveConditionsToPrewhere() const override + { + return original_storage_snapshot ? original_storage_snapshot->storage.canMoveConditionsToPrewhere() : false; + } + + bool hasEvenlyDistributedRead() const override + { + return original_storage_snapshot ? original_storage_snapshot->storage.hasEvenlyDistributedRead() : false; + } StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override { @@ -42,16 +51,23 @@ public: QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; + private: const ColumnsDescription object_columns; + + /// The original storage snapshot which is replaced during planning. See collectFiltersForAnalysis for example. 
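StorageDummy now keeps the snapshot of the storage it replaces during planning, so capability checks such as canMoveConditionsToPrewhere() are forwarded to the original storage instead of being hard-coded to false. A minimal sketch of that delegation pattern with simplified stand-in types, not the real interfaces:

#include <memory>

struct IStorageLike
{
    virtual ~IStorageLike() = default;
    virtual bool canMoveConditionsToPrewhere() const { return false; }
    virtual bool hasEvenlyDistributedRead() const { return false; }
};

struct StorageSnapshotLike { const IStorageLike & storage; };

struct DummyStorage : IStorageLike
{
    std::shared_ptr<const StorageSnapshotLike> original_storage_snapshot;  /// may stay null

    bool canMoveConditionsToPrewhere() const override
    {
        return original_storage_snapshot ? original_storage_snapshot->storage.canMoveConditionsToPrewhere() : false;
    }

    bool hasEvenlyDistributedRead() const override
    {
        return original_storage_snapshot ? original_storage_snapshot->storage.hasEvenlyDistributedRead() : false;
    }
};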
+ StorageSnapshotPtr original_storage_snapshot; }; class ReadFromDummy final : public SourceStepWithFilter { public: - explicit ReadFromDummy(const StorageDummy & storage_, - StorageSnapshotPtr storage_snapshot_, - Names column_names_); + explicit ReadFromDummy( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + const StorageDummy & storage_); const StorageDummy & getStorage() const { @@ -74,7 +90,6 @@ public: private: const StorageDummy & storage; - StorageSnapshotPtr storage_snapshot; Names column_names; }; diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index f9bc25ef72c..d1968daa1f1 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -62,7 +62,7 @@ StoragePtr StorageFactory::get( ContextMutablePtr context, const ColumnsDescription & columns, const ConstraintsDescription & constraints, - bool has_force_restore_data_flag) const + LoadingStrictnessLevel mode) const { String name, comment; @@ -216,8 +216,7 @@ StoragePtr StorageFactory::get( .context = context, .columns = columns, .constraints = constraints, - .attach = query.attach, - .has_force_restore_data_flag = has_force_restore_data_flag, + .mode = mode, .comment = comment}; assert(arguments.getContext() == arguments.getContext()->getGlobalContext()); diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h index 7b1d7235bac..50ace6aaad7 100644 --- a/src/Storages/StorageFactory.h +++ b/src/Storages/StorageFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -43,8 +44,7 @@ public: ContextWeakMutablePtr context; const ColumnsDescription & columns; const ConstraintsDescription & constraints; - bool attach; - bool has_force_restore_data_flag; + LoadingStrictnessLevel mode; const String & comment; ContextMutablePtr getContext() const; @@ -87,7 +87,7 @@ public: ContextMutablePtr context, const ColumnsDescription & columns, const ConstraintsDescription & constraints, - bool has_force_restore_data_flag) const; + LoadingStrictnessLevel mode) const; /// Register a table engine by its name. /// No locking, you must register all engines before usage of get. 
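Across these hunks the attach / has_force_restore_data_flag pair is folded into a single ordered LoadingStrictnessLevel, so guards become comparisons such as mode <= LoadingStrictnessLevel::CREATE. A small self-contained sketch of why an ordered enum covers both old flags; the enumerator set below is illustrative, only CREATE and the comparison idiom are taken from the diff.

#include <iostream>

/// Illustrative ordering: stricter (more lenient) loading modes get larger values,
/// so "run expensive sanity checks only on a fresh CREATE" collapses to one comparison.
enum class LoadingStrictnessLevel
{
    CREATE = 0,        /// brand new table, validate everything
    ATTACH = 1,        /// re-attaching existing metadata, be lenient
    FORCE_RESTORE = 2, /// restoring after failure, be as lenient as possible
};

void registerTable(LoadingStrictnessLevel mode)
{
    if (mode <= LoadingStrictnessLevel::CREATE)
        std::cout << "running sanity checks / eager dictionary load\n";
    else
        std::cout << "skipping checks so the server can start\n";
}

int main()
{
    registerTable(LoadingStrictnessLevel::CREATE);
    registerTable(LoadingStrictnessLevel::ATTACH);
}

One value replaces two booleans, and adding further levels later does not require touching every call site that only cares about "stricter than X".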
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0d9e79d1d54..0d220f2fd5d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -90,7 +92,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; extern const int CANNOT_COMPILE_REGEXP; + extern const int UNSUPPORTED_METHOD; } namespace @@ -275,6 +279,22 @@ std::unique_ptr selectReadBuffer( ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); } + else if (read_method == LocalFSReadMethod::io_uring && !use_table_fd) + { +#if USE_LIBURING + auto & reader = context->getIOURingReader(); + if (!reader.isSupported()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); + + res = std::make_unique( + reader, + Priority{}, + current_path, + context->getSettingsRef().max_read_buffer_size); +#else + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Read method io_uring is only supported in Linux"); +#endif + } else { if (use_table_fd) @@ -328,7 +348,7 @@ std::unique_ptr createReadBuffer( } -Strings StorageFile::getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read) +Strings StorageFile::getPathsList(const String & table_path, const String & user_files_path, const ContextPtr & context, size_t & total_bytes_to_read) { fs::path user_files_absolute_path = fs::weakly_canonical(user_files_path); fs::path fs_table_path(table_path); @@ -375,27 +395,44 @@ namespace public: ReadBufferFromFileIterator( const std::vector & paths_, - const String & format_, + std::optional format_, const String & compression_method_, const std::optional & format_settings_, - ContextPtr context_) + const ContextPtr & context_) : WithContext(context_) , paths(paths_) - , format(format_) + , format(std::move(format_)) , compression_method(compression_method_) , format_settings(format_settings_) { } - std::pair, std::optional> next() override + Data next() override { bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - /// If we have cached columns, next() won't be called again. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + if (is_first) { - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns}; + /// If format is unknown we iterate through all paths on first iteration and + /// try to determine format by file name. + if (!format) + { + for (const auto & path : paths) + { + if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path)) + { + format = format_from_path; + break; + } + } + } + + /// For default mode check cached columns for all paths on first iteration. + /// If we have cached columns, next() won't be called again. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(paths)) + return {nullptr, cached_columns, format}; + } } String path; @@ -406,11 +443,18 @@ namespace if (current_index == paths.size()) { if (is_first) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because all files are empty. 
You can specify the format manually", + *format); + throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", - format); - return {nullptr, std::nullopt}; + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because all files are empty. You can specify table structure manually"); + } + return {nullptr, std::nullopt, std::nullopt}; } path = paths[current_index++]; @@ -421,10 +465,10 @@ namespace if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) { if (auto cached_columns = tryGetColumnsFromCache({path})) - return {nullptr, cached_columns}; + return {nullptr, cached_columns, format}; } - return {createReadBuffer(path, file_stat, false, -1, compression_method, getContext()), std::nullopt}; + return {createReadBuffer(path, file_stat, false, -1, compression_method, getContext()), std::nullopt, format}; } void setNumRowsToLastFile(size_t num_rows) override @@ -432,7 +476,7 @@ namespace if (!getContext()->getSettingsRef().use_cache_for_count_from_files) return; - auto key = getKeyForSchemaCache(paths[current_index - 1], format, format_settings, getContext()); + auto key = getKeyForSchemaCache(paths[current_index - 1], *format, format_settings, getContext()); StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -444,7 +488,7 @@ namespace /// For union mode, schema can be different for different files, so we need to /// cache last inferred schema only for last processed file. - auto cache_key = getKeyForSchemaCache(paths[current_index - 1], format, format_settings, getContext()); + auto cache_key = getKeyForSchemaCache(paths[current_index - 1], *format, format_settings, getContext()); StorageFile::getSchemaCache(getContext()).addColumns(cache_key, columns); } @@ -455,7 +499,7 @@ namespace return; /// For default mode we cache resulting schema for all paths. - auto cache_keys = getKeysForSchemaCache(paths, format, format_settings, getContext()); + auto cache_keys = getKeysForSchemaCache(paths, *format, format_settings, getContext()); StorageFile::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } @@ -466,14 +510,30 @@ namespace return ""; } + void setFormatName(const String & format_name) override + { + format = format_name; + } + + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_index > 0 && current_index <= paths.size()); + auto path = paths[current_index - 1]; + auto file_stat = getFileStat(path, false, -1, "File"); + return createReadBuffer(path, file_stat, false, -1, compression_method, getContext()); + } + private: std::optional tryGetColumnsFromCache(const Strings & paths_) { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file) + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_file) return std::nullopt; /// Check if the cache contains one of the paths. 
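With format now an std::optional, the file iterator first tries to pin the format from a file name before any content-based detection. A minimal sketch of that first pass; tryGetFormatFromExtension is a hypothetical stand-in for the FormatFactory lookup used above, and the extension table is illustrative.

#include <optional>
#include <string>
#include <vector>

/// Hypothetical stand-in for FormatFactory::instance().tryGetFormatFromFileName().
std::optional<std::string> tryGetFormatFromExtension(const std::string & path)
{
    if (auto pos = path.rfind('.'); pos != std::string::npos)
    {
        auto ext = path.substr(pos + 1);
        if (ext == "csv") return "CSV";
        if (ext == "parquet") return "Parquet";
        if (ext == "jsonl") return "JSONEachRow";
    }
    return std::nullopt;
}

/// First iteration: stop at the first path whose name already reveals the format;
/// otherwise the format stays unknown and must be detected from the file contents.
std::optional<std::string> detectFormatFromPaths(const std::vector<std::string> & paths)
{
    for (const auto & path : paths)
        if (auto format = tryGetFormatFromExtension(path))
            return format;
    return std::nullopt;
}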
- auto & schema_cache = StorageFile::getSchemaCache(getContext()); + auto & schema_cache = StorageFile::getSchemaCache(context); struct stat file_stat{}; for (const auto & path : paths_) { @@ -485,10 +545,28 @@ namespace return file_stat.st_mtime; }; - auto cache_key = getKeyForSchemaCache(path, format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(path, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(path, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. + format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -497,7 +575,7 @@ namespace const std::vector & paths; size_t current_index = 0; - String format; + std::optional format; String compression_method; const std::optional & format_settings; }; @@ -507,17 +585,17 @@ namespace public: ReadBufferFromArchiveIterator( const StorageFile::ArchiveInfo & archive_info_, - const String & format_, + std::optional format_, const std::optional & format_settings_, - ContextPtr context_) + const ContextPtr & context_) : WithContext(context_) , archive_info(archive_info_) - , format(format_) + , format(std::move(format_)) , format_settings(format_settings_) { } - std::pair, std::optional> next() override + Data next() override { /// For default mode check cached columns for all initial archive paths (maybe with globs) on first iteration. /// If we have cached columns, next() won't be called again. @@ -525,8 +603,8 @@ namespace { for (const auto & archive : archive_info.paths_to_archives) { - if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, archive_info.path_in_archive)) - return {nullptr, cached_columns}; + if (auto cached_schema = tryGetSchemaFromCache(archive, fmt::format("{}::{}", archive, archive_info.path_in_archive))) + return {nullptr, cached_schema, format}; } } @@ -536,12 +614,19 @@ namespace if (current_archive_index == archive_info.paths_to_archives.size()) { if (is_first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", - format); + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because all files are empty. You can specify table structure manually", + *format); - return {nullptr, std::nullopt}; + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify the format manually"); + } + + return {nullptr, std::nullopt, format}; } const auto & archive = archive_info.paths_to_archives[current_archive_index]; @@ -555,11 +640,18 @@ namespace continue; } + if (format) + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The table structure cannot be extracted from a {} format file, because the archive {} is empty. " + "You can specify table structure manually", + *format, + archive); + throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because the archive {} is empty. " - "You must specify table structure manually", - format, + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because the archive {} is empty. " + "You can specify the format manually", archive); } @@ -575,8 +667,8 @@ namespace last_read_file_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), archive_info.path_in_archive)); is_first = false; - if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, last_read_file_path)) - return {nullptr, cached_columns}; + if (auto cached_schema = tryGetSchemaFromCache(archive, last_read_file_path)) + return {nullptr, cached_schema, format}; } else { @@ -612,13 +704,20 @@ namespace last_read_file_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), *filename)); is_first = false; - if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, last_read_file_path)) + /// If format is unknown we can try to determine it by the file name. + if (!format) + { + if (auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename)) + format = format_from_file; + } + + if (auto cached_schema = tryGetSchemaFromCache(archive, last_read_file_path)) { /// For union mode next() will be called again even if we found cached columns, /// so we need to remember last_read_buffer to continue iterating through files in archive. if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) last_read_buffer = archive_reader->readFile(std::move(file_enumerator)); - return {nullptr, cached_columns}; + return {nullptr, cached_schema, format}; } read_buf = archive_reader->readFile(std::move(file_enumerator)); @@ -627,7 +726,7 @@ namespace break; } - return {std::move(read_buf), std::nullopt}; + return {std::move(read_buf), std::nullopt, format}; } void setPreviousReadBuffer(std::unique_ptr buffer) override @@ -641,7 +740,7 @@ namespace if (!getContext()->getSettingsRef().use_cache_for_count_from_files) return; - auto key = getKeyForSchemaCache(last_read_file_path, format, format_settings, getContext()); + auto key = getKeyForSchemaCache(last_read_file_path, *format, format_settings, getContext()); StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -654,7 +753,7 @@ namespace /// For union mode, schema can be different for different files in archive, so we need to /// cache last inferred schema only for last processed file. 
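When the format is unknown, the schema cache is probed once per registered input format, and the first hit both returns the cached columns and fixes the format for the remaining files. A minimal sketch of that loop; the map-based cache and helper names are simplified stand-ins, not the real SchemaCache API.

#include <map>
#include <optional>
#include <string>
#include <utility>
#include <vector>

using Columns = std::string;  /// stand-in for ColumnsDescription

/// Stand-in cache keyed by (path, format).
using SchemaCache = std::map<std::pair<std::string, std::string>, Columns>;

std::optional<Columns> tryGetSchemaFromCache(
    const SchemaCache & cache,
    const std::string & path,
    std::optional<std::string> & format,
    const std::vector<std::string> & all_input_formats)
{
    if (format)
    {
        if (auto it = cache.find({path, *format}); it != cache.end())
            return it->second;
        return std::nullopt;
    }

    /// Format unknown: probe the cache under every known input format.
    for (const auto & candidate : all_input_formats)
    {
        if (auto it = cache.find({path, candidate}); it != cache.end())
        {
            format = candidate;  /// now the format is known and reused for the remaining files
            return it->second;
        }
    }
    return std::nullopt;
}

This is also why the comments above insist the detected format "should be the same for all files": once one file pins it, the rest are read with that format.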
auto & schema_cache = StorageFile::getSchemaCache(getContext()); - auto cache_key = getKeyForSchemaCache(last_read_file_path, format, format_settings, getContext()); + auto cache_key = getKeyForSchemaCache(last_read_file_path, *format, format_settings, getContext()); schema_cache.addColumns(cache_key, columns); } @@ -670,17 +769,42 @@ namespace for (const auto & archive : archive_info.paths_to_archives) paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive, archive_info.path_in_archive)); auto & schema_cache = StorageFile::getSchemaCache(getContext()); - auto cache_keys = getKeysForSchemaCache(paths_for_schema_cache, format, format_settings, getContext()); + auto cache_keys = getKeysForSchemaCache(paths_for_schema_cache, *format, format_settings, getContext()); schema_cache.addManyColumns(cache_keys, columns); } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { return last_read_file_path; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + if (archive_info.isSingleFileRead()) + { + chassert(current_archive_index > 0 && current_archive_index <= archive_info.paths_to_archives.size()); + const auto & archive = archive_info.paths_to_archives[current_archive_index - 1]; + auto archive_reader = createArchiveReader(archive); + return archive_reader->readFile(archive_info.path_in_archive, false); + } + + chassert(current_archive_index >= 0 && current_archive_index < archive_info.paths_to_archives.size()); + const auto & archive = archive_info.paths_to_archives[current_archive_index]; + auto archive_reader = createArchiveReader(archive); + chassert(last_read_buffer); + file_enumerator = archive_reader->currentFile(std::move(last_read_buffer)); + return archive_reader->readFile(std::move(file_enumerator)); + } + private: - std::optional tryGetColumnsFromSchemaCache(const std::string & archive_path, const std::string & full_path) + std::optional tryGetSchemaFromCache(const std::string & archive_path, const std::string & full_path) { auto context = getContext(); if (!context->getSettingsRef().schema_inference_use_cache_for_file) @@ -696,11 +820,28 @@ namespace return file_stat.st_mtime; }; - auto cache_key = getKeyForSchemaCache(full_path, format, format_settings, context); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(full_path, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(full_path, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. 
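supportsLastReadBufferRecreation() / recreateLastReadBuffer() let schema and format detection take a second pass over the file that was just consumed. A minimal sketch of an iterator that can re-open its most recently returned file; std::ifstream stands in for the real ReadBuffer hierarchy.

#include <cassert>
#include <cstddef>
#include <fstream>
#include <memory>
#include <string>
#include <vector>

class PathIterator
{
public:
    explicit PathIterator(std::vector<std::string> paths_) : paths(std::move(paths_)) {}

    std::unique_ptr<std::ifstream> next()
    {
        if (current_index == paths.size())
            return nullptr;
        return std::make_unique<std::ifstream>(paths[current_index++]);
    }

    /// Re-open the file returned by the most recent next() call, e.g. when format
    /// detection consumed the stream and the schema reader has to start over.
    std::unique_ptr<std::ifstream> recreateLastReadBuffer()
    {
        assert(current_index > 0 && current_index <= paths.size());
        return std::make_unique<std::ifstream>(paths[current_index - 1]);
    }

private:
    std::vector<std::string> paths;
    size_t current_index = 0;
};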
+ format = format_name; + return columns; + } + } + } return std::nullopt; } @@ -716,13 +857,13 @@ namespace std::unique_ptr file_enumerator; std::unique_ptr last_read_buffer; - String format; + std::optional format; const std::optional & format_settings; std::vector paths_for_schema_cache; }; } -ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr context) +std::pair StorageFile::getTableStructureAndFormatFromFileDescriptor(std::optional format, const ContextPtr & context) { /// If we want to read schema from file descriptor we should create /// a read buffer from fd, create a checkpoint, read some data required @@ -739,22 +880,29 @@ ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr c read_buf->setCheckpoint(); auto read_buffer_iterator = SingleReadBufferIterator(std::move(read_buf)); - auto columns = readSchemaFromFormat(format_name, format_settings, read_buffer_iterator, false, context, peekable_read_buffer_from_fd); + ColumnsDescription columns; + if (format) + columns = readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context); + else + std::tie(columns, format) = detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + + peekable_read_buffer_from_fd = read_buffer_iterator.releaseBuffer(); if (peekable_read_buffer_from_fd) { /// If we have created read buffer in readSchemaFromFormat we should rollback to checkpoint. assert_cast(peekable_read_buffer_from_fd.get())->rollbackToCheckpoint(); has_peekable_read_buffer_from_fd = true; } - return columns; + + return {columns, *format}; } -ColumnsDescription StorageFile::getTableStructureFromFile( - const String & format, +std::pair StorageFile::getTableStructureAndFormatFromFileImpl( + std::optional format, const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context, + const ContextPtr & context, const std::optional & archive_info) { if (format == "Distributed") @@ -762,29 +910,60 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (paths.empty()) throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Cannot get table structure from file, because no files match specified name"); - return ColumnsDescription(DistributedAsyncInsertSource(paths[0]).getOutputs().front().getHeader().getNamesAndTypesList()); + return {ColumnsDescription(DistributedAsyncInsertSource(paths[0]).getOutputs().front().getHeader().getNamesAndTypesList()), *format}; } if (((archive_info && archive_info->paths_to_archives.empty()) || (!archive_info && paths.empty())) - && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) + && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path. " + "You can specify table structure manually", *format); + throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path. " - "You must specify table structure manually", format); + "The data format cannot be detected by the contents of the files, because there are no files with provided path. 
" + "You can specify the format manually"); + + } if (archive_info) { ReadBufferFromArchiveIterator read_buffer_iterator(*archive_info, format, format_settings, context); - return readSchemaFromFormat( - format, - format_settings, - read_buffer_iterator, - /*retry=*/archive_info->paths_to_archives.size() > 1 || !archive_info->isSingleFileRead(), - context); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); } ReadBufferFromFileIterator read_buffer_iterator(paths, format, compression_method, format_settings, context); - return readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); +} + +ColumnsDescription StorageFile::getTableStructureFromFile( + const DB::String & format, + const std::vector & paths, + const DB::String & compression_method, + const std::optional & format_settings, + const ContextPtr & context, + const std::optional & archive_info) +{ + return getTableStructureAndFormatFromFileImpl(format, paths, compression_method, format_settings, context, archive_info).first; +} + +std::pair StorageFile::getTableStructureAndFormatFromFile( + const std::vector & paths, + const DB::String & compression_method, + const std::optional & format_settings, + const ContextPtr & context, + const std::optional & archive_info) +{ + return getTableStructureAndFormatFromFileImpl(std::nullopt, paths, compression_method, format_settings, context, archive_info); } bool StorageFile::supportsSubsetOfColumns(const ContextPtr & context) const @@ -875,7 +1054,7 @@ StorageFile::StorageFile(CommonArguments args) , compression_method(args.compression_method) , base_path(args.getContext()->getPath()) { - if (format_name != "Distributed") + if (format_name != "Distributed" && format_name != "auto") FormatFactory::instance().checkFormatName(format_name); } @@ -887,16 +1066,19 @@ void StorageFile::setStorageMetadata(CommonArguments args) { ColumnsDescription columns; if (use_table_fd) - columns = getTableStructureFromFileDescriptor(args.getContext()); + { + if (format_name == "auto") + std::tie(columns, format_name) = getTableStructureAndFormatFromFileDescriptor(std::nullopt, args.getContext()); + else + columns = getTableStructureAndFormatFromFileDescriptor(format_name, args.getContext()).first; + } else { - columns = getTableStructureFromFile( - format_name, - paths, - compression_method, - format_settings, - args.getContext(), - archive_info); + if (format_name == "auto") + std::tie(columns, format_name) = getTableStructureAndFormatFromFile(paths, compression_method, format_settings, args.getContext(), archive_info); + else + columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext(), archive_info); + if (!args.columns.empty() && args.columns != columns) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } @@ -904,6 +1086,8 @@ void StorageFile::setStorageMetadata(CommonArguments args) } else { + if (format_name == "auto") + format_name = getTableStructureAndFormatFromFile(paths, compression_method, format_settings, args.getContext(), archive_info).second; /// We don't allow special columns in File storage. 
if (!args.columns.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine File doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -913,12 +1097,11 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } -static std::chrono::seconds getLockTimeout(ContextPtr context) +static std::chrono::seconds getLockTimeout(const ContextPtr & context) { const Settings & settings = context->getSettingsRef(); Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds(); @@ -934,9 +1117,9 @@ StorageFileSource::FilesIterator::FilesIterator( std::optional archive_info_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, - ContextPtr context_, + const ContextPtr & context_, bool distributed_processing_) - : files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_), context(context_) + : WithContext(context_), files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_) { ActionsDAGPtr filter_dag; if (!distributed_processing && !archive_info && !files.empty()) @@ -949,7 +1132,7 @@ StorageFileSource::FilesIterator::FilesIterator( String StorageFileSource::FilesIterator::next() { if (distributed_processing) - return context->getReadTaskCallback()(); + return getContext()->getReadTaskCallback()(); else { const auto & fs = isReadFromArchive() ? 
archive_info->paths_to_archives : files; @@ -973,12 +1156,12 @@ const String & StorageFileSource::FilesIterator::getFileNameInArchive() StorageFileSource::StorageFileSource( const ReadFromFormatInfo & info, std::shared_ptr storage_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size_, FilesIteratorPtr files_iterator_, std::unique_ptr read_buf_, bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) + : SourceWithKeyCondition(info.source_header, false), WithContext(context_) , storage(std::move(storage_)) , files_iterator(std::move(files_iterator_)) , read_buf(std::move(read_buf_)) @@ -986,13 +1169,12 @@ StorageFileSource::StorageFileSource( , requested_columns(info.requested_columns) , requested_virtual_columns(info.requested_virtual_columns) , block_for_format(info.format_header) - , context(context_) , max_block_size(max_block_size_) , need_only_count(need_only_count_) { if (!storage->use_table_fd) { - shared_lock = std::shared_lock(storage->rwlock, getLockTimeout(context)); + shared_lock = std::shared_lock(storage->rwlock, getLockTimeout(getContext())); if (!shared_lock) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); storage->readers_counter.fetch_add(1, std::memory_order_release); @@ -1009,7 +1191,7 @@ void StorageFileSource::beforeDestroy() if (std::uncaught_exceptions() == 0 && cnt == 1 && !storage->was_renamed) { shared_lock.unlock(); - auto exclusive_lock = std::unique_lock{storage->rwlock, getLockTimeout(context)}; + auto exclusive_lock = std::unique_lock{storage->rwlock, getLockTimeout(getContext())}; if (!exclusive_lock) return; @@ -1028,7 +1210,7 @@ void StorageFileSource::beforeDestroy() file_path = file_path.lexically_normal(); // Checking access rights - checkCreationIsAllowed(context, context->getUserFilesPath(), file_path, true); + checkCreationIsAllowed(getContext(), getContext()->getUserFilesPath(), file_path, true); // Checking an existing of new file if (fs::exists(file_path)) @@ -1053,15 +1235,15 @@ StorageFileSource::~StorageFileSource() beforeDestroy(); } -void StorageFileSource::setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) +void StorageFileSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) { - setKeyConditionImpl(nodes, context_, block_for_format); + setKeyConditionImpl(filter_actions_dag, context_, block_for_format); } bool StorageFileSource::tryGetCountFromCache(const struct stat & file_stat) { - if (!context->getSettingsRef().use_cache_for_count_from_files) + if (!getContext()->getSettingsRef().use_cache_for_count_from_files) return false; auto num_rows_from_cache = tryGetNumRowsFromCache(current_path, file_stat.st_mtime); @@ -1103,7 +1285,7 @@ Chunk StorageFileSource::generate() return {}; auto file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); - if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) + if (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; archive_reader = createArchiveReader(archive); @@ -1117,7 +1299,7 @@ Chunk StorageFileSource::generate() if (!read_buf) continue; - if (auto progress_callback = context->getFileProgressCallback()) + if (auto progress_callback = getContext()->getFileProgressCallback()) progress_callback(FileProgress(0, tryGetFileSizeFromReadBuffer(*read_buf).value_or(0))); } else @@ -1131,7 +1313,7 @@ Chunk StorageFileSource::generate() return {}; 
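need_only_count together with the schema cache lets a plain count() skip parsing when a cached row count for the file is still valid. A minimal sketch of that fast path; the map keyed by path with an mtime check is a simplified stand-in for the real cache, whose validity rule is more involved.

#include <cstddef>
#include <ctime>
#include <map>
#include <optional>
#include <string>

struct CachedCount { size_t num_rows; time_t mtime; };
using CountCache = std::map<std::string, CachedCount>;  /// stand-in, keyed by file path

/// Return the cached row count only if the file has not changed since it was cached.
std::optional<size_t> tryGetCountFromCache(const CountCache & cache, const std::string & path, time_t last_mod_time)
{
    auto it = cache.find(path);
    if (it == cache.end() || it->second.mtime != last_mod_time)
        return std::nullopt;
    return it->second.num_rows;
}

void addCount(CountCache & cache, const std::string & path, time_t mtime, size_t num_rows)
{
    cache[path] = {num_rows, mtime};
}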
current_archive_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); - if (context->getSettingsRef().engine_file_skip_empty_files && current_archive_stat.st_size == 0) + if (getContext()->getSettingsRef().engine_file_skip_empty_files && current_archive_stat.st_size == 0) continue; archive_reader = createArchiveReader(archive); @@ -1165,7 +1347,7 @@ Chunk StorageFileSource::generate() continue; read_buf = archive_reader->readFile(std::move(file_enumerator)); - if (auto progress_callback = context->getFileProgressCallback()) + if (auto progress_callback = getContext()->getFileProgressCallback()) progress_callback(FileProgress(0, tryGetFileSizeFromReadBuffer(*read_buf).value_or(0))); } } @@ -1191,16 +1373,16 @@ Chunk StorageFileSource::generate() file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); current_file_size = file_stat.st_size; - if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) + if (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; if (need_only_count && tryGetCountFromCache(file_stat)) continue; - read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); + read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, getContext()); } - const Settings & settings = context->getSettingsRef(); + const Settings & settings = getContext()->getSettingsRef(); size_t file_num = 0; if (storage->archive_info) @@ -1212,7 +1394,7 @@ Chunk StorageFileSource::generate() const auto max_parsing_threads = std::max(settings.max_threads / file_num, 1UL); input_format = FormatFactory::instance().getInput( - storage->format_name, *read_buf, block_for_format, context, max_block_size, storage->format_settings, + storage->format_name, *read_buf, block_for_format, getContext(), max_block_size, storage->format_settings, max_parsing_threads, std::nullopt, /*is_remote_fs*/ false, CompressionMethod::None, need_only_count); if (key_condition) @@ -1228,7 +1410,7 @@ Chunk StorageFileSource::generate() { builder.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, columns_description, *input_format, context); + return std::make_shared(header, columns_description, *input_format, getContext()); }); } @@ -1265,7 +1447,7 @@ Chunk StorageFileSource::generate() if (storage->use_table_fd) finished_generate = true; - if (input_format && storage->format_name != "Distributed" && context->getSettingsRef().use_cache_for_count_from_files) + if (input_format && storage->format_name != "Distributed" && getContext()->getSettingsRef().use_cache_for_count_from_files) addNumRowsToCache(current_path, total_rows_in_file); total_rows_in_file = 0; @@ -1296,14 +1478,14 @@ Chunk StorageFileSource::generate() void StorageFileSource::addNumRowsToCache(const String & path, size_t num_rows) const { - auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, context); - StorageFile::getSchemaCache(context).addNumRows(key, num_rows); + auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, getContext()); + StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); } std::optional StorageFileSource::tryGetNumRowsFromCache(const String & path, time_t last_mod_time) const { - auto & schema_cache = StorageFile::getSchemaCache(context); - auto key = 
getKeyForSchemaCache(path, storage->format_name, storage->format_settings, context); + auto & schema_cache = StorageFile::getSchemaCache(getContext()); + auto key = getKeyForSchemaCache(path, storage->format_name, storage->format_settings, getContext()); auto get_last_mod_time = [&]() -> std::optional { return last_mod_time; @@ -1317,21 +1499,23 @@ class ReadFromFile : public SourceStepWithFilter public: std::string getName() const override { return "ReadFromFile"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; ReadFromFile( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, std::shared_ptr storage_, ReadFromFormatInfo info_, const bool need_only_count_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , storage(std::move(storage_)) , info(std::move(info_)) , need_only_count(need_only_count_) - , context(std::move(context_)) , max_block_size(max_block_size_) , max_num_streams(num_streams_) { @@ -1342,7 +1526,6 @@ private: ReadFromFormatInfo info; const bool need_only_count; - ContextPtr context; size_t max_block_size; const size_t max_num_streams; @@ -1351,9 +1534,9 @@ private: void createIterator(const ActionsDAG::Node * predicate); }; -void ReadFromFile::applyFilters() +void ReadFromFile::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1397,16 +1580,19 @@ void StorageFile::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && context->getSettingsRef().optimize_count_from_files; auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + context, read_from_format_info.source_header, std::move(this_ptr), std::move(read_from_format_info), need_only_count, - context, max_block_size, num_streams); @@ -1422,7 +1608,7 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate) storage->paths, storage->archive_info, predicate, - storage->virtual_columns, + storage->getVirtualsList(), context, storage->distributed_processing); } @@ -1445,8 +1631,10 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui Pipes pipes; pipes.reserve(num_streams); + auto ctx = getContext(); + /// Set total number of bytes to process. For progress bar. 
- auto progress_callback = context->getFileProgressCallback(); + auto progress_callback = ctx->getFileProgressCallback(); if (progress_callback && !storage->archive_info) progress_callback(FileProgress(0, storage->total_bytes_to_read)); @@ -1464,20 +1652,20 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui auto source = std::make_shared( info, storage, - context, + ctx, max_block_size, files_iterator, std::move(read_buffer), need_only_count); - source->setKeyCondition(filter_nodes.nodes, context); + source->setKeyCondition(filter_actions_dag, ctx); pipes.emplace_back(std::move(source)); } auto pipe = Pipe::unitePipes(std::move(pipes)); size_t output_ports = pipe.numOutputPorts(); - const bool parallelize_output = context->getSettingsRef().parallelize_output_from_storages; - if (parallelize_output && storage->parallelizeOutputAfterReading(context) && output_ports > 0 && output_ports < max_num_streams) + const bool parallelize_output = ctx->getSettingsRef().parallelize_output_from_storages; + if (parallelize_output && storage->parallelizeOutputAfterReading(ctx) && output_ports > 0 && output_ports < max_num_streams) pipe.resize(max_num_streams); if (pipe.empty()) @@ -1490,7 +1678,7 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui } -class StorageFileSink final : public SinkToStorage +class StorageFileSink final : public SinkToStorage, WithContext { public: StorageFileSink( @@ -1503,9 +1691,9 @@ public: const CompressionMethod compression_method_, const std::optional & format_settings_, const String format_name_, - ContextPtr context_, + const ContextPtr & context_, int flags_) - : SinkToStorage(metadata_snapshot_->getSampleBlock()) + : SinkToStorage(metadata_snapshot_->getSampleBlock()), WithContext(context_) , metadata_snapshot(metadata_snapshot_) , table_name_for_log(table_name_for_log_) , table_fd(table_fd_) @@ -1515,7 +1703,6 @@ public: , compression_method(compression_method_) , format_name(format_name_) , format_settings(format_settings_) - , context(context_) , flags(flags_) { initialize(); @@ -1532,9 +1719,9 @@ public: const CompressionMethod compression_method_, const std::optional & format_settings_, const String format_name_, - ContextPtr context_, + const ContextPtr & context_, int flags_) - : SinkToStorage(metadata_snapshot_->getSampleBlock()) + : SinkToStorage(metadata_snapshot_->getSampleBlock()), WithContext(context_) , metadata_snapshot(metadata_snapshot_) , table_name_for_log(table_name_for_log_) , table_fd(table_fd_) @@ -1544,7 +1731,6 @@ public: , compression_method(compression_method_) , format_name(format_name_) , format_settings(format_settings_) - , context(context_) , flags(flags_) , lock(std::move(lock_)) { @@ -1568,7 +1754,7 @@ public: /// In case of formats with prefixes if file is not empty we have already written prefix. 
bool do_not_write_prefix = naked_buffer->size(); - const auto & settings = context->getSettingsRef(); + const auto & settings = getContext()->getSettingsRef(); write_buf = wrapWriteBufferWithCompressionMethod( std::move(naked_buffer), compression_method, @@ -1576,7 +1762,7 @@ public: static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format_name, - *write_buf, metadata_snapshot->getSampleBlock(), context, format_settings); + *write_buf, metadata_snapshot->getSampleBlock(), getContext(), format_settings); if (do_not_write_prefix) writer->doNotWritePrefix(); @@ -1659,7 +1845,6 @@ private: std::string format_name; std::optional format_settings; - ContextPtr context; int flags; std::unique_lock lock; @@ -2044,7 +2229,7 @@ StorageFile::ArchiveInfo StorageFile::getArchiveInfo( const std::string & path_to_archive, const std::string & file_in_archive, const std::string & user_files_path, - ContextPtr context, + const ContextPtr & context, size_t & total_bytes_to_read ) { @@ -2070,9 +2255,4 @@ StorageFile::ArchiveInfo StorageFile::getArchiveInfo( return archive_info; } -Names StorageFile::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 2955eb0f1aa..93c263008a6 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -80,11 +80,7 @@ public: bool storesDataOnDisk() const override; Strings getDataPaths() const override; - NamesAndTypesList getVirtuals() const override { return virtual_columns; } - - static Names getVirtualColumnNames(); - - static Strings getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read); + static Strings getPathsList(const String & table_path, const String & user_files_path, const ContextPtr & context, size_t & total_bytes_to_read); /// Check if the format supports reading only some subset of columns. 
/// Is is useful because such formats could effectively skip unknown columns @@ -112,14 +108,19 @@ public: } }; - ColumnsDescription getTableStructureFromFileDescriptor(ContextPtr context); - static ColumnsDescription getTableStructureFromFile( const String & format, const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context, + const ContextPtr & context, + const std::optional & archive_info = std::nullopt); + + static std::pair getTableStructureAndFormatFromFile( + const std::vector & paths, + const String & compression_method, + const std::optional & format_settings, + const ContextPtr & context, const std::optional & archive_info = std::nullopt); static SchemaCache & getSchemaCache(const ContextPtr & context); @@ -130,7 +131,7 @@ public: const std::string & path_to_archive, const std::string & file_in_archive, const std::string & user_files_path, - ContextPtr context, + const ContextPtr & context, size_t & total_bytes_to_read); bool supportsTrivialCountOptimization() const override { return true; } @@ -141,6 +142,16 @@ protected: friend class ReadFromFile; private: + std::pair getTableStructureAndFormatFromFileDescriptor(std::optional format, const ContextPtr & context); + + static std::pair getTableStructureAndFormatFromFileImpl( + std::optional format, + const std::vector & paths, + const String & compression_method, + const std::optional & format_settings, + const ContextPtr & context, + const std::optional & archive_info = std::nullopt); + void setStorageMetadata(CommonArguments args); std::string format_name; @@ -182,15 +193,13 @@ private: std::atomic readers_counter = 0; FileRenamer file_renamer; bool was_renamed = false; - - NamesAndTypesList virtual_columns; bool distributed_processing = false; }; -class StorageFileSource : public SourceWithKeyCondition +class StorageFileSource : public SourceWithKeyCondition, WithContext { public: - class FilesIterator + class FilesIterator : WithContext { public: explicit FilesIterator( @@ -198,7 +207,7 @@ public: std::optional archive_info_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, - ContextPtr context_, + const ContextPtr & context_, bool distributed_processing_ = false); String next(); @@ -227,8 +236,6 @@ private: std::atomic index = 0; bool distributed_processing; - - ContextPtr context; }; using FilesIteratorPtr = std::shared_ptr; @@ -236,7 +243,7 @@ private: StorageFileSource( const ReadFromFormatInfo & info, std::shared_ptr storage_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size_, FilesIteratorPtr files_iterator_, std::unique_ptr read_buf_, @@ -256,7 +263,7 @@ private: return storage->getName(); } - void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override; + void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; bool tryGetCountFromCache(const struct stat & file_stat); @@ -286,7 +293,6 @@ private: NamesAndTypesList requested_virtual_columns; Block block_for_format; - ContextPtr context; /// TODO Untangle potential issues with context lifetime. 
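Several classes in this diff (FilesIterator, StorageFileSource, StorageFileSink, ReadFromFile) drop their ContextPtr member and inherit WithContext instead, funnelling all access through getContext(). A minimal sketch of the pattern; holding the context weakly is an assumption about the real helper and is shown here only in simplified form.

#include <memory>
#include <stdexcept>

struct Context {};
using ContextPtr = std::shared_ptr<const Context>;

/// Simplified WithContext: keep the context behind one accessor instead of
/// letting every class hold and manage its own ContextPtr member.
class WithContextSketch
{
public:
    explicit WithContextSketch(const ContextPtr & context_) : context(context_) {}

    ContextPtr getContext() const
    {
        auto locked = context.lock();
        if (!locked)
            throw std::logic_error("Context has expired");
        return locked;
    }

private:
    std::weak_ptr<const Context> context;
};

class FilesIteratorSketch : private WithContextSketch
{
public:
    explicit FilesIteratorSketch(const ContextPtr & context_) : WithContextSketch(context_) {}

    void doWork()
    {
        auto ctx = getContext();  /// use settings, callbacks, ... through the locked context
        (void)ctx;
    }
};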
UInt64 max_block_size; bool finished_generate = false; diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index 0cc961bb464..d43e242f70c 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -25,55 +25,58 @@ extern const int LOGICAL_ERROR; } StorageFileCluster::StorageFileCluster( - ContextPtr context_, + const ContextPtr & context, const String & cluster_name_, const String & filename_, const String & format_name_, - const String & compression_method_, + const String & compression_method, const StorageID & table_id_, const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageFileCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + const ConstraintsDescription & constraints_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageFileCluster (" + table_id_.table_name + ")")) , filename(filename_) , format_name(format_name_) - , compression_method(compression_method_) { StorageInMemoryMetadata storage_metadata; size_t total_bytes_to_read; // its value isn't used as we are not reading files (just listing them). But it is required by getPathsList - paths = StorageFile::getPathsList(filename_, context_->getUserFilesPath(), context_, total_bytes_to_read); + paths = StorageFile::getPathsList(filename_, context->getUserFilesPath(), context, total_bytes_to_read); if (columns_.empty()) { - auto columns = StorageFile::getTableStructureFromFile(format_name, - paths, - compression_method, - std::nullopt, - context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = StorageFile::getTableStructureAndFormatFromFile(paths, compression_method, std::nullopt, context); + else + columns = StorageFile::getTableStructureFromFile(format_name, paths, compression_method, std::nullopt, context); + storage_metadata.setColumns(columns); } else + { + if (format_name == "auto") + format_name = StorageFile::getTableStructureAndFormatFromFile(paths, compression_method, std::nullopt, context).second; storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } -void StorageFileCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function fileCluster, got '{}'", queryToString(query)); - TableFunctionFileCluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionFileCluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context); } RemoteQueryExecutor::Extension StorageFileCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = 
std::make_shared(paths, std::nullopt, predicate, virtual_columns, context); + auto iterator = std::make_shared(paths, std::nullopt, predicate, getVirtualsList(), context); auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index a6e57c3bb4f..3acbc71ba7e 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -17,20 +17,17 @@ class StorageFileCluster : public IStorageCluster { public: StorageFileCluster( - ContextPtr context_, + const ContextPtr & context_, const String & cluster_name_, const String & filename_, const String & format_name_, const String & compression_method_, const StorageID & table_id_, const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - bool structure_argument_was_provided_); + const ConstraintsDescription & constraints_); std::string getName() const override { return "FileCluster"; } - NamesAndTypesList getVirtuals() const override { return virtual_columns; } - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -38,13 +35,11 @@ public: bool supportsTrivialCountOptimization() const override { return true; } private: - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; Strings paths; String filename; String format_name; - String compression_method; - NamesAndTypesList virtual_columns; }; } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 64ff224fc10..8e5195d497f 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -1,5 +1,8 @@ #include +#include +#include + #include #include #include @@ -7,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +27,7 @@ namespace ErrorCodes extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int TYPE_MISMATCH; extern const int EMPTY_LIST_OF_COLUMNS_PASSED; + extern const int LOGICAL_ERROR; } StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & other) @@ -41,6 +46,8 @@ StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & , settings_changes(other.settings_changes ? other.settings_changes->clone() : nullptr) , select(other.select) , refresh(other.refresh ? other.refresh->clone() : nullptr) + , definer(other.definer) + , sql_security_type(other.sql_security_type) , comment(other.comment) , metadata_version(other.metadata_version) { @@ -71,6 +78,8 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo settings_changes.reset(); select = other.select; refresh = other.refresh ? 
other.refresh->clone() : nullptr; + definer = other.definer; + sql_security_type = other.sql_security_type; comment = other.comment; metadata_version = other.metadata_version; return *this; @@ -81,6 +90,69 @@ void StorageInMemoryMetadata::setComment(const String & comment_) comment = comment_; } +void StorageInMemoryMetadata::setSQLSecurity(const ASTSQLSecurity & sql_security) +{ + if (sql_security.definer) + definer = sql_security.definer->toString(); + + sql_security_type = sql_security.type; +} + +UUID StorageInMemoryMetadata::getDefinerID(DB::ContextPtr context) const +{ + if (!definer) + { + if (const auto definer_id = context->getUserID()) + return *definer_id; + + throw Exception(ErrorCodes::LOGICAL_ERROR, "No user in context for sub query execution."); + } + + const auto & access_control = context->getAccessControl(); + return access_control.getID(*definer); +} + +ContextMutablePtr StorageInMemoryMetadata::getSQLSecurityOverriddenContext(ContextPtr context) const +{ + if (!sql_security_type) + return Context::createCopy(context); + + if (sql_security_type == SQLSecurityType::INVOKER) + return Context::createCopy(context); + + auto new_context = Context::createCopy(context->getGlobalContext()); + new_context->setClientInfo(context->getClientInfo()); + new_context->makeQueryContext(); + + const auto & database = context->getCurrentDatabase(); + if (!database.empty()) + new_context->setCurrentDatabase(database); + + new_context->setInsertionTable(context->getInsertionTable(), context->getInsertionTableColumnNames()); + new_context->setProgressCallback(context->getProgressCallback()); + new_context->setProcessListElement(context->getProcessListElement()); + + if (context->getCurrentTransaction()) + new_context->setCurrentTransaction(context->getCurrentTransaction()); + + if (context->getZooKeeperMetadataTransaction()) + new_context->initZooKeeperMetadataTransaction(context->getZooKeeperMetadataTransaction()); + + if (sql_security_type == SQLSecurityType::NONE) + { + new_context->applySettingsChanges(context->getSettingsRef().changes()); + return new_context; + } + + new_context->setUser(getDefinerID(context)); + + auto changed_settings = context->getSettingsRef().changes(); + new_context->clampToSettingsConstraints(changed_settings, SettingSource::QUERY); + new_context->applySettingsChanges(changed_settings); + + return new_context; +} + void StorageInMemoryMetadata::setColumns(ColumnsDescription columns_) { if (columns_.getAllPhysical().empty()) diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index ecc30f7b756..2823aba1224 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -51,6 +53,14 @@ struct StorageInMemoryMetadata /// Materialized view REFRESH parameters. ASTPtr refresh; + /// DEFINER . Allows to specify a definer of the table. + /// Supported for MaterializedView and View. + std::optional definer; + + /// SQL SECURITY + /// Supported for MaterializedView and View. + std::optional sql_security_type; + String comment; /// Version of metadata. Managed properly by ReplicatedMergeTree only @@ -105,6 +115,15 @@ struct StorageInMemoryMetadata /// Get copy of current metadata with metadata_version_ StorageInMemoryMetadata withMetadataVersion(int32_t metadata_version_) const; + /// Sets SQL security for the storage. 
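The new `getSQLSecurityOverriddenContext` above encodes three behaviours: `INVOKER` keeps the caller's context, `NONE` keeps the caller's setting changes without switching the user, and `DEFINER` re-authenticates as the definer and clamps the caller's setting changes to constraints. The sketch below only mirrors that branching for readability; the enum values, the `Ctx` struct and the helper are hypothetical and are not the real ClickHouse `Context` API.

```cpp
#include <iostream>
#include <optional>
#include <string>

enum class SQLSecurityType { INVOKER, DEFINER, NONE };   // assumed value set, per the diff

struct Ctx
{
    std::string user;
    int settings_version = 0;   // stands in for the caller's settings changes
};

// Illustrative analogue of StorageInMemoryMetadata::getSQLSecurityOverriddenContext.
Ctx overrideContext(const Ctx & caller, std::optional<SQLSecurityType> type, const std::optional<std::string> & definer)
{
    if (!type || *type == SQLSecurityType::INVOKER)
        return caller;                                // plain copy: the query runs as the invoker

    Ctx result = caller;                              // copy client info, query scope, etc.
    if (*type == SQLSecurityType::NONE)
        return result;                                // keep caller's settings, no user switch

    result.user = definer.value_or(caller.user);      // DEFINER: switch to the view's definer...
    // ...and the caller's setting changes would be clamped to the definer's constraints here.
    return result;
}

int main()
{
    Ctx caller{"alice", 42};
    auto ctx = overrideContext(caller, SQLSecurityType::DEFINER, std::string("view_owner"));
    std::cout << "query runs as: " << ctx.user << '\n';   // prints "view_owner"
}
```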
+ void setSQLSecurity(const ASTSQLSecurity & sql_security); + UUID getDefinerID(ContextPtr context) const; + + /// Returns a copy of the context with the correct user from SQL security options. + /// If the SQL security wasn't set, this is equivalent to `Context::createCopy(context)`. + /// The context from this function must be used every time whenever views execute any read/write operations or subqueries. + ContextMutablePtr getSQLSecurityOverriddenContext(ContextPtr context) const; + /// Returns combined set of columns const ColumnsDescription & getColumns() const; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index b9e082c0b22..d12e5b1a20b 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,7 @@ #include #include + namespace fs = std::filesystem; namespace DB @@ -500,7 +502,7 @@ protected: Chunk chunk; if (!joinDispatch(join->kind, join->strictness, join->data->maps.front(), [&](auto kind, auto strictness, auto & map) { chunk = createChunk(map); })) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unknown JOIN strictness"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness"); return chunk; } diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 80abaa3ea2d..b0b7afdfe8d 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -233,7 +233,7 @@ public: will_be); } - zookeeper->multi(requests); + zookeeper->multi(requests, /* check_session_valid */ true); } }; @@ -331,6 +331,10 @@ StorageKeeperMap::StorageKeeperMap( setInMemoryMetadata(metadata); + VirtualColumnsDescription virtuals; + virtuals.addEphemeral(String(version_column_name), std::make_shared(), ""); + setVirtuals(std::move(virtuals)); + WriteBufferFromOwnString out; out << "KeeperMap metadata format version: 1\n" << "columns: " << metadata.columns.toString() @@ -367,7 +371,7 @@ StorageKeeperMap::StorageKeeperMap( zk_metadata_path = metadata_path_fs; zk_tables_path = metadata_path_fs / "tables"; - auto table_unique_id = toString(table_id.uuid) + toString(ServerUUID::get()); + table_unique_id = toString(table_id.uuid) + toString(ServerUUID::get()); zk_table_path = fs::path(zk_tables_path) / table_unique_id; zk_dropped_path = metadata_path_fs / "dropped"; @@ -634,12 +638,6 @@ void StorageKeeperMap::drop() dropTable(client, metadata_drop_lock); } -NamesAndTypesList StorageKeeperMap::getVirtuals() const -{ - return NamesAndTypesList{ - {std::string{version_column_name}, std::make_shared()}}; -} - namespace { @@ -753,14 +751,12 @@ private: void StorageKeeperMap::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /*partitions*/) { - auto table_id = toString(getStorageID().uuid); - auto coordination = backup_entries_collector.getBackupCoordination(); - coordination->addKeeperMapTable(zk_root_path, table_id, data_path_in_backup); + coordination->addKeeperMapTable(zk_root_path, table_unique_id, data_path_in_backup); /// This task will be executed after all tables have registered their root zk path and the coordination is ready to /// assign each path to a single table only. 
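A recurring change in this diff (StorageKeeperMap here, and MaterializedMySQL, FileCluster and Merge below) is that storages no longer override `getVirtuals()`; they build a `VirtualColumnsDescription` once and hand it to `setVirtuals(...)` in the constructor. The following generic sketch only illustrates that "register once, query later" shape; the classes and method names besides `addEphemeral`/`setVirtuals` are placeholders rather than the ClickHouse types.

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>

// Minimal stand-in for VirtualColumnsDescription: column name -> type name.
struct VirtualColumnsDescription
{
    std::map<std::string, std::string> columns;
    void addEphemeral(std::string name, std::string type) { columns.emplace(std::move(name), std::move(type)); }
    bool has(const std::string & name) const { return columns.count(name) != 0; }
};

// Minimal stand-in for IStorage: the virtuals are set once and then only read.
class StorageBase
{
public:
    void setVirtuals(VirtualColumnsDescription desc) { virtuals = std::move(desc); }
    const VirtualColumnsDescription & getVirtualsDescription() const { return virtuals; }
private:
    VirtualColumnsDescription virtuals;
};

class KeeperMapLikeStorage : public StorageBase
{
public:
    KeeperMapLikeStorage()
    {
        VirtualColumnsDescription desc;
        desc.addEphemeral("_version", "Int64");   // analogue of the KeeperMap version column
        setVirtuals(std::move(desc));             // registered at construction; no getVirtuals() override
    }
};

int main()
{
    KeeperMapLikeStorage storage;
    std::cout << std::boolalpha << storage.getVirtualsDescription().has("_version") << '\n';   // true
}
```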
- auto post_collecting_task = [my_table_id = std::move(table_id), coordination, &backup_entries_collector, my_data_path_in_backup = data_path_in_backup, this] + auto post_collecting_task = [coordination, &backup_entries_collector, my_data_path_in_backup = data_path_in_backup, this] { auto path_with_data = coordination->getKeeperMapDataPath(zk_root_path); if (path_with_data != my_data_path_in_backup) @@ -798,8 +794,7 @@ void StorageKeeperMap::restoreDataFromBackup(RestorerFromBackup & restorer, cons if (!backup->hasFiles(data_path_in_backup)) return; - auto table_id = toString(getStorageID().uuid); - if (!restorer.getRestoreCoordination()->acquireInsertingDataForKeeperMap(zk_root_path, table_id)) + if (!restorer.getRestoreCoordination()->acquireInsertingDataForKeeperMap(zk_root_path, table_unique_id)) { /// Other table is already restoring the data for this Keeper path. /// Tables defined on the same path share data @@ -1210,7 +1205,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca } Coordination::Responses responses; - auto status = client->tryMulti(delete_requests, responses); + auto status = client->tryMulti(delete_requests, responses, /* check_session_valid */ true); if (status == Coordination::Error::ZOK) return; diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index 9dca96a24a3..d4556792c48 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -50,8 +50,6 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override; void drop() override; - NamesAndTypesList getVirtuals() const override; - std::string getName() const override { return "KeeperMap"; } Names getPrimaryKey() const override { return {primary_key}; } @@ -125,10 +123,10 @@ private: std::string primary_key; std::string zk_data_path; - std::string zk_metadata_path; - std::string zk_tables_path; + + std::string table_unique_id; std::string zk_table_path; std::string zk_dropped_path; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index c7b0a9d0644..549cfca1b6c 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -48,8 +47,6 @@ namespace DB { - CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; @@ -241,7 +238,7 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu const auto & data_file_it = storage.data_files_by_names.find(data_file_name); if (data_file_it == storage.data_files_by_names.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no information about file {} in StorageLog", data_file_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No information about file {} in StorageLog", data_file_name); const auto & data_file = *data_file_it->second; size_t offset = stream_for_prefix ? 
0 : offsets[data_file.index]; @@ -299,6 +296,7 @@ public: : SinkToStorage(metadata_snapshot_->getSampleBlock()) , storage(storage_) , metadata_snapshot(metadata_snapshot_) + , storage_snapshot(std::make_shared(storage, metadata_snapshot)) , lock(std::move(lock_)) { if (!lock) @@ -343,6 +341,7 @@ public: private: StorageLog & storage; StorageMetadataPtr metadata_snapshot; + StorageSnapshotPtr storage_snapshot; WriteLock lock; bool done = false; @@ -448,7 +447,7 @@ ISerialization::OutputStreamGetter LogSink::createStreamGetter(const NameAndType String data_file_name = ISerialization::getFileNameForStream(name_and_type, path); auto it = streams.find(data_file_name); if (it == streams.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: stream was not created when writing data in LogSink"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Stream was not created when writing data in LogSink"); Stream & stream = it->second; if (stream.written) @@ -473,16 +472,10 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c { const auto & data_file_it = storage.data_files_by_names.find(data_file_name); if (data_file_it == storage.data_files_by_names.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no information about file {} in StorageLog", data_file_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No information about file {} in StorageLog", data_file_name); const auto & data_file = *data_file_it->second; - const auto & columns = metadata_snapshot->getColumns(); - - CompressionCodecPtr compression; - if (name_and_type.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec; - else - compression = columns.getCodecOrDefault(name_and_type.name); + auto compression = storage_snapshot->getCodecOrDefault(name_and_type.name); it = streams.try_emplace(data_file.name, storage.disk, data_file.path, storage.file_checker.getFileSize(data_file.path), @@ -569,7 +562,7 @@ StorageLog::StorageLog( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - bool attach, + LoadingStrictnessLevel mode, ContextMutablePtr context_) : IStorage(table_id_) , WithMutableContext(context_) @@ -603,7 +596,7 @@ StorageLog::StorageLog( file_checker.setEmpty(marks_file_path); } - if (!attach) + if (mode < LoadingStrictnessLevel::ATTACH) { /// create directories if they do not exist disk->createDirectories(table_path); @@ -1163,7 +1156,7 @@ void registerStorageLog(StorageFactory & factory) args.columns, args.constraints, args.comment, - args.attach, + args.mode, args.getContext()); }; diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index d5daed21b3c..882e9cfaa75 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -40,7 +40,7 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - bool attach, + LoadingStrictnessLevel mode, ContextMutablePtr context_); ~StorageLog() override; diff --git a/src/Storages/StorageMaterializedMySQL.cpp b/src/Storages/StorageMaterializedMySQL.cpp index 0dc0b1bff0b..887c58ff816 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -22,9 +22,8 @@ namespace DB StorageMaterializedMySQL::StorageMaterializedMySQL(const StoragePtr & nested_storage_, const IDatabase * database_) : StorageProxy(nested_storage_->getStorageID()), nested_storage(nested_storage_), database(database_) { - StorageInMemoryMetadata 
in_memory_metadata; - in_memory_metadata = nested_storage->getInMemoryMetadata(); - setInMemoryMetadata(in_memory_metadata); + setInMemoryMetadata(nested_storage->getInMemoryMetadata()); + setVirtuals(*nested_storage->getVirtualsPtr()); } bool StorageMaterializedMySQL::needRewriteQueryWithFinal(const Names & column_names) const @@ -49,14 +48,6 @@ void StorageMaterializedMySQL::read( query_info, context, processed_stage, max_block_size, num_streams); } -NamesAndTypesList StorageMaterializedMySQL::getVirtuals() const -{ - if (const auto * db = typeid_cast(database)) - db->rethrowExceptionIfNeeded(); - - return nested_storage->getVirtuals(); -} - IStorage::ColumnSizeByName StorageMaterializedMySQL::getColumnSizes() const { auto sizes = nested_storage->getColumnSizes(); diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 2cd589bfd75..9f5d157ce3b 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -34,7 +34,6 @@ public: SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, bool) override { throwNotAllowed(); } - NamesAndTypesList getVirtuals() const override; ColumnSizeByName getColumnSizes() const override; StoragePtr getNested() const override { return nested_storage; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index bfe75e61bcd..9958d65819b 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; extern const int INCORRECT_QUERY; + extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW; extern const int TOO_MANY_MATERIALIZED_VIEWS; } @@ -71,12 +72,17 @@ StorageMaterializedView::StorageMaterializedView( ContextPtr local_context, const ASTCreateQuery & query, const ColumnsDescription & columns_, - bool attach_, + LoadingStrictnessLevel mode, const String & comment) : IStorage(table_id_), WithMutableContext(local_context->getGlobalContext()) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + if (query.sql_security) + storage_metadata.setSQLSecurity(query.sql_security->as()); + + if (storage_metadata.sql_security_type == SQLSecurityType::INVOKER) + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "SQL SECURITY INVOKER can't be specified for MATERIALIZED VIEW"); if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); @@ -118,7 +124,7 @@ StorageMaterializedView::StorageMaterializedView( { target_table_id = query.to_table_id; } - else if (attach_) + else if (LoadingStrictnessLevel::ATTACH <= mode) { /// If there is an ATTACH request, then the internal table must already be created. 
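The boolean `attach` flag (and, for StorageMergeTree below, the separate `has_force_restore_data_flag`) is folded into a single ordered `LoadingStrictnessLevel`, so the call sites turn into comparisons such as `mode < LoadingStrictnessLevel::ATTACH` or `LoadingStrictnessLevel::FORCE_RESTORE <= mode`. The sketch below only demonstrates why an ordered enum makes those checks read naturally; the exact enumerator list is an assumption derived from the comparisons in this diff, not the full ClickHouse definition.

```cpp
#include <iostream>

// Assumed ordering, consistent with the comparisons used in the diff:
// plain CREATE is the strictest mode, FORCE_RESTORE the most permissive.
enum class LoadingStrictnessLevel { CREATE = 0, ATTACH = 1, FORCE_RESTORE = 2 };

void loadTable(LoadingStrictnessLevel mode)
{
    if (mode < LoadingStrictnessLevel::ATTACH)
        std::cout << "CREATE path: create directories, refuse pre-existing data parts\n";

    if (LoadingStrictnessLevel::ATTACH <= mode)
        std::cout << "ATTACH path: expect existing metadata and inner tables\n";

    if (LoadingStrictnessLevel::FORCE_RESTORE <= mode)
        std::cout << "force-restore path: skip sanity checks that would abort loading\n";
}

int main()
{
    loadTable(LoadingStrictnessLevel::CREATE);
    loadTable(LoadingStrictnessLevel::FORCE_RESTORE);
}
```

Collapsing two flags into one ordered level also removes the awkward `(attach, has_force_restore_data_flag)` pairs from constructor signatures, as seen in the StorageLog and StorageMergeTree hunks.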
target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), query.to_inner_uuid); @@ -151,7 +157,7 @@ StorageMaterializedView::StorageMaterializedView( *this, getContext(), *query.refresh_strategy); - refresh_on_start = !attach_ && !query.is_create_empty; + refresh_on_start = mode < LoadingStrictnessLevel::ATTACH && !query.is_create_empty; } } @@ -165,6 +171,12 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( return getTargetTable()->getQueryProcessingStage(local_context, to_stage, getTargetTable()->getStorageSnapshot(target_metadata, local_context), query_info); } +StorageSnapshotPtr StorageMaterializedView::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const +{ + /// We cannot set virtuals at table creation because target table may not exist at that time. + return std::make_shared(*this, metadata_snapshot, getTargetTable()->getVirtualsPtr()); +} + void StorageMaterializedView::read( QueryPlan & query_plan, const Names & column_names, @@ -175,19 +187,28 @@ void StorageMaterializedView::read( const size_t max_block_size, const size_t num_streams) { + auto context = getInMemoryMetadataPtr()->getSQLSecurityOverriddenContext(local_context); auto storage = getTargetTable(); - auto lock = storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); + auto lock = storage->lockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); auto target_metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto target_storage_snapshot = storage->getStorageSnapshot(target_metadata_snapshot, local_context); + auto target_storage_snapshot = storage->getStorageSnapshot(target_metadata_snapshot, context); if (query_info.order_optimizer) - query_info.input_order_info = query_info.order_optimizer->getInputOrder(target_metadata_snapshot, local_context); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(target_metadata_snapshot, context); - storage->read(query_plan, column_names, target_storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + if (!getInMemoryMetadataPtr()->select.select_table_id.empty()) + context->checkAccess(AccessType::SELECT, getInMemoryMetadataPtr()->select.select_table_id, column_names); + + auto storage_id = storage->getStorageID(); + /// We don't need to check access if the inner table was created automatically. 
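With SQL security in place, `StorageMaterializedView::read` above now checks `SELECT` on the table referenced by the view's definition and, when the target is an explicit `TO` table rather than an auto-created inner table, also on the target. A condensed, hypothetical version of that guard is shown below; `AccessChecker` and the free function are placeholders, not the real `Context::checkAccess` interface.

```cpp
#include <iostream>
#include <string>
#include <vector>

struct AccessChecker   // stand-in for the access-checking part of Context
{
    void checkSelect(const std::string & table, const std::vector<std::string> & columns) const
    {
        std::cout << "check SELECT on " << table << " (" << columns.size() << " columns)\n";
        // the real check would throw ACCESS_DENIED if the (possibly overridden) user lacks the grant
    }
};

void readFromMaterializedView(
    const AccessChecker & context,
    const std::string & source_table,       // table from the view's SELECT definition
    const std::string & target_table,       // table the view reads from / writes to
    bool has_inner_table,                   // true when the target was created automatically
    const std::vector<std::string> & column_names)
{
    if (!source_table.empty())
        context.checkSelect(source_table, column_names);

    // No check needed for an auto-created inner table: it is owned by the view itself.
    if (!has_inner_table && !target_table.empty())
        context.checkSelect(target_table, column_names);

    // ... delegate the actual read to the target storage ...
}

int main()
{
    readFromMaterializedView(AccessChecker{}, "db.src", "db.dst", /*has_inner_table=*/ false, {"x"});
}
```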
+ if (!has_inner_table && !storage_id.empty()) + context->checkAccess(AccessType::SELECT, storage_id, column_names); + + storage->read(query_plan, column_names, target_storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); if (query_plan.isInitialized()) { - auto mv_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); + auto mv_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, context, processed_stage); auto target_header = query_plan.getCurrentDataStream().header; /// No need to convert columns that does not exists in MV @@ -222,11 +243,20 @@ void StorageMaterializedView::read( SinkToStoragePtr StorageMaterializedView::write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr local_context, bool async_insert) { + auto context = getInMemoryMetadataPtr()->getSQLSecurityOverriddenContext(local_context); auto storage = getTargetTable(); - auto lock = storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - + auto lock = storage->lockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto sink = storage->write(query, metadata_snapshot, local_context, async_insert); + + auto storage_id = storage->getStorageID(); + /// We don't need to check access if the inner table was created automatically. + if (!has_inner_table && !storage_id.empty()) + { + auto query_sample_block = InterpreterInsertQuery::getSampleBlock(query->as(), storage, metadata_snapshot, context); + context->checkAccess(AccessType::INSERT, storage_id, query_sample_block.getNames()); + } + + auto sink = storage->write(query, metadata_snapshot, context, async_insert); sink->addTableLock(lock); return sink; @@ -297,7 +327,7 @@ bool StorageMaterializedView::optimize( std::tuple> StorageMaterializedView::prepareRefresh() const { - auto refresh_context = Context::createCopy(getContext()); + auto refresh_context = getInMemoryMetadataPtr()->getSQLSecurityOverriddenContext(getContext()); /// Generate a random query id. 
refresh_context->setCurrentQueryId(""); @@ -378,15 +408,24 @@ void StorageMaterializedView::checkAlterIsPossible(const AlterCommands & command { for (const auto & command : commands) { - if (command.isCommentAlter()) + if (command.type == AlterCommand::MODIFY_SQL_SECURITY) + { + if (command.sql_security->as().type == SQLSecurityType::INVOKER) + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "SQL SECURITY INVOKER can't be specified for MATERIALIZED VIEW"); + continue; - if (command.type == AlterCommand::MODIFY_QUERY) + } + else if (command.isCommentAlter()) continue; - if (command.type == AlterCommand::MODIFY_REFRESH && refresher) + else if (command.type == AlterCommand::MODIFY_QUERY) continue; + else if (command.type == AlterCommand::MODIFY_REFRESH && refresher) + continue; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", - command.type, getName()); + command.type, getName()); } + } void StorageMaterializedView::checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const @@ -426,8 +465,8 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) if (!from_atomic_to_atomic_database && has_inner_table && tryGetTargetTable()) { auto new_target_table_name = generateInnerTableName(new_table_id); - auto rename = std::make_shared(); + ASTRenameQuery::Elements rename_elements; assert(inner_table_id.database_name == old_table_id.database_name); ASTRenameQuery::Element elem @@ -443,8 +482,9 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) std::make_shared(new_target_table_name) } }; - rename->elements.emplace_back(std::move(elem)); + rename_elements.emplace_back(std::move(elem)); + auto rename = std::make_shared(std::move(rename_elements)); InterpreterRenameQuery(rename, getContext()).execute(); updateTargetTableId(new_table_id.database_name, new_target_table_name); } @@ -503,11 +543,6 @@ StoragePtr StorageMaterializedView::tryGetTargetTable() const return DatabaseCatalog::instance().tryGetTable(getTargetTableId(), getContext()); } -NamesAndTypesList StorageMaterializedView::getVirtuals() const -{ - return getTargetTable()->getVirtuals(); -} - Strings StorageMaterializedView::getDataPaths() const { if (auto table = tryGetTargetTable()) @@ -624,7 +659,7 @@ void registerStorageMaterializedView(StorageFactory & factory) /// Pass local_context here to convey setting for inner table return std::make_shared( args.table_id, args.getLocalContext(), args.query, - args.columns, args.attach, args.comment); + args.columns, args.mode, args.comment); }); } diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 4678060d81f..198b7a642ee 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -18,7 +18,7 @@ public: ContextPtr local_context, const ASTCreateQuery & query, const ColumnsDescription & columns_, - bool attach_, + LoadingStrictnessLevel mode, const String & comment); std::string getName() const override { return "MaterializedView"; } @@ -75,12 +75,11 @@ public: StoragePtr tryGetTargetTable() const; StorageID getTargetTableId() const; - /// Get the virtual column of the target table; - NamesAndTypesList getVirtuals() const override; - ActionLock getActionLock(StorageActionBlockType type) override; void onActionLockRemove(StorageActionBlockType action_type) override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) 
const override; + void read( QueryPlan & query_plan, const Names & column_names, diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 02d81eda59a..7a8fb9feeda 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -152,13 +154,14 @@ void StorageMemory::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + SelectQueryInfo & query_info, + ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, size_t num_streams) { - query_plan.addStep(std::make_unique(column_names, shared_from_this(), storage_snapshot, num_streams, delay_read_for_global_subqueries)); + query_plan.addStep(std::make_unique( + column_names, query_info, storage_snapshot, context, shared_from_this(), num_streams, delay_read_for_global_subqueries)); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79d7b83cada..8410f0a8df8 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -63,10 +63,12 @@ #include #include +namespace DB +{ + namespace { -using namespace DB; bool columnIsPhysical(ColumnDefaultKind kind) { return kind == ColumnDefaultKind::Default || kind == ColumnDefaultKind::Materialized; @@ -82,10 +84,23 @@ bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs) return false; } +/// Adds to the select query section `WITH value AS column_name` +/// +/// For example: +/// - `WITH 9000 as _port`. +void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value) +{ + auto & select = ast->as(); + if (!select.with()) + select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); + + auto literal = std::make_shared(value); + literal->alias = column_name; + literal->prefer_alias_to_column_name = true; + select.with()->children.push_back(literal); } -namespace DB -{ +} namespace ErrorCodes { @@ -132,6 +147,7 @@ StorageMerge::StorageMerge( storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } StorageMerge::StorageMerge( @@ -154,6 +170,7 @@ StorageMerge::StorageMerge( storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } StorageMerge::DatabaseTablesIterators StorageMerge::getDatabaseIterators(ContextPtr context_) const @@ -212,6 +229,7 @@ bool StorageMerge::tableSupportsPrewhere() const /// If new table that matches regexp for current storage and doesn't support PREWHERE /// will appear after this check and before calling "read" method, the optimized query may fail. /// Since it's quite rare case, we just ignore this possibility. 
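The file-local `rewriteEntityInAst` helper added to StorageMerge.cpp above pins a virtual column such as `_table` or `_database` to a constant for child tables that are not themselves `Merge`, by prepending a `WITH <value> AS <name>` clause to the child query (the comment's `WITH 9000 as _port` example). The sketch below only shows the textual effect of that rewrite; it manipulates strings rather than the real `ASTSelectQuery`.

```cpp
#include <iostream>
#include <string>

// Illustrative only: the real helper edits the select query's WITH expression list,
// setting an aliased literal, not the query text.
std::string addWithAlias(const std::string & query, const std::string & column_name, const std::string & value)
{
    return "WITH '" + value + "' AS " + column_name + " " + query;
}

int main()
{
    std::string child_query = "SELECT _table, count() FROM db.local_table GROUP BY _table";

    // For a child table that has no _table virtual column of its own,
    // the Merge engine injects the table name as a constant alias:
    std::cout << addWithAlias(child_query, "_table", "local_table") << '\n';
    // -> WITH 'local_table' AS _table SELECT _table, count() FROM db.local_table GROUP BY _table
}
```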
+ /// TODO: Store tables inside StorageSnapshot /// /// NOTE: Type can be different, and in this case, PREWHERE cannot be /// applied for those columns, but there a separate method to return @@ -219,11 +237,6 @@ bool StorageMerge::tableSupportsPrewhere() const return getFirstTable([](const auto & table) { return !table->canMoveConditionsToPrewhere(); }) == nullptr; } -bool StorageMerge::canMoveConditionsToPrewhere() const -{ - return tableSupportsPrewhere(); -} - std::optional StorageMerge::supportedPrewhereColumns() const { bool supports_prewhere = true; @@ -310,6 +323,37 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( return selected_table_size == 1 ? stage_in_source_tables : std::min(stage_in_source_tables, QueryProcessingStage::WithMergeableState); } +VirtualColumnsDescription StorageMerge::createVirtuals() +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_database", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_table", std::make_shared(std::make_shared()), ""); + + return desc; +} + +StorageSnapshotPtr StorageMerge::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const +{ + static const auto common_virtuals = createVirtuals(); + + auto virtuals = common_virtuals; + if (auto first_table = getFirstTable([](auto && table) { return table; })) + { + auto table_virtuals = first_table->getVirtualsPtr(); + for (const auto & column : *table_virtuals) + { + if (virtuals.has(column.name)) + continue; + + virtuals.add(column); + } + } + + auto virtuals_ptr = std::make_shared(std::move(virtuals)); + return std::make_shared(*this, metadata_snapshot, std::move(virtuals_ptr)); +} + void StorageMerge::read( QueryPlan & query_plan, const Names & column_names, @@ -320,53 +364,50 @@ void StorageMerge::read( const size_t max_block_size, size_t num_streams) { - /** Just in case, turn off optimization "transfer to PREWHERE", - * since there is no certainty that it works when one of table is MergeTree and other is not. - */ - auto modified_context = Context::createCopy(local_context); - modified_context->setSetting("optimize_move_to_prewhere", false); - query_plan.addInterpreterContext(modified_context); - /// What will be result structure depending on query processed stage in source tables? 
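`StorageMerge::getStorageSnapshot` above starts from the common `_database`/`_table` virtuals and then merges in the first child table's virtual columns, skipping names that already exist. Below is a standalone sketch of that merge, using a plain map in place of `VirtualColumnsDescription`; the types are placeholders.

```cpp
#include <iostream>
#include <map>
#include <string>

using VirtualsMap = std::map<std::string, std::string>;   // name -> type, illustrative stand-in

VirtualsMap mergeVirtuals(const VirtualsMap & common, const VirtualsMap & first_table)
{
    VirtualsMap result = common;
    for (const auto & [name, type] : first_table)
    {
        if (result.count(name))
            continue;          // the Merge-level definition of _database/_table wins
        result.emplace(name, type);
    }
    return result;
}

int main()
{
    VirtualsMap common{{"_database", "LowCardinality(String)"}, {"_table", "LowCardinality(String)"}};
    VirtualsMap child{{"_table", "String"}, {"_part", "String"}};   // e.g. a MergeTree child table

    for (const auto & [name, type] : mergeVirtuals(common, child))
        std::cout << name << " : " << type << '\n';
    // _database and _table keep the Merge definitions; _part is inherited from the child.
}
```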
Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); auto step = std::make_unique( - common_header, column_names, + query_info, + storage_snapshot, + local_context, + common_header, max_block_size, num_streams, shared_from_this(), - storage_snapshot, - query_info, - std::move(modified_context), processed_stage); query_plan.addStep(std::move(step)); } ReadFromMerge::ReadFromMerge( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block common_header_, - Names all_column_names_, size_t max_block_size, size_t num_streams, StoragePtr storage, - StorageSnapshotPtr storage_snapshot, - const SelectQueryInfo & query_info_, - ContextMutablePtr context_, QueryProcessingStage::Enum processed_stage) - : SourceStepWithFilter(DataStream{.header = common_header_}) + : SourceStepWithFilter(DataStream{.header = common_header_}, column_names_, query_info_, storage_snapshot_, context_) , required_max_block_size(max_block_size) , requested_num_streams(num_streams) , common_header(std::move(common_header_)) - , all_column_names(std::move(all_column_names_)) + , all_column_names(column_names_) , storage_merge(std::move(storage)) - , merge_storage_snapshot(std::move(storage_snapshot)) - , query_info(query_info_) - , context(std::move(context_)) + , merge_storage_snapshot(storage_snapshot) , common_processed_stage(processed_stage) { } +void ReadFromMerge::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) +{ + SourceStepWithFilter::updatePrewhereInfo(prewhere_info_value); + common_header = applyPrewhereActions(common_header, prewhere_info); +} + void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { filterTablesAndCreateChildrenPlans(); @@ -407,8 +448,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu common_header, child_plan.table_aliases, child_plan.row_policy_data_opt, - table, - context); + table); if (source_pipeline && source_pipeline->initialized()) { @@ -506,6 +546,8 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ query_info_.input_order_info = input_sorting_info; } + /// Settings will be modified when planning children tables. + auto modified_context = Context::createCopy(context); for (const auto & table : selected_tables) { size_t current_need_streams = tables_count >= num_streams ? 1 : (num_streams / tables_count); @@ -545,7 +587,8 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt->extendNames(real_column_names); } - auto modified_query_info = getModifiedQueryInfo(context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); + auto modified_query_info + = getModifiedQueryInfo(context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); if (!context->getSettingsRef().allow_experimental_analyzer) { @@ -612,10 +655,13 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ table, column_names_as_aliases.empty() ? 
std::move(real_column_names) : std::move(column_names_as_aliases), row_policy_data_opt, - context, + modified_context, current_streams); } + if (!res.empty()) + res[0].plan.addInterpreterContext(modified_context); + return res; } @@ -819,7 +865,7 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin( SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot_, Names required_column_names, Names & column_names_as_aliases, Aliases & aliases) const @@ -828,10 +874,12 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ const StorageID current_storage_id = storage->getStorageID(); SelectQueryInfo modified_query_info = query_info; + if (modified_query_info.optimized_prewhere_info && !modified_query_info.prewhere_info) + modified_query_info.prewhere_info = modified_query_info.optimized_prewhere_info; if (modified_query_info.table_expression) { - auto replacement_table_expression = std::make_shared(storage, storage_lock, storage_snapshot); + auto replacement_table_expression = std::make_shared(storage, storage_lock, storage_snapshot_); if (query_info.table_expression_modifiers) replacement_table_expression->setTableExpressionModifiers(*query_info.table_expression_modifiers); @@ -840,26 +888,26 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot->storage.supportsSubcolumns()) + if (storage_snapshot_->storage.supportsSubcolumns()) get_column_options.withSubcolumns(); std::unordered_map column_name_to_node; - if (!storage_snapshot->tryGetColumn(get_column_options, "_table")) + if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); table_name_node->setAlias("_table"); column_name_to_node.emplace("_table", table_name_node); } - if (!storage_snapshot->tryGetColumn(get_column_options, "_database")) + if (!storage_snapshot_->tryGetColumn(get_column_options, "_database")) { auto database_name_node = std::make_shared(current_storage_id.database_name); database_name_node->setAlias("_database"); column_name_to_node.emplace("_database", database_name_node); } - auto storage_columns = storage_snapshot->metadata->getColumns(); + auto storage_columns = storage_snapshot_->metadata->getColumns(); bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); if (with_aliases) @@ -897,13 +945,12 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ column_node = std::make_shared(NameAndTypePair{column, storage_columns.getColumn(get_column_options, column).type }, modified_query_info.table_expression); } - PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); actions_visitor.visit(filter_actions_dag, column_node); } column_names_as_aliases = filter_actions_dag->getRequiredColumnsNames(); if (column_names_as_aliases.empty()) - column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + 
column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_snapshot_->metadata->getColumns().getAllPhysical()).name); } if (!column_name_to_node.empty()) @@ -928,8 +975,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if (!is_storage_merge_engine) { - VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", current_storage_id.table_name); - VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_database", current_storage_id.database_name); + rewriteEntityInAst(modified_query_info.query, "_table", current_storage_id.table_name); + rewriteEntityInAst(modified_query_info.query, "_database", current_storage_id.database_name); } } @@ -956,14 +1003,13 @@ bool recursivelyApplyToReadingSteps(QueryPlan::Node * node, const std::function< QueryPipelineBuilderPtr ReadFromMerge::createSources( QueryPlan & plan, - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot_, SelectQueryInfo & modified_query_info, QueryProcessingStage::Enum processed_stage, const Block & header, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, const StorageWithLockAndName & storage_with_lock, - ContextMutablePtr modified_context, bool concat_streams) const { if (!plan.isInitialized()) @@ -972,12 +1018,12 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( QueryPipelineBuilderPtr builder; const auto & [database_name, storage, _, table_name] = storage_with_lock; - bool allow_experimental_analyzer = modified_context->getSettingsRef().allow_experimental_analyzer; + bool allow_experimental_analyzer = context->getSettingsRef().allow_experimental_analyzer; auto storage_stage - = storage->getQueryProcessingStage(modified_context, QueryProcessingStage::Complete, storage_snapshot, modified_query_info); + = storage->getQueryProcessingStage(context, QueryProcessingStage::Complete, storage_snapshot_, modified_query_info); builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(modified_context), BuildQueryPipelineSettings::fromContext(modified_context)); + QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) { @@ -1001,7 +1047,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (has_database_virtual_column && !pipe_header.has("_database")) + if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) { ColumnWithTypeAndName column; column.name = "_database"; @@ -1010,13 +1056,13 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); builder->addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, adding_column_actions); }); } - if (has_table_virtual_column && !pipe_header.has("_table")) + if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) { ColumnWithTypeAndName column; column.name = "_table"; @@ -1025,7 +1071,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( auto 
adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); builder->addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, adding_column_actions); }); @@ -1033,14 +1079,15 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. - convertAndFilterSourceStream(header, modified_query_info, storage_snapshot, aliases, row_policy_data_opt, modified_context, *builder, processed_stage); + convertAndFilterSourceStream( + header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, processed_stage); } return builder; } QueryPlan ReadFromMerge::createPlanForTable( - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot_, SelectQueryInfo & modified_query_info, QueryProcessingStage::Enum processed_stage, UInt64 max_block_size, @@ -1065,7 +1112,7 @@ QueryPlan ReadFromMerge::createPlanForTable( auto storage_stage = storage->getQueryProcessingStage(modified_context, QueryProcessingStage::Complete, - storage_snapshot, + storage_snapshot_, modified_query_info); QueryPlan plan; @@ -1074,14 +1121,14 @@ QueryPlan ReadFromMerge::createPlanForTable( { /// If there are only virtual columns in query, you must request at least one other column. if (real_column_names.empty()) - real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot_->metadata->getColumns().getAllPhysical()).name); StorageView * view = dynamic_cast(storage.get()); if (!view || allow_experimental_analyzer) { storage->read(plan, real_column_names, - storage_snapshot, + storage_snapshot_, modified_query_info, modified_context, processed_stage, @@ -1111,13 +1158,9 @@ QueryPlan ReadFromMerge::createPlanForTable( if (row_policy_data_opt) { - if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) - { + if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) row_policy_data_opt->addStorageFilter(source_step_with_filter); - } } - - applyFilters(plan); } else if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) { @@ -1259,7 +1302,6 @@ StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables( if (!filter_by_database_virtual_column && !filter_by_table_virtual_column) return res; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); if (!filter_actions_dag) return res; @@ -1380,6 +1422,7 @@ void StorageMerge::alter( params.apply(storage_metadata, local_context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, storage_metadata); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } void ReadFromMerge::convertAndFilterSourceStream( @@ -1388,7 +1431,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const StorageSnapshotPtr & snapshot, const Aliases & aliases, const RowPolicyDataOpt & 
row_policy_data_opt, - ContextMutablePtr local_context, + ContextPtr local_context, QueryPipelineBuilder & builder, QueryProcessingStage::Enum processed_stage) { @@ -1505,28 +1548,41 @@ bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_) return true; } -void ReadFromMerge::applyFilters(const QueryPlan & plan) const +void ReadFromMerge::applyFilters(const QueryPlan & plan, const ActionDAGNodes & added_filter_nodes) const { - auto apply_filters = [this](ReadFromMergeTree & read_from_merge_tree) + auto apply_filters = [&added_filter_nodes](ReadFromMergeTree & read_from_merge_tree) { - size_t filters_dags_size = filter_dags.size(); - for (size_t i = 0; i < filters_dags_size; ++i) - read_from_merge_tree.addFilter(filter_dags[i], filter_nodes.nodes[i]); + for (const auto & node : added_filter_nodes.nodes) + read_from_merge_tree.addFilterFromParentStep(node); - read_from_merge_tree.applyFilters(); + read_from_merge_tree.SourceStepWithFilter::applyFilters(); return true; }; recursivelyApplyToReadingSteps(plan.getRootNode(), apply_filters); } -void ReadFromMerge::applyFilters() +void ReadFromMerge::applyFilters(ActionDAGNodes added_filter_nodes) { + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + filterTablesAndCreateChildrenPlans(); for (const auto & child_plan : *child_plans) if (child_plan.plan.isInitialized()) - applyFilters(child_plan.plan); + applyFilters(child_plan.plan, added_filter_nodes); +} + +QueryPlanRawPtrs ReadFromMerge::getChildPlans() +{ + filterTablesAndCreateChildrenPlans(); + + QueryPlanRawPtrs plans; + for (auto & child_plan : *child_plans) + if (child_plan.plan.isInitialized()) + plans.push_back(&child_plan.plan); + + return plans; } IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const @@ -1626,20 +1682,4 @@ void registerStorageMerge(StorageFactory & factory) }); } -NamesAndTypesList StorageMerge::getVirtuals() const -{ - NamesAndTypesList virtuals{ - {"_database", std::make_shared(std::make_shared())}, - {"_table", std::make_shared(std::make_shared())}}; - - auto first_table = getFirstTable([](auto && table) { return table; }); - if (first_table) - { - auto table_virtuals = first_table->getVirtuals(); - virtuals.insert(virtuals.end(), table_virtuals.begin(), table_virtuals.end()); - } - - return virtuals; -} - } diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index f5b6c3a7ca9..556649f622d 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -49,14 +49,14 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsSubcolumns() const override { return true; } - bool supportsPrewhere() const override { return true; } + bool supportsPrewhere() const override { return tableSupportsPrewhere(); } std::optional supportedPrewhereColumns() const override; - bool canMoveConditionsToPrewhere() const override; - QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const override; + void read( QueryPlan & query_plan, const Names & column_names, @@ -118,11 +118,12 @@ private: template void forEachTable(F && func) const; - NamesAndTypesList getVirtuals() const override; ColumnSizeByName getColumnSizes() const override; ColumnsDescription getColumnsDescriptionFromSourceTables() const; + static 
VirtualColumnsDescription createVirtuals(); + bool tableSupportsPrewhere() const; template @@ -142,14 +143,14 @@ public: using DatabaseTablesIterators = std::vector; ReadFromMerge( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block common_header_, - Names all_column_names_, size_t max_block_size, size_t num_streams, StoragePtr storage, - StorageSnapshotPtr storage_snapshot, - const SelectQueryInfo & query_info_, - ContextMutablePtr context_, QueryProcessingStage::Enum processed_stage); void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; @@ -159,12 +160,16 @@ public: /// Returns `false` if requested reading cannot be performed. bool requestReadingInOrder(InputOrderInfoPtr order_info_); - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; + + QueryPlanRawPtrs getChildPlans() override; + + void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) override; private: const size_t required_max_block_size; const size_t requested_num_streams; - const Block common_header; + Block common_header; StorageListWithLocks selected_tables; Names all_column_names; @@ -174,8 +179,6 @@ private: StoragePtr storage_merge; StorageSnapshotPtr merge_storage_snapshot; - SelectQueryInfo query_info; - ContextMutablePtr context; QueryProcessingStage::Enum common_processed_stage; InputOrderInfoPtr order_info; @@ -243,7 +246,7 @@ private: void filterTablesAndCreateChildrenPlans(); - void applyFilters(const QueryPlan & plan) const; + void applyFilters(const QueryPlan & plan, const ActionDAGNodes & added_filter_nodes) const; QueryPlan createPlanForTable( const StorageSnapshotPtr & storage_snapshot, @@ -265,7 +268,6 @@ private: const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, const StorageWithLockAndName & storage_with_lock, - ContextMutablePtr modified_context, bool concat_streams = false) const; static void convertAndFilterSourceStream( @@ -274,7 +276,7 @@ private: const StorageSnapshotPtr & snapshot, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, - ContextMutablePtr context, + ContextPtr context, QueryPipelineBuilder & builder, QueryProcessingStage::Enum processed_stage); @@ -282,6 +284,8 @@ private: ContextPtr query_context, bool filter_by_database_virtual_column, bool filter_by_table_virtual_column) const; + + // static VirtualColumnsDescription createVirtuals(StoragePtr first_table); }; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 678535da732..663e7f435b7 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -96,12 +96,11 @@ StorageMergeTree::StorageMergeTree( const StorageID & table_id_, const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, - bool attach, + LoadingStrictnessLevel mode, ContextMutablePtr context_, const String & date_column_name, const MergingParams & merging_params_, - std::unique_ptr storage_settings_, - bool has_force_restore_data_flag) + std::unique_ptr storage_settings_) : MergeTreeData( table_id_, metadata_, @@ -110,17 +109,17 @@ StorageMergeTree::StorageMergeTree( merging_params_, std::move(storage_settings_), false, /// require_part_metadata - attach) + mode) , reader(*this) , 
writer(*this) , merger_mutator(*this) { - initializeDirectoriesAndFormatVersion(relative_data_path_, attach, date_column_name); + initializeDirectoriesAndFormatVersion(relative_data_path_, LoadingStrictnessLevel::ATTACH <= mode, date_column_name); - loadDataParts(has_force_restore_data_flag, std::nullopt); + loadDataParts(LoadingStrictnessLevel::FORCE_RESTORE <= mode, std::nullopt); - if (!attach && !getDataPartsForInternalUsage().empty() && !isStaticStorage()) + if (mode < LoadingStrictnessLevel::ATTACH && !getDataPartsForInternalUsage().empty() && !isStaticStorage()) throw Exception(ErrorCodes::INCORRECT_DATA, "Data directory for table already containing data parts - probably " "it was unclean DROP table or manual intervention. " @@ -539,6 +538,8 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re Int64 sources_data_version = result_part->parts.at(0)->info.getDataVersion(); Int64 result_data_version = result_part->part_info.getDataVersion(); + auto & failed_part = result_part->parts.at(0); + if (sources_data_version != result_data_version) { std::lock_guard lock(currently_processing_in_background_mutex); @@ -556,14 +557,21 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re entry.latest_failed_part_info = MergeTreePartInfo(); entry.latest_fail_time = 0; entry.latest_fail_reason.clear(); + if (static_cast(result_part->part_info.mutation) == it->first) + mutation_backoff_policy.removePartFromFailed(failed_part->name); } } else { - entry.latest_failed_part = result_part->parts.at(0)->name; - entry.latest_failed_part_info = result_part->parts.at(0)->info; + entry.latest_failed_part = failed_part->name; + entry.latest_failed_part_info = failed_part->info; entry.latest_fail_time = time(nullptr); entry.latest_fail_reason = exception_message; + + if (static_cast(result_part->part_info.mutation) == it->first) + { + mutation_backoff_policy.addPartMutationFailure(failed_part->name, getSettings()->max_postpone_time_for_failed_mutations_ms); + } } } } @@ -834,6 +842,8 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) } } + mutation_backoff_policy.resetMutationFailures(); + if (!to_kill) return CancellationCode::NotFound; @@ -1218,6 +1228,12 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( TransactionID first_mutation_tid = mutations_begin_it->second.tid; MergeTreeTransactionPtr txn; + if (!mutation_backoff_policy.partCanBeMutated(part->name)) + { + LOG_DEBUG(log, "According to exponential backoff policy, do not perform mutations for the part {} yet. Put it aside.", part->name); + continue; + } + if (!first_mutation_tid.isPrehistoric()) { @@ -2298,12 +2314,11 @@ std::optional StorageMergeTree::checkDataNext(DataValidationTasksPt { /// If the checksums file is not present, calculate the checksums and write them to disk. 
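The StorageMergeTree hunks above wire failed mutations into an exponential-backoff policy: a repeatedly failing part is registered with `addPartMutationFailure`, `partCanBeMutated` postpones further attempts, successful mutation calls `removePartFromFailed`, and `killMutation` calls `resetMutationFailures`. The class below is a self-contained sketch of such a policy (doubling delay capped by a maximum postpone time, cf. `max_postpone_time_for_failed_mutations_ms`); the internal logic is assumed for illustration and is not the actual ClickHouse implementation.

```cpp
#include <algorithm>
#include <chrono>
#include <iostream>
#include <map>
#include <string>

using Clock = std::chrono::steady_clock;

// Illustrative backoff policy: each consecutive failure doubles the postpone
// interval, capped by max_postpone_time_ms.
class MutationBackoffPolicy
{
public:
    void addPartMutationFailure(const std::string & part_name, uint64_t max_postpone_time_ms)
    {
        auto & state = failures[part_name];
        state.count += 1;
        uint64_t delay_ms = std::min<uint64_t>(max_postpone_time_ms, 1000ull << std::min<uint64_t>(state.count, 20));
        state.allowed_at = Clock::now() + std::chrono::milliseconds(delay_ms);
    }

    bool partCanBeMutated(const std::string & part_name) const
    {
        auto it = failures.find(part_name);
        return it == failures.end() || Clock::now() >= it->second.allowed_at;
    }

    void removePartFromFailed(const std::string & part_name) { failures.erase(part_name); }
    void resetMutationFailures() { failures.clear(); }

private:
    struct State { size_t count = 0; Clock::time_point allowed_at; };
    std::map<std::string, State> failures;
};

int main()
{
    MutationBackoffPolicy policy;
    policy.addPartMutationFailure("all_1_1_0", /*max_postpone_time_ms=*/ 60000);
    std::cout << std::boolalpha << policy.partCanBeMutated("all_1_1_0") << '\n';   // false right after a failure
    policy.removePartFromFailed("all_1_1_0");
    std::cout << policy.partCanBeMutated("all_1_1_0") << '\n';                     // true again
}
```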
static constexpr auto checksums_path = "checksums.txt"; - bool noop; if (part->isStoredOnDisk() && !part->getDataPartStorage().exists(checksums_path)) { try { - auto calculated_checksums = checkDataPart(part, false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true); + auto calculated_checksums = checkDataPart(part, false); calculated_checksums.checkEqual(part->checksums, true); auto & part_mutable = const_cast(*part); @@ -2324,7 +2339,7 @@ std::optional StorageMergeTree::checkDataNext(DataValidationTasksPt { try { - checkDataPart(part, true, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true); + checkDataPart(part, true); return CheckResult(part->name, true, ""); } catch (...) @@ -2394,19 +2409,21 @@ void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) } -std::map StorageMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const +MutationCommands StorageMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const { std::lock_guard lock(currently_processing_in_background_mutex); UInt64 part_data_version = part->info.getDataVersion(); - std::map result; + MutationCommands result; for (const auto & [mutation_version, entry] : current_mutations_by_version | std::views::reverse) { - if (mutation_version > part_data_version) - result[mutation_version] = entry.commands; - else + if (mutation_version <= part_data_version) break; + + for (const auto & command : entry.commands | std::views::reverse) + if (AlterConversions::supportsMutationCommandType(command.type)) + result.emplace_back(command); } return result; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 359fa1d262d..c384a391291 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -37,12 +37,11 @@ public: const StorageID & table_id_, const String & relative_data_path_, const StorageInMemoryMetadata & metadata, - bool attach, + LoadingStrictnessLevel mode, ContextMutablePtr context_, const String & date_column_name, const MergingParams & merging_params_, - std::unique_ptr settings_, - bool has_force_restore_data_flag); + std::unique_ptr settings_); void startup() override; void shutdown(bool is_drop) override; @@ -308,7 +307,7 @@ private: }; protected: - std::map getAlterMutationCommandsForPart(const DataPartPtr & part) const override; + MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const override; }; } diff --git a/src/Storages/StorageMergeTreeIndex.cpp b/src/Storages/StorageMergeTreeIndex.cpp new file mode 100644 index 00000000000..5f89849e920 --- /dev/null +++ b/src/Storages/StorageMergeTreeIndex.cpp @@ -0,0 +1,363 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NO_SUCH_COLUMN_IN_TABLE; + extern const int NOT_IMPLEMENTED; +} + +class MergeTreeIndexSource : public ISource, WithContext +{ +public: + MergeTreeIndexSource( + Block header_, + Block index_header_, + MergeTreeData::DataPartsVector data_parts_, + ContextPtr context_, + bool with_marks_) + : ISource(header_) + , WithContext(context_) + , header(std::move(header_)) + , index_header(std::move(index_header_)) + , data_parts(std::move(data_parts_)) + , with_marks(with_marks_) + { + } + + String getName() const override { return 
"MergeTreeIndex"; } + +protected: + Chunk generate() override + { + if (part_index >= data_parts.size()) + return {}; + + const auto & part = data_parts[part_index]; + const auto & index_granularity = part->index_granularity; + + std::shared_ptr marks_loader; + if (with_marks && isCompactPart(part)) + marks_loader = createMarksLoader(part, MergeTreeDataPartCompact::DATA_FILE_NAME, part->getColumns().size()); + + size_t num_columns = header.columns(); + size_t num_rows = index_granularity.getMarksCount(); + + const auto & part_name_column = StorageMergeTreeIndex::part_name_column; + const auto & mark_number_column = StorageMergeTreeIndex::mark_number_column; + const auto & rows_in_granule_column = StorageMergeTreeIndex::rows_in_granule_column; + + const auto & index = part->getIndex(); + Columns result_columns(num_columns); + for (size_t pos = 0; pos < num_columns; ++pos) + { + const auto & column_name = header.getByPosition(pos).name; + const auto & column_type = header.getByPosition(pos).type; + + if (index_header.has(column_name)) + { + size_t index_position = index_header.getPositionByName(column_name); + result_columns[pos] = index[index_position]; + } + else if (column_name == part_name_column.name) + { + auto column = column_type->createColumnConst(num_rows, part->name); + result_columns[pos] = column->convertToFullColumnIfConst(); + } + else if (column_name == mark_number_column.name) + { + auto column = column_type->createColumn(); + auto & data = assert_cast(*column).getData(); + + data.resize(num_rows); + std::iota(data.begin(), data.end(), 0); + + result_columns[pos] = std::move(column); + } + else if (column_name == rows_in_granule_column.name) + { + auto column = column_type->createColumn(); + auto & data = assert_cast(*column).getData(); + + data.resize(num_rows); + for (size_t i = 0; i < num_rows; ++i) + data[i] = index_granularity.getMarkRows(i); + + result_columns[pos] = std::move(column); + } + else if (auto [first, second] = Nested::splitName(column_name, true); with_marks && second == "mark") + { + result_columns[pos] = fillMarks(part, marks_loader, *column_type, first); + } + else + { + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "No such column {}", column_name); + } + } + + ++part_index; + return Chunk(std::move(result_columns), num_rows); + } + +private: + std::shared_ptr createMarksLoader(const MergeTreeDataPartPtr & part, const String & prefix_name, size_t num_columns) + { + auto info_for_read = std::make_shared(part, std::make_shared()); + auto local_context = getContext(); + + return std::make_shared( + info_for_read, + local_context->getMarkCache().get(), + info_for_read->getIndexGranularityInfo().getMarksFilePath(prefix_name), + info_for_read->getMarksCount(), + info_for_read->getIndexGranularityInfo(), + /*save_marks_in_cache=*/ false, + local_context->getReadSettings(), + /*load_marks_threadpool=*/ nullptr, + num_columns); + } + + ColumnPtr fillMarks( + MergeTreeDataPartPtr part, + std::shared_ptr marks_loader, + const IDataType & data_type, + const String & column_name) + { + size_t col_idx = 0; + bool has_marks_in_part = false; + size_t num_rows = part->index_granularity.getMarksCount(); + + if (isWidePart(part)) + { + if (auto stream_name = part->getStreamNameOrHash(column_name, part->checksums)) + { + col_idx = 0; + has_marks_in_part = true; + marks_loader = createMarksLoader(part, *stream_name, /*num_columns=*/ 1); + } + } + else if (isCompactPart(part)) + { + auto unescaped_name = unescapeForFileName(column_name); + if (auto col_idx_opt = 
part->getColumnPosition(unescaped_name)) + { + col_idx = *col_idx_opt; + has_marks_in_part = true; + } + } + else + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Parts with type {} are not supported", part->getTypeName()); + } + + if (!has_marks_in_part) + { + auto column = data_type.createColumnConstWithDefaultValue(num_rows); + return column->convertToFullColumnIfConst(); + } + + auto compressed = ColumnUInt64::create(num_rows); + auto uncompressed = ColumnUInt64::create(num_rows); + + auto & compressed_data = compressed->getData(); + auto & uncompressed_data = uncompressed->getData(); + + for (size_t i = 0; i < num_rows; ++i) + { + auto mark = marks_loader->getMark(i, col_idx); + + compressed_data[i] = mark.offset_in_compressed_file; + uncompressed_data[i] = mark.offset_in_decompressed_block; + } + + auto compressed_nullable = ColumnNullable::create(std::move(compressed), ColumnUInt8::create(num_rows, 0)); + auto uncompressed_nullable = ColumnNullable::create(std::move(uncompressed), ColumnUInt8::create(num_rows, 0)); + + return ColumnTuple::create(Columns{std::move(compressed_nullable), std::move(uncompressed_nullable)}); + } + + Block header; + Block index_header; + MergeTreeData::DataPartsVector data_parts; + bool with_marks; + + size_t part_index = 0; +}; + +const ColumnWithTypeAndName StorageMergeTreeIndex::part_name_column{std::make_shared(), "part_name"}; +const ColumnWithTypeAndName StorageMergeTreeIndex::mark_number_column{std::make_shared(), "mark_number"}; +const ColumnWithTypeAndName StorageMergeTreeIndex::rows_in_granule_column{std::make_shared(), "rows_in_granule"}; +const Block StorageMergeTreeIndex::virtuals_sample_block{part_name_column, mark_number_column, rows_in_granule_column}; + +StorageMergeTreeIndex::StorageMergeTreeIndex( + const StorageID & table_id_, + const StoragePtr & source_table_, + const ColumnsDescription & columns, + bool with_marks_) + : IStorage(table_id_) + , source_table(source_table_) + , with_marks(with_marks_) +{ + const auto * merge_tree = dynamic_cast(source_table.get()); + if (!merge_tree) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MergeTreeIndex expected MergeTree table, got: {}", source_table->getName()); + + data_parts = merge_tree->getDataPartsVectorForInternalUsage(); + key_sample_block = merge_tree->getInMemoryMetadataPtr()->getPrimaryKey().sample_block; + + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns); + setInMemoryMetadata(storage_metadata); +} + +class ReadFromMergeTreeIndex : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromMergeTreeIndex"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromMergeTreeIndex( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , log(&Poco::Logger::get("StorageMergeTreeIndex")) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::shared_ptr storage; + Poco::Logger * log; + const ActionsDAG::Node * predicate = nullptr; +}; + +void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = 
ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageMergeTreeIndex::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum, + size_t /*max_block_size*/, + size_t /*num_streams*/) +{ + const auto & storage_columns = source_table->getInMemoryMetadataPtr()->getColumns(); + Names columns_from_storage; + + for (const auto & column_name : column_names) + { + if (storage_columns.hasColumnOrSubcolumn(GetColumnsOptions::All, column_name)) + { + columns_from_storage.push_back(column_name); + continue; + } + + if (with_marks) + { + auto [first, second] = Nested::splitName(column_name, true); + auto unescaped_name = unescapeForFileName(first); + + if (second == "mark" && storage_columns.hasColumnOrSubcolumn(GetColumnsOptions::All, unescapeForFileName(unescaped_name))) + { + columns_from_storage.push_back(unescaped_name); + continue; + } + } + } + + context->checkAccess(AccessType::SELECT, source_table->getStorageID(), columns_from_storage); + + auto sample_block = storage_snapshot->getSampleBlockForColumns(column_names); + + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(sample_block), std::move(this_ptr)); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto filtered_parts = storage->getFilteredDataParts(predicate, context); + + LOG_DEBUG(log, "Reading index{}from {} parts of table {}", + storage->with_marks ? " with marks " : " ", + filtered_parts.size(), + storage->source_table->getStorageID().getNameForLogs()); + + pipeline.init(Pipe(std::make_shared(getOutputStream().header, storage->key_sample_block, std::move(filtered_parts), context, storage->with_marks))); +} + +MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const +{ + if (!predicate) + return data_parts; + + auto all_part_names = ColumnString::create(); + for (const auto & part : data_parts) + all_part_names->insert(part->name); + + Block filtered_block{{std::move(all_part_names), std::make_shared(), part_name_column.name}}; + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); + + if (!filtered_block.rows()) + return {}; + + auto part_names = filtered_block.getByPosition(0).column; + const auto & part_names_str = assert_cast(*part_names); + + HashSet part_names_set; + for (size_t i = 0; i < part_names_str.size(); ++i) + part_names_set.insert(part_names_str.getDataAt(i)); + + MergeTreeData::DataPartsVector filtered_parts; + for (const auto & part : data_parts) + if (part_names_set.has(part->name)) + filtered_parts.push_back(part); + + return filtered_parts; +} + +} diff --git a/src/Storages/StorageMergeTreeIndex.h b/src/Storages/StorageMergeTreeIndex.h new file mode 100644 index 00000000000..a1fb61d5a56 --- /dev/null +++ b/src/Storages/StorageMergeTreeIndex.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Internal temporary storage for table function mergeTreeIndex(...) 
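// MergeTreeIndexSource::generate() added above emits one row per mark of a part: a constant
// part_name column, a 0..N-1 mark_number column filled with std::iota, and the granule sizes
// taken from the part's index granularity. A toy, container-based sketch of that row model;
// the part name and granule sizes below are made-up sample data, not real parts.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

struct MergeTreeIndexRows
{
    std::vector<std::string> part_name;     // constant within one part
    std::vector<uint64_t> mark_number;      // 0, 1, 2, ...
    std::vector<uint64_t> rows_in_granule;  // from the part's index granularity
};

MergeTreeIndexRows rowsForPart(const std::string & name, const std::vector<uint64_t> & granule_sizes)
{
    MergeTreeIndexRows rows;
    const size_t num_marks = granule_sizes.size();

    rows.part_name.assign(num_marks, name);  // analogous to createColumnConst + convertToFullColumnIfConst
    rows.mark_number.resize(num_marks);
    std::iota(rows.mark_number.begin(), rows.mark_number.end(), uint64_t{0});
    rows.rows_in_granule = granule_sizes;

    return rows;
}

int main()
{
    // Hypothetical part with three granules of 8192, 8192 and 100 rows.
    auto rows = rowsForPart("all_1_1_0", {8192, 8192, 100});
    for (size_t i = 0; i < rows.mark_number.size(); ++i)
        std::cout << rows.part_name[i] << '\t' << rows.mark_number[i] << '\t' << rows.rows_in_granule[i] << '\n';
}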
+class StorageMergeTreeIndex final : public IStorage +{ +public: + static const ColumnWithTypeAndName part_name_column; + static const ColumnWithTypeAndName mark_number_column; + static const ColumnWithTypeAndName rows_in_granule_column; + static const Block virtuals_sample_block; + + StorageMergeTreeIndex( + const StorageID & table_id_, + const StoragePtr & source_table_, + const ColumnsDescription & columns, + bool with_marks_); + + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processing_stage, + size_t max_block_size, + size_t num_streams) override; + + String getName() const override { return "MergeTreeIndex"; } + +private: + friend class ReadFromMergeTreeIndex; + + MergeTreeData::DataPartsVector getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const; + + StoragePtr source_table; + bool with_marks; + + MergeTreeData::DataPartsVector data_parts; + Block key_sample_block; +}; + +} diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 18a1f9086ae..a8e7fd528dd 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -30,7 +30,6 @@ public: bool hasEvenlyDistributedRead() const override { return getNested()->hasEvenlyDistributedRead(); } ColumnSizeByName getColumnSizes() const override { return getNested()->getColumnSizes(); } - NamesAndTypesList getVirtuals() const override { return getNested()->getVirtuals(); } QueryProcessingStage::Enum getQueryProcessingStage( ContextPtr context, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8e1598a1eef..73c7e484bcb 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -188,6 +188,7 @@ namespace ErrorCodes extern const int CANNOT_BACKUP_TABLE; extern const int SUPPORT_IS_DISABLED; extern const int FAULT_INJECTED; + extern const int CANNOT_FORGET_PARTITION; } namespace ActionLocks @@ -288,7 +289,7 @@ static MergeTreePartInfo makeDummyDropRangeForMovePartitionOrAttachPartitionFrom StorageReplicatedMergeTree::StorageReplicatedMergeTree( const String & zookeeper_path_, const String & replica_name_, - bool attach, + LoadingStrictnessLevel mode, const StorageID & table_id_, const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, @@ -296,7 +297,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const String & date_column_name, const MergingParams & merging_params_, std::unique_ptr settings_, - bool has_force_restore_data_flag, RenamingRestrictions renaming_restrictions_, bool need_check_structure) : MergeTreeData(table_id_, @@ -306,10 +306,10 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( merging_params_, std::move(settings_), true, /// require_part_metadata - attach, + mode, [this] (const std::string & name) { enqueuePartForCheck(name); }) , zookeeper_name(zkutil::extractZooKeeperName(zookeeper_path_)) - , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ !attach, log.load())) + , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ mode <= LoadingStrictnessLevel::CREATE, log.load())) , replica_name(replica_name_) , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) @@ -327,7 +327,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , 
replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { - initializeDirectoriesAndFormatVersion(relative_data_path_, attach, date_column_name); + initializeDirectoriesAndFormatVersion(relative_data_path_, LoadingStrictnessLevel::ATTACH <= mode, date_column_name); /// We create and deactivate all tasks for consistency. /// They all will be scheduled and activated by the restarting thread. queue_updating_task = getContext()->getSchedulePool().createTask( @@ -379,7 +379,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( } catch (...) { - if (!attach) + if (mode < LoadingStrictnessLevel::ATTACH) { dropIfEmpty(); throw; @@ -395,7 +395,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( std::optional> expected_parts_on_this_replica; bool skip_sanity_checks = false; /// It does not make sense for CREATE query - if (attach) + if (LoadingStrictnessLevel::ATTACH <= mode) { try { @@ -416,7 +416,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( "Skipping the limits on severity of changes to data parts and columns (flag {}/flags/force_restore_data).", replica_path); } - else if (has_force_restore_data_flag) + else if (LoadingStrictnessLevel::FORCE_RESTORE <= mode) { skip_sanity_checks = true; @@ -443,7 +443,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( loadDataParts(skip_sanity_checks, expected_parts_on_this_replica); - if (attach) + if (LoadingStrictnessLevel::ATTACH <= mode) { /// Provide better initial value of merge_selecting_sleep_ms on server startup auto settings = getSettings(); @@ -458,7 +458,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( if (!current_zookeeper) { - if (!attach) + if (mode < LoadingStrictnessLevel::ATTACH) { dropIfEmpty(); throw Exception(ErrorCodes::NO_ZOOKEEPER, "Can't create replicated table without ZooKeeper"); @@ -474,7 +474,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( } } - if (attach) + if (LoadingStrictnessLevel::ATTACH <= mode) { LOG_INFO(log, "Table will be in readonly mode until initialization is finished"); attach_thread.emplace(*this); @@ -665,6 +665,34 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( } } +namespace +{ + +std::vector getAncestors(const String & path) +{ + auto full_path = fs::path(path); + if (full_path.empty()) + return {}; + + std::vector result; + fs::path ancestor = *full_path.begin(); + for (auto it = ++full_path.begin(); it != full_path.end(); it++) + { + /// If there is a directory separator after the last file-name in the path, + /// the last element before the end iterator is an empty element. 
+ /// We do not what to create path with the / at the end + if (!it->empty()) + { + ancestor = ancestor / *it; + result.push_back(ancestor); + } + } + + return result; +} + +} + void StorageReplicatedMergeTree::createNewZooKeeperNodes() { auto zookeeper = getZooKeeper(); @@ -687,44 +715,14 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() const auto settings = getSettings(); if (settings->allow_remote_fs_zero_copy_replication) { - auto disks = getStoragePolicy()->getDisks(); - std::set disk_types_for_zero_copy; - for (auto & disk : disks) + for (const auto & zero_copy_locks_root : getZookeeperZeroCopyLockPaths()) { - if (!disk->supportZeroCopyReplication()) - continue; - - disk_types_for_zero_copy.insert(disk->getDataSourceDescription().toString()); - } - - const auto table_shared_id_ = getTableSharedID(); - - if (!disk_types_for_zero_copy.empty()) - { - /// create all ancestors of remote_fs_zero_copy_zookeeper_path - auto remote_fs_zero_copy_zookeeper_path = fs::path(settings->remote_fs_zero_copy_zookeeper_path.toString()); - fs::path ancestor = *remote_fs_zero_copy_zookeeper_path.begin(); - for (auto it = ++remote_fs_zero_copy_zookeeper_path.begin(); it != remote_fs_zero_copy_zookeeper_path.end(); it++) + for (const auto & ancestor : getAncestors(zero_copy_locks_root)) { - /// If there is a directory separator after the last file-name in the path, - /// the last element before the end iterator is an empty element. - /// We do not what to create path with the / at the end - if (!it->empty()) - { - ancestor = ancestor / *it; - futures.push_back(zookeeper->asyncTryCreateNoThrow(ancestor, String(), zkutil::CreateMode::Persistent)); - } + futures.push_back(zookeeper->asyncTryCreateNoThrow(ancestor, String(), zkutil::CreateMode::Persistent)); } } - for (const auto & disk_type : disk_types_for_zero_copy) - { - auto zero_copy = fmt::format("zero_copy_{}", disk_type); - auto zero_copy_path = fs::path(settings->remote_fs_zero_copy_zookeeper_path.toString()) / zero_copy; - futures.push_back(zookeeper->asyncTryCreateNoThrow(zero_copy_path, String(), zkutil::CreateMode::Persistent)); - futures.push_back(zookeeper->asyncTryCreateNoThrow(zero_copy_path / table_shared_id_, String(), zkutil::CreateMode::Persistent)); - } - futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/zero_copy_s3", String(), zkutil::CreateMode::Persistent)); futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/zero_copy_s3/shared", String(), zkutil::CreateMode::Persistent)); futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/zero_copy_hdfs", String(), zkutil::CreateMode::Persistent)); @@ -1067,6 +1065,63 @@ zkutil::ZooKeeperPtr StorageReplicatedMergeTree::getZooKeeperIfTableShutDown() c return maybe_new_zookeeper; } +std::vector StorageReplicatedMergeTree::getZookeeperZeroCopyLockPaths() const +{ + const auto settings = getSettings(); + if (!settings->allow_remote_fs_zero_copy_replication) + { + return {}; + } + + const auto & disks = getStoragePolicy()->getDisks(); + std::set disk_types_with_zero_copy; + for (const auto & disk : disks) + { + if (!disk->supportZeroCopyReplication()) + continue; + + disk_types_with_zero_copy.insert(disk->getDataSourceDescription().toString()); + } + + const auto actual_table_shared_id = getTableSharedID(); + + std::vector result; + result.reserve(disk_types_with_zero_copy.size()); + + for (const auto & disk_type: disk_types_with_zero_copy) + { + auto zero_copy = fmt::format("zero_copy_{}", disk_type); + auto zero_copy_path = 
fs::path(settings->remote_fs_zero_copy_zookeeper_path.toString()) / zero_copy; + + result.push_back(zero_copy_path / actual_table_shared_id); + } + + return result; +} + +void StorageReplicatedMergeTree::dropZookeeperZeroCopyLockPaths(zkutil::ZooKeeperPtr zookeeper, std::vector zero_copy_locks_paths, + LoggerPtr logger) +{ + for (const auto & zero_copy_locks_root : zero_copy_locks_paths) + { + auto code = zookeeper->tryRemove(zero_copy_locks_root); + if (code == Coordination::Error::ZNOTEMPTY) + { + LOG_WARNING(logger, "Zero copy locks are not empty for {}. There are some lost locks inside." + "Removing them all.", zero_copy_locks_root); + zookeeper->tryRemoveRecursive(zero_copy_locks_root); + } + else if (code == Coordination::Error::ZNONODE) + { + LOG_INFO(logger, "Zero copy locks directory {} is absent on ZooKeeper.", zero_copy_locks_root); + } + else + { + chassert(code == Coordination::Error::ZOK); + } + } +} + void StorageReplicatedMergeTree::drop() { /// There is also the case when user has configured ClickHouse to wrong ZooKeeper cluster @@ -1098,6 +1153,11 @@ void StorageReplicatedMergeTree::drop() loadOutdatedDataParts(/*is_async=*/ false); } + + /// getZookeeperZeroCopyLockPaths has to be called before dropAllData + /// otherwise table_shared_id is unknown + auto zero_copy_locks_paths = getZookeeperZeroCopyLockPaths(); + dropAllData(); if (maybe_has_metadata_in_zookeeper) @@ -1114,12 +1174,18 @@ void StorageReplicatedMergeTree::drop() if (lost_part_count > 0) LOG_INFO(log, "Dropping table with non-zero lost_part_count equal to {}", lost_part_count); } - dropReplica(zookeeper, zookeeper_path, replica_name, log.load(), getSettings(), &has_metadata_in_zookeeper); + + bool last_replica_dropped = dropReplica(zookeeper, zookeeper_path, replica_name, log.load(), getSettings(), &has_metadata_in_zookeeper); + if (last_replica_dropped) + { + dropZookeeperZeroCopyLockPaths(zookeeper, zero_copy_locks_paths, log.load()); + } } } -void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - LoggerPtr logger, MergeTreeSettingsPtr table_settings, std::optional * has_metadata_out) + +bool StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, + LoggerPtr logger, MergeTreeSettingsPtr table_settings, std::optional * has_metadata_out) { if (zookeeper->expired()) throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired."); @@ -1136,7 +1202,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con if (!zookeeper->exists(remote_replica_path)) { LOG_INFO(logger, "Removing replica {} does not exist", remote_replica_path); - return; + return false; } { @@ -1191,7 +1257,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line. 
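// The getAncestors() helper introduced above turns a ZooKeeper path into the list of its
// ancestor paths so each one can be created with asyncTryCreateNoThrow. A standalone sketch of
// the same logic over std::filesystem::path; the zero-copy root used in main() is hypothetical.

#include <filesystem>
#include <iostream>
#include <string>
#include <vector>

namespace fs = std::filesystem;

std::vector<std::string> getAncestors(const std::string & path)
{
    fs::path full_path(path);
    if (full_path.empty())
        return {};

    std::vector<std::string> result;
    fs::path ancestor = *full_path.begin();
    for (auto it = ++full_path.begin(); it != full_path.end(); ++it)
    {
        /// A trailing '/' yields an empty final element; skip it so we never
        /// build a node name that ends with '/'.
        if (!it->empty())
        {
            ancestor /= *it;
            result.push_back(ancestor.string());
        }
    }
    return result;
}

int main()
{
    // Prints "/clickhouse", "/clickhouse/zero_copy", "/clickhouse/zero_copy/zero_copy_s3".
    for (const auto & p : getAncestors("/clickhouse/zero_copy/zero_copy_s3/"))
        std::cout << p << '\n';
}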
Strings replicas; if (Coordination::Error::ZOK != zookeeper->tryGetChildren(zookeeper_path + "/replicas", replicas) || !replicas.empty()) - return; + return false; LOG_INFO(logger, "{} is the last replica, will remove table", remote_replica_path); @@ -1220,10 +1286,12 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) { LOG_WARNING(logger, "Table {} is already started to be removing by another replica right now", remote_replica_path); + return false; } else if (code == Coordination::Error::ZNOTEMPTY) { LOG_WARNING(logger, "Another replica was suddenly created, will keep the table {}", remote_replica_path); + return false; } else if (code != Coordination::Error::ZOK) { @@ -1235,9 +1303,12 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con LOG_INFO(logger, "Removing table {} (this might take several minutes)", zookeeper_path); removeTableNodesFromZooKeeper(zookeeper, zookeeper_path, metadata_drop_lock, logger); } + + return true; } -void StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, const String & drop_replica, LoggerPtr logger) + +bool StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, const String & drop_replica, LoggerPtr logger) { zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown(); @@ -1247,7 +1318,7 @@ void StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, if (zookeeper->exists(drop_zookeeper_path + "/replicas/" + drop_replica + "/is_active")) throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Can't drop replica: {}, because it's active", drop_replica); - dropReplica(zookeeper, drop_zookeeper_path, drop_replica, logger); + return dropReplica(zookeeper, drop_zookeeper_path, drop_replica, logger); } @@ -1300,7 +1371,7 @@ bool StorageReplicatedMergeTree::removeTableNodesFromZooKeeper(zkutil::ZooKeeper ops.emplace_back(zkutil::makeRemoveRequest(metadata_drop_lock->getPath(), -1)); ops.emplace_back(zkutil::makeRemoveRequest(fs::path(zookeeper_path) / "dropped", -1)); ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path, -1)); - code = zookeeper->tryMulti(ops, responses); + code = zookeeper->tryMulti(ops, responses, /* check_session_valid */ true); if (code == Coordination::Error::ZNONODE) { @@ -1847,7 +1918,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd Coordination::SimpleFaultInjection fault(getSettings()->fault_probability_before_part_commit, getSettings()->fault_probability_after_part_commit, "part commit"); ThreadFuzzer::maybeInjectSleep(); - e = zookeeper->tryMulti(ops, responses); + e = zookeeper->tryMulti(ops, responses, /* check_session_valid */ true); } if (e == Coordination::Error::ZOK) { @@ -2050,7 +2121,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che if (entry.quorum) { if (entry.type != LogEntry::GET_PART) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: log entry with quorum but type is not GET_PART"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with quorum but type is not GET_PART"); LOG_DEBUG(log, "No active replica has part {} which needs to be written with quorum. 
Will try to mark that quorum as failed.", entry.new_part_name); @@ -2113,7 +2184,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che auto part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); if (part_info.min_block != part_info.max_block) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: log entry with quorum for part covering more than one block number"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with quorum for part covering more than one block number"); ops.emplace_back(zkutil::makeCreateRequest( fs::path(zookeeper_path) / "quorum" / "failed_parts" / entry.new_part_name, @@ -2904,7 +2975,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo /// Check that log pointer of source replica didn't changed while we read queue entries ops.push_back(zkutil::makeCheckRequest(fs::path(source_path) / "log_pointer", log_pointer_stat.version)); - auto rc = zookeeper->tryMulti(ops, responses); + auto rc = zookeeper->tryMulti(ops, responses, /* check_session_valid */ true); if (rc == Coordination::Error::ZOK) { @@ -3259,7 +3330,7 @@ void StorageReplicatedMergeTree::cloneMetadataIfNeeded(const String & source_rep ops.emplace_back(zkutil::makeCheckRequest(source_path + "/metadata", metadata_stat.version)); ops.emplace_back(zkutil::makeCheckRequest(source_path + "/columns", columns_stat.version)); - Coordination::Error code = zookeeper->tryMulti(ops, responses); + Coordination::Error code = zookeeper->tryMulti(ops, responses, /* check_session_valid */ true); if (code == Coordination::Error::ZOK) break; else if (code == Coordination::Error::ZBADVERSION) @@ -4134,7 +4205,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n zkutil::CreateMode::PersistentSequential)); Coordination::Responses results; - auto rc = zookeeper->tryMulti(ops, results); + auto rc = zookeeper->tryMulti(ops, results, /* check_session_valid */ true); if (rc == Coordination::Error::ZBADVERSION) { @@ -5849,7 +5920,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer } else { - zookeeper->multi(requests); + zookeeper->multi(requests, /* check_session_valid */ true); } { @@ -6800,7 +6871,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( } else { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected name of log node: {}", entry.znode_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected name of log node: {}", entry.znode_name); } /** Second - find the corresponding entry in the queue of the specified replica. @@ -7176,7 +7247,7 @@ void StorageReplicatedMergeTree::fetchPartition( } if (best_replica.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: cannot choose best replica."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot choose best replica."); LOG_INFO(log, "Found {} replicas, {} of them are active. 
Selected {} to fetch from.", replicas.size(), active_replicas.size(), best_replica); @@ -7263,6 +7334,24 @@ void StorageReplicatedMergeTree::fetchPartition( } +void StorageReplicatedMergeTree::forgetPartition(const ASTPtr & partition, ContextPtr query_context) +{ + zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); + + String partition_id = getPartitionIDFromQuery(partition, query_context); + String block_numbers_path = fs::path(zookeeper_path) / "block_numbers"; + String partition_path = fs::path(block_numbers_path) / partition_id; + + auto error_code = zookeeper->tryRemove(partition_path); + if (error_code == Coordination::Error::ZOK) + LOG_INFO(log, "Forget partition {}", partition_id); + else if (error_code == Coordination::Error::ZNONODE) + throw Exception(ErrorCodes::CANNOT_FORGET_PARTITION, "Partition {} is unknown", partition_id); + else + throw zkutil::KeeperException::fromPath(error_code, partition_path); +} + + void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context) { /// Overview of the mutation algorithm. @@ -7442,6 +7531,7 @@ CancellationCode StorageReplicatedMergeTree::killMutation(const String & mutatio Int64 block_number = pair.second; getContext()->getMergeList().cancelPartMutations(getStorageID(), partition_id, block_number); } + mutation_backoff_policy.resetMutationFailures(); return CancellationCode::CancelSent; } @@ -8872,11 +8962,12 @@ IStorage::DataValidationTasksPtr StorageReplicatedMergeTree::getCheckTaskList( std::optional StorageReplicatedMergeTree::checkDataNext(DataValidationTasksPtr & check_task_list) { + if (auto part = assert_cast(check_task_list.get())->next()) { try { - return part_check_thread.checkPartAndFix(part->name, /* recheck_after */nullptr, /* throw_on_broken_projection */true); + return CheckResult(part_check_thread.checkPartAndFix(part->name)); } catch (const Exception & ex) { @@ -8957,7 +9048,7 @@ bool StorageReplicatedMergeTree::canUseAdaptiveGranularity() const } -std::map StorageReplicatedMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const +MutationCommands StorageReplicatedMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const { return queue.getAlterMutationCommandsForPart(part); } @@ -10010,7 +10101,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP ThreadFuzzer::maybeInjectSleep(); Coordination::Responses responses; - auto code = zookeeper->tryMulti(ops, responses); + auto code = zookeeper->tryMulti(ops, responses, /* check_session_valid */ true); if (code == Coordination::Error::ZOK) { transaction.commit(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 79d6d1dce3d..d8aefdf5b4c 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -99,7 +99,7 @@ public: StorageReplicatedMergeTree( const String & zookeeper_path_, const String & replica_name_, - bool attach, + LoadingStrictnessLevel mode, const StorageID & table_id_, const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, @@ -107,7 +107,6 @@ public: const String & date_column_name, const MergingParams & merging_params_, std::unique_ptr settings_, - bool has_force_restore_data_flag, RenamingRestrictions renaming_restrictions_, bool need_check_structure); @@ -232,14 +231,6 @@ public: /// Checks ability to use granularity bool canUseAdaptiveGranularity() const override; - /// Returns the default path to the table in 
ZooKeeper. - /// It's used if not set in engine's arguments while creating a replicated table. - static String getDefaultReplicaPath(const ContextPtr & context_); - - /// Returns the default replica name in ZooKeeper. - /// It's used if not set in engine's arguments while creating a replicated table. - static String getDefaultReplicaName(const ContextPtr & context_); - /// Modify a CREATE TABLE query to make a variant which must be written to a backup. void adjustCreateQueryForBackup(ASTPtr & create_query) const override; @@ -250,11 +241,12 @@ public: void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; /** Remove a specific replica from zookeeper. + * returns true if there are no replicas left */ - static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, + static bool dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, LoggerPtr logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional * has_metadata_out = nullptr); - void dropReplica(const String & drop_zookeeper_path, const String & drop_replica, LoggerPtr logger); + bool dropReplica(const String & drop_zookeeper_path, const String & drop_replica, LoggerPtr logger); /// Removes table from ZooKeeper after the last replica was dropped static bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, @@ -361,6 +353,8 @@ public: bool isTableReadOnly () { return is_readonly; } + std::optional hasMetadataInZooKeeper () { return has_metadata_in_zookeeper; } + /// Get a sequential consistent view of current parts. ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock getMaxAddedBlocks() const; @@ -909,6 +903,8 @@ private: const String & from, bool fetch_part, ContextPtr query_context) override; + void forgetPartition(const ASTPtr & partition, ContextPtr query_context) override; + /// NOTE: there are no guarantees for concurrent merges. 
Dropping part can /// be concurrently merged into some covering part and dropPart will do @@ -938,7 +934,7 @@ private: void waitMutationToFinishOnReplicas( const Strings & replicas, const String & mutation_id) const; - std::map getAlterMutationCommandsForPart(const DataPartPtr & part) const override; + MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const override; void startBackgroundMovesIfNeeded() override; @@ -988,6 +984,10 @@ private: void startupImpl(bool from_attach_thread); + std::vector getZookeeperZeroCopyLockPaths() const; + static void dropZookeeperZeroCopyLockPaths(zkutil::ZooKeeperPtr zookeeper, + std::vector zero_copy_locks_paths, LoggerPtr logger); + struct DataValidationTasks : public IStorage::DataValidationTasksBase { explicit DataValidationTasks(DataPartsVector && parts_, std::unique_lock && parts_check_lock_) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 4fde6fd04f3..11da394feec 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -68,14 +68,10 @@ #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" #include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop namespace fs = std::filesystem; @@ -129,6 +125,7 @@ namespace ErrorCodes extern const int UNEXPECTED_EXPRESSION; extern const int DATABASE_ACCESS_DENIED; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; extern const int NOT_IMPLEMENTED; extern const int CANNOT_COMPILE_REGEXP; extern const int FILE_DOESNT_EXIST; @@ -142,43 +139,39 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; ReadFromStorageS3Step( - Block sample_block, const Names & column_names_, - StorageSnapshotPtr storage_snapshot_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, StorageS3 & storage_, ReadFromFormatInfo read_from_format_info_, bool need_only_count_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , column_names(column_names_) - , storage_snapshot(std::move(storage_snapshot_)) , storage(storage_) , read_from_format_info(std::move(read_from_format_info_)) , need_only_count(need_only_count_) - , local_context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) { - query_configuration = storage.updateConfigurationAndGetCopy(local_context); - virtual_columns = storage.getVirtuals(); + query_configuration = storage.updateConfigurationAndGetCopy(context); + virtual_columns = storage.getVirtualsList(); } private: Names column_names; - StorageSnapshotPtr storage_snapshot; StorageS3 & storage; ReadFromFormatInfo read_from_format_info; bool need_only_count; StorageS3::Configuration query_configuration; NamesAndTypesList virtual_columns; - ContextPtr local_context; - size_t max_block_size; size_t num_streams; @@ -265,45 +258,46 @@ private: KeyWithInfoPtr nextAssumeLocked() { - if (buffer_iter != buffer.end()) + do { - auto 
answer = *buffer_iter; - ++buffer_iter; - - /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. - /// So we get object info lazily here on 'next()' request. - if (!answer->info) + if (buffer_iter != buffer.end()) { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); - if (file_progress_callback) - file_progress_callback(FileProgress(0, answer->info->size)); + auto answer = *buffer_iter; + ++buffer_iter; + + /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. + /// So we get object info lazily here on 'next()' request. + if (!answer->info) + { + answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); + if (file_progress_callback) + file_progress_callback(FileProgress(0, answer->info->size)); + } + + return answer; } - return answer; - } + if (is_finished) + return {}; - if (is_finished) - return {}; - - try - { - fillInternalBufferAssumeLocked(); - } - catch (...) - { - /// In case of exception thrown while listing new batch of files - /// iterator may be partially initialized and its further using may lead to UB. - /// Iterator is used by several processors from several threads and - /// it may take some time for threads to stop processors and they - /// may still use this iterator after exception is thrown. - /// To avoid this UB, reset the buffer and return defaults for further calls. - is_finished = true; - buffer.clear(); - buffer_iter = buffer.begin(); - throw; - } - - return nextAssumeLocked(); + try + { + fillInternalBufferAssumeLocked(); + } + catch (...) + { + /// In case of exception thrown while listing new batch of files + /// iterator may be partially initialized and its further using may lead to UB. + /// Iterator is used by several processors from several threads and + /// it may take some time for threads to stop processors and they + /// may still use this iterator after exception is thrown. + /// To avoid this UB, reset the buffer and return defaults for further calls. 
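// The hunk above rewrites DisclosedGlobIterator::nextAssumeLocked() from tail recursion
// ("return nextAssumeLocked();" after refilling the buffer) into a do/while loop, so a long
// listing cannot deepen the stack. The shape of that refactoring on a simplified iterator is
// sketched below; the type, the fill step and the key source are placeholders, and the
// exception-safety reset done by the real code is omitted for brevity.

#include <cstddef>
#include <optional>
#include <string>
#include <vector>

class BatchedKeyIterator
{
    std::vector<std::string> buffer;
    size_t buffer_pos = 0;
    bool is_finished = false;

    /// Placeholder for fillInternalBufferAssumeLocked(): lists the next batch of keys.
    void fillNextBatch()
    {
        buffer = {};  // next batch of keys; empty means the listing is exhausted
        buffer_pos = 0;
        if (buffer.empty())
            is_finished = true;
    }

public:
    /// Loop-based next(): drain the current batch, refill, repeat - no recursion.
    std::optional<std::string> next()
    {
        do
        {
            if (buffer_pos < buffer.size())
                return buffer[buffer_pos++];

            if (is_finished)
                return std::nullopt;

            fillNextBatch();
        } while (true);
    }
};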
+ is_finished = true; + buffer.clear(); + buffer_iter = buffer.begin(); + throw; + } + } while (true); } void fillInternalBufferAssumeLocked() @@ -428,7 +422,7 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( const S3::URI & globbed_uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, - ContextPtr context, + const ContextPtr & context, KeysWithInfo * read_keys_, const S3Settings::RequestSettings & request_settings_, std::function file_progress_callback_) @@ -551,7 +545,15 @@ StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) if (current_index >= buffer.size()) return std::make_shared(callback()); - return buffer[current_index]; + while (current_index < buffer.size()) + { + if (const auto & key_info = buffer[current_index]; key_info && !key_info->key.empty()) + return buffer[current_index]; + + current_index = index.fetch_add(1, std::memory_order_relaxed); + } + + return nullptr; } size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() @@ -563,7 +565,7 @@ StorageS3Source::StorageS3Source( const ReadFromFormatInfo & info, const String & format_, String name_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, @@ -722,7 +724,7 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( auto context = getContext(); auto read_buffer_creator = [this, read_settings, object_size] - (const std::string & path, size_t read_until_position) -> std::unique_ptr + (bool restricted_seek, const std::string & path) -> std::unique_ptr { return std::make_unique( client, @@ -733,21 +735,25 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( read_settings, /* use_external_buffer */true, /* offset */0, - read_until_position, - /* restricted_seek */true, + /* read_until_position */0, + restricted_seek, object_size); }; + auto modified_settings{read_settings}; + /// User's S3 object may change, don't cache it. 
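// ReadTaskIterator::next() above now re-draws an index from the shared atomic counter instead
// of returning an empty key. A reduced sketch of that pull model: many threads claim slots from
// one counter and skip blank entries. The fallback to the external callback() once the buffer is
// exhausted is left out; buffer contents here are illustrative.

#include <atomic>
#include <cstddef>
#include <string>
#include <vector>

class SharedIndexIterator
{
    std::vector<std::string> buffer;   // prefilled list of keys, may contain empty slots
    std::atomic<size_t> index{0};

public:
    explicit SharedIndexIterator(std::vector<std::string> keys) : buffer(std::move(keys)) {}

    /// Safe to call from many threads: each call claims the next unclaimed slot
    /// and keeps drawing until it finds a non-empty key or runs off the end.
    const std::string * next()
    {
        size_t current = index.fetch_add(1, std::memory_order_relaxed);
        while (current < buffer.size())
        {
            if (!buffer[current].empty())
                return &buffer[current];
            current = index.fetch_add(1, std::memory_order_relaxed);
        }
        return nullptr;  // exhausted
    }
};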
+ modified_settings.use_page_cache_for_disks_without_file_cache = false; + + /// FIXME: Changing this setting to default value breaks something around parquet reading + modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; + auto s3_impl = std::make_unique( std::move(read_buffer_creator), StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, + "", read_settings, /* cache_log */nullptr, /* use_external_buffer */true); - auto modified_settings{read_settings}; - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); auto async_reader = std::make_unique( std::move(s3_impl), pool_reader, modified_settings, @@ -841,7 +847,7 @@ public: StorageS3Sink( const String & format, const Block & sample_block_, - ContextPtr context, + const ContextPtr & context, std::optional format_settings_, const CompressionMethod compression_method, const StorageS3::Configuration & configuration_, @@ -949,23 +955,22 @@ private: }; -class PartitionedStorageS3Sink : public PartitionedSink +class PartitionedStorageS3Sink : public PartitionedSink, WithContext { public: PartitionedStorageS3Sink( const ASTPtr & partition_by, const String & format_, const Block & sample_block_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, const CompressionMethod compression_method_, const StorageS3::Configuration & configuration_, const String & bucket_, const String & key_) - : PartitionedSink(partition_by, context_, sample_block_) + : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) , format(format_) , sample_block(sample_block_) - , context(context_) , compression_method(compression_method_) , configuration(configuration_) , bucket(bucket_) @@ -985,7 +990,7 @@ public: return std::make_shared( format, sample_block, - context, + getContext(), format_settings, compression_method, configuration, @@ -997,7 +1002,6 @@ public: private: const String format; const Block sample_block; - const ContextPtr context; const CompressionMethod compression_method; const StorageS3::Configuration configuration; const String bucket; @@ -1033,7 +1037,7 @@ private: StorageS3::StorageS3( const Configuration & configuration_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -1050,18 +1054,27 @@ StorageS3::StorageS3( { updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - FormatFactory::instance().checkFormatName(configuration.format); + if (configuration.format != "auto") + FormatFactory::instance().checkFormatName(configuration.format); context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromDataImpl(configuration, format_settings, context_); + ColumnsDescription columns; + if (configuration.format == "auto") + std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(configuration, format_settings, context_); + else + columns = getTableStructureFromData(configuration, format_settings, context_); + 
storage_metadata.setColumns(columns); } else { + if (configuration.format == "auto") + configuration.format = getTableStructureAndFormatFromData(configuration, format_settings, context_).second; + /// We don't allow special columns in S3 storage. if (!columns_.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -1071,8 +1084,7 @@ StorageS3::StorageS3( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } static std::shared_ptr createFileIterator( @@ -1139,28 +1151,29 @@ void StorageS3::read( size_t max_block_size, size_t num_streams) { - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), virtual_columns); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; auto reading = std::make_unique( - read_from_format_info.source_header, column_names, + query_info, storage_snapshot, + local_context, + read_from_format_info.source_header, *this, std::move(read_from_format_info), need_only_count, - local_context, max_block_size, num_streams); query_plan.addStep(std::move(reading)); } -void ReadFromStorageS3Step::applyFilters() +void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1174,8 +1187,8 @@ void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) return; iterator_wrapper = createFileIterator( - query_configuration, storage.distributed_processing, local_context, predicate, - virtual_columns, nullptr, local_context->getFileProgressCallback()); + query_configuration, storage.distributed_processing, context, predicate, + virtual_columns, nullptr, context->getFileProgressCallback()); } void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) @@ -1192,7 +1205,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case. num_streams = 1; - const size_t max_threads = local_context->getSettingsRef().max_threads; + const size_t max_threads = context->getSettingsRef().max_threads; const size_t max_parsing_threads = num_streams >= max_threads ? 
1 : (max_threads / std::max(num_streams, 1ul)); LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); @@ -1204,7 +1217,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, read_from_format_info, query_configuration.format, storage.getName(), - local_context, + context, storage.format_settings, max_block_size, query_configuration.request_settings, @@ -1217,7 +1230,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, max_parsing_threads, need_only_count); - source->setKeyCondition(filter_nodes.nodes, local_context); + source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); } @@ -1350,14 +1363,14 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); } -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(ContextPtr local_context) +StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(const ContextPtr & local_context) { std::lock_guard lock(configuration_update_mutex); configuration.update(local_context); return configuration; } -void StorageS3::updateConfiguration(ContextPtr local_context) +void StorageS3::updateConfiguration(const ContextPtr & local_context) { std::lock_guard lock(configuration_update_mutex); configuration.update(local_context); @@ -1375,9 +1388,9 @@ const StorageS3::Configuration & StorageS3::getConfiguration() return configuration; } -bool StorageS3::Configuration::update(ContextPtr context) +bool StorageS3::Configuration::update(const ContextPtr & context) { - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); + auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName()); request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context->getSettings()); @@ -1390,7 +1403,7 @@ bool StorageS3::Configuration::update(ContextPtr context) return true; } -void StorageS3::Configuration::connect(ContextPtr context) +void StorageS3::Configuration::connect(const ContextPtr & context) { const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); const Settings & local_settings = context->getSettingsRef(); @@ -1407,8 +1420,10 @@ void StorageS3::Configuration::connect(ContextPtr context) url.uri.getScheme()); client_configuration.endpointOverride = url.endpoint; + /// seems as we don't use it client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.http_connection_pool_size = global_settings.s3_http_connection_pool_size; + client_configuration.connectTimeoutMs = local_settings.s3_connect_timeout_ms; + auto headers = auth_settings.headers; if (!headers_from_ast.empty()) headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); @@ -1462,7 +1477,7 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur configuration.request_settings = S3Settings::RequestSettings(collection); } -StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context, bool get_format_from_file) +StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) { StorageS3::Configuration configuration; @@ -1523,7 +1538,7 @@ StorageS3::Configuration 
StorageS3::getConfiguration(ASTs & engine_args, Context no_sign_request = true; engine_args_to_idx = {{"format", 2}}; } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; else engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; @@ -1544,7 +1559,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context else { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; } @@ -1560,7 +1575,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context else if (count == 5) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; } @@ -1601,7 +1616,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context configuration.keys = {configuration.url.key}; if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.key, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.url.key).value_or("auto"); return configuration; } @@ -1609,9 +1624,17 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context ColumnsDescription StorageS3::getTableStructureFromData( const StorageS3::Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx) + const ContextPtr & ctx) { - return getTableStructureFromDataImpl(configuration, format_settings, ctx); + return getTableStructureAndFormatFromDataImpl(configuration.format, configuration, format_settings, ctx).first; +} + +std::pair StorageS3::getTableStructureAndFormatFromData( + const StorageS3::Configuration & configuration, + const std::optional & format_settings, + const ContextPtr & ctx) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx); } namespace @@ -1623,24 +1646,43 @@ namespace std::shared_ptr file_iterator_, const StorageS3Source::KeysWithInfo & read_keys_, const StorageS3::Configuration & configuration_, + std::optional format_, const std::optional & format_settings_, const ContextPtr & context_) : WithContext(context_) , file_iterator(file_iterator_) , read_keys(read_keys_) , configuration(configuration_) + , format(std::move(format_)) , format_settings(format_settings_) , prev_read_keys_size(read_keys_.size()) { } - std::pair, std::optional> next() override + Data next() override { - /// For default mode check cached columns for currently read keys on first iteration. 
- if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + if (first) { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. + if (!format) + { + for (const auto & key_with_info : read_keys) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->key)) + { + format = format_from_file_name; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns, format}; + } } while (true) @@ -1650,22 +1692,48 @@ namespace if (!current_key_with_info || current_key_with_info->key.empty()) { if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in S3 or all files are empty. You must specify table structure manually", - configuration.format); + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "in S3 or all files are empty. You can specify table structure manually", + *format); - return {nullptr, std::nullopt}; + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "in S3 or all files are empty. You can specify the format manually"); + } + + return {nullptr, std::nullopt, format}; } - /// S3 file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size) + /// S3 file iterator could get new keys after new iteration + if (read_keys.size() > prev_read_keys_size) { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + /// If format is unknown we can try to determine it by new file names. + if (!format) + { + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->key)) + { + format = format_from_file_name; + break; + } + } + } + + /// Check new files in schema cache if schema inference mode is default. 
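// The schema-reader changes above first try to resolve an unknown format from the file names of
// newly listed keys (FormatFactory::tryGetFormatFromFileName) and only then fall back to
// content-based detection or the schema cache. A toy version of that resolution step; the
// extension-to-format map is illustrative and not FormatFactory's real registry.

#include <cstddef>
#include <map>
#include <optional>
#include <string>
#include <vector>

std::optional<std::string> formatFromFileName(const std::string & key)
{
    static const std::map<std::string, std::string> by_extension = {
        {".csv", "CSV"}, {".tsv", "TSV"}, {".json", "JSONEachRow"}, {".parquet", "Parquet"},
    };

    auto dot = key.find_last_of('.');
    if (dot == std::string::npos)
        return std::nullopt;

    auto it = by_extension.find(key.substr(dot));
    return it == by_extension.end() ? std::nullopt : std::optional<std::string>(it->second);
}

/// Scan only the keys listed since the previous attempt, mirroring the
/// `read_keys.begin() + prev_read_keys_size` range used in the hunk above.
std::optional<std::string> detectFormatFromNewKeys(const std::vector<std::string> & keys, size_t prev_size)
{
    for (size_t i = prev_size; i < keys.size(); ++i)
        if (auto format = formatFromFileName(keys[i]))
            return format;
    return std::nullopt;  // caller falls back to reading file contents
}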
+ if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; + } + prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; } if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) @@ -1678,7 +1746,7 @@ namespace if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) { first = false; - return {nullptr, columns_from_cache}; + return {nullptr, columns_from_cache, format}; } } @@ -1687,7 +1755,7 @@ namespace if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) { first = false; - return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt}; + return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt, format}; } } } @@ -1698,7 +1766,7 @@ namespace return; String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1709,7 +1777,7 @@ namespace return; String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); + auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); } @@ -1723,10 +1791,15 @@ namespace Strings sources; sources.reserve(read_keys.size()); std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; }); - auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext()); + auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { if (current_key_with_info) @@ -1734,15 +1807,26 @@ namespace return ""; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_key_with_info); + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); + return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max); + } + private: std::optional 
tryGetColumnsFromCache( const StorageS3::KeysWithInfo::const_iterator & begin, const StorageS3::KeysWithInfo::const_iterator & end) { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_s3) return std::nullopt; - auto & schema_cache = StorageS3::getSchemaCache(getContext()); + auto & schema_cache = StorageS3::getSchemaCache(context); for (auto it = begin; it < end; ++it) { auto get_last_mod_time = [&] @@ -1773,10 +1857,29 @@ namespace String path = fs::path(configuration.url.bucket) / (*it)->key; String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + + if (format) + { + auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. + format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -1785,6 +1888,7 @@ namespace std::shared_ptr file_iterator; const StorageS3Source::KeysWithInfo & read_keys; const StorageS3::Configuration & configuration; + std::optional format; const std::optional & format_settings; StorageS3Source::KeyWithInfoPtr current_key_with_info; size_t prev_read_keys_size; @@ -1793,17 +1897,20 @@ namespace } -ColumnsDescription StorageS3::getTableStructureFromDataImpl( +std::pair StorageS3::getTableStructureAndFormatFromDataImpl( + std::optional format, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx) + const ContextPtr & ctx) { KeysWithInfo read_keys; auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx); - return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); + ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format, format_settings, ctx); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); } void registerStorageS3Impl(const String & name, StorageFactory & factory) @@ -1879,16 +1986,6 @@ void registerStorageOSS(StorageFactory & factory) return registerStorageS3Impl("OSS", factory); } -NamesAndTypesList StorageS3::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageS3::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - bool StorageS3::supportsPartitionBy() const { return true; diff --git 
a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 81a03cc5ad5..7c5db76aadf 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -80,7 +80,7 @@ public: const S3::URI & globbed_uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, - ContextPtr context, + const ContextPtr & context, KeysWithInfo * read_keys_ = nullptr, const S3Settings::RequestSettings & request_settings_ = {}, std::function progress_callback_ = {}); @@ -134,7 +134,7 @@ public: const ReadFromFormatInfo & info, const String & format, String name_, - ContextPtr context_, + const ContextPtr & context_, std::optional format_settings_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, @@ -151,9 +151,9 @@ public: String getName() const override; - void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override + void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override { - setKeyConditionImpl(nodes, context_, sample_block); + setKeyConditionImpl(filter_actions_dag, context_, sample_block); } Chunk generate() override; @@ -280,9 +280,9 @@ public: String getPath() const { return url.key; } - bool update(ContextPtr context); + bool update(const ContextPtr & context); - void connect(ContextPtr context); + void connect(const ContextPtr & context); bool withGlobs() const { return url.key.find_first_of("*?{") != std::string::npos; } @@ -308,7 +308,7 @@ public: StorageS3( const Configuration & configuration_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -336,30 +336,32 @@ public: void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override; static void processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection); static SchemaCache & getSchemaCache(const ContextPtr & ctx); - static StorageS3::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context, bool get_format_from_file = true); + static StorageS3::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file = true); static ColumnsDescription getTableStructureFromData( const StorageS3::Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx); + const ContextPtr & ctx); + + static std::pair getTableStructureAndFormatFromData( + const StorageS3::Configuration & configuration, + const std::optional & format_settings, + const ContextPtr & ctx); using KeysWithInfo = StorageS3Source::KeysWithInfo; bool supportsTrivialCountOptimization() const override { return true; } protected: - virtual Configuration updateConfigurationAndGetCopy(ContextPtr local_context); + virtual Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context); - virtual void updateConfiguration(ContextPtr local_context); + virtual void updateConfiguration(const ContextPtr & local_context); void useConfiguration(const Configuration & new_configuration); @@ -373,17 +375,17 @@ private: Configuration configuration; std::mutex configuration_update_mutex; - NamesAndTypesList 
virtual_columns; String name; const bool distributed_processing; std::optional format_settings; ASTPtr partition_by; - static ColumnsDescription getTableStructureFromDataImpl( + static std::pair getTableStructureAndFormatFromDataImpl( + std::optional format, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx); + const ContextPtr & ctx); bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 25c2b42b766..6b22771b38f 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -1,7 +1,5 @@ #include "Storages/StorageS3Cluster.h" -#include "config.h" - #if USE_AWS_S3 #include @@ -38,39 +36,51 @@ StorageS3Cluster::StorageS3Cluster( const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + const ContextPtr & context) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageS3Cluster (" + table_id_.table_name + ")")) , s3_configuration{configuration_} { - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); + context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); + context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); StorageInMemoryMetadata storage_metadata; - updateConfigurationIfChanged(context_); + updateConfigurationIfChanged(context); if (columns_.empty()) { + ColumnsDescription columns; /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function - auto columns = StorageS3::getTableStructureFromDataImpl(s3_configuration, /*format_settings=*/std::nullopt, context_); + if (s3_configuration.format == "auto") + std::tie(columns, s3_configuration.format) = StorageS3::getTableStructureAndFormatFromData(s3_configuration, /*format_settings=*/std::nullopt, context); + else + columns = StorageS3::getTableStructureFromData(s3_configuration, /*format_settings=*/std::nullopt, context); + storage_metadata.setColumns(columns); } else + { + if (s3_configuration.format == "auto") + s3_configuration.format = StorageS3::getTableStructureAndFormatFromData(s3_configuration, /*format_settings=*/std::nullopt, context).second; + storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } -void StorageS3Cluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageS3Cluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - 
TableFunctionS3Cluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionS3Cluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, + storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), + s3_configuration.format, + context); } void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) @@ -81,7 +91,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, predicate, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); + *s3_configuration.client, s3_configuration.url, predicate, getVirtualsList(), context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { @@ -92,12 +102,6 @@ RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } -NamesAndTypesList StorageS3Cluster::getVirtuals() const -{ - return virtual_columns; -} - - } #endif diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index c526f14834a..6a5b03e682f 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -4,10 +4,7 @@ #if USE_AWS_S3 -#include -#include - -#include "Client/Connection.h" +#include #include #include #include @@ -27,13 +24,10 @@ public: const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); + const ContextPtr & context_); std::string getName() const override { return "S3Cluster"; } - NamesAndTypesList getVirtuals() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -46,10 +40,9 @@ protected: private: void updateBeforeRead(const ContextPtr & context) override { updateConfigurationIfChanged(context); } - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; StorageS3::Configuration s3_configuration; - NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index b0c1160429a..5887018268b 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB @@ -293,7 +292,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint) const +S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); @@ -302,7 +301,8 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint) const for (auto possible_prefix_setting = next_prefix_setting; possible_prefix_setting != s3_settings.begin();) { 
std::advance(possible_prefix_setting, -1); - if (boost::algorithm::starts_with(endpoint, possible_prefix_setting->first)) + const auto & [endpoint_prefix, settings] = *possible_prefix_setting; + if (endpoint.starts_with(endpoint_prefix) && settings.auth_settings.canBeUsedByUser(user)) return possible_prefix_setting->second; } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 0e152bb2d31..21b6264717e 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -112,7 +112,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint) const; + S3Settings getSettings(const String & endpoint, const String & user) const; private: mutable std::mutex mutex; diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 85c5e16a1bf..179e4cee199 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -19,6 +19,20 @@ #include #include +namespace +{ + +using namespace DB; + +ContextPtr makeSQLiteWriteContext(ContextPtr context) +{ + auto write_context = Context::createCopy(context); + write_context->setSetting("output_format_values_escape_quote_with_quote", Field(true)); + return write_context; +} + +} + namespace DB { @@ -43,6 +57,7 @@ StorageSQLite::StorageSQLite( , database_path(database_path_) , sqlite_db(sqlite_db_) , log(getLogger("StorageSQLite (" + table_id_.table_name + ")")) + , write_context(makeSQLiteWriteContext(getContext())) { StorageInMemoryMetadata storage_metadata; @@ -144,7 +159,7 @@ public: sqlbuf << ") VALUES "; - auto writer = FormatFactory::instance().getOutputFormat("Values", sqlbuf, metadata_snapshot->getSampleBlock(), storage.getContext()); + auto writer = FormatFactory::instance().getOutputFormat("Values", sqlbuf, metadata_snapshot->getSampleBlock(), storage.write_context); writer->write(block); sqlbuf << ";"; @@ -193,7 +208,7 @@ void registerStorageSQLite(StorageFactory & factory) const auto database_path = checkAndGetLiteralArgument(engine_args[0], "database_path"); const auto table_name = checkAndGetLiteralArgument(engine_args[1], "table_name"); - auto sqlite_db = openSQLiteDB(database_path, args.getContext(), /* throw_on_error */!args.attach); + auto sqlite_db = openSQLiteDB(database_path, args.getContext(), /* throw_on_error */ args.mode <= LoadingStrictnessLevel::CREATE); return std::make_shared(args.table_id, sqlite_db, database_path, table_name, args.columns, args.constraints, args.getContext()); diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index baacdfb4899..ed673123fe0 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -47,10 +47,13 @@ public: const String & table); private: + friend class SQLiteSink; /// for write_context + String remote_table_name; String database_path; SQLitePtr sqlite_db; LoggerPtr log; + ContextPtr write_context; }; } diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 34c092c7208..8b087a4a2bc 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include #include #include @@ -18,6 +16,49 @@ namespace ErrorCodes extern const int COLUMN_QUERIED_MORE_THAN_ONCE; } +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_) + : storage(storage_) + , metadata(std::move(metadata_)) + , 
virtual_columns(storage_.getVirtualsPtr()) +{ +} + +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + VirtualsDescriptionPtr virtual_columns_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(std::move(virtual_columns_)) +{ +} + +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + ColumnsDescription object_columns_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(storage_.getVirtualsPtr()) + , object_columns(std::move(object_columns_)) +{ +} + +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + ColumnsDescription object_columns_, + DataPtr data_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(storage_.getVirtualsPtr()) + , object_columns(std::move(object_columns_)) + , data(std::move(data_)) +{ +} + std::shared_ptr StorageSnapshot::clone(DataPtr data_) const { auto res = std::make_shared(storage, metadata, object_columns); @@ -28,15 +69,12 @@ std::shared_ptr StorageSnapshot::clone(DataPtr data_) const return res; } -void StorageSnapshot::init() +ColumnsDescription StorageSnapshot::getAllColumnsDescription() const { - for (const auto & [name, type] : storage.getVirtuals()) - virtual_columns[name] = type; + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + auto column_names_and_types = getColumns(get_column_options); - if (storage.hasLightweightDeletedMask()) - system_columns[LightweightDeleteDescription::FILTER_COLUMN.name] = LightweightDeleteDescription::FILTER_COLUMN.type; - - system_columns[BlockNumberColumn::name] = BlockNumberColumn::type; + return ColumnsDescription{column_names_and_types}; } NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) const @@ -46,35 +84,22 @@ NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) if (options.with_extended_objects) extendObjectColumns(all_columns, object_columns, options.with_subcolumns); - NameSet column_names; - if (options.with_virtuals) + if (options.virtuals_kind != VirtualsKind::None && !virtual_columns->empty()) { - /// Virtual columns must be appended after ordinary, - /// because user can override them. 
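The rewritten getColumns keeps the rule stated in the removed comment: virtual columns are appended after ordinary ones and are skipped when the user declared a column of the same name. A small sketch of that merge, with illustrative stand-in types rather than NamesAndTypesList and VirtualsDescription:

#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

using NamesAndTypes = std::vector<std::pair<std::string, std::string>>;

NamesAndTypes appendVirtuals(NamesAndTypes ordinary, const NamesAndTypes & virtuals)
{
    std::set<std::string> taken;
    for (const auto & column : ordinary)
        taken.insert(column.first);

    for (const auto & [name, type] : virtuals)
        if (taken.count(name) == 0)          // a user-defined column overrides the virtual one
            ordinary.emplace_back(name, type);

    return ordinary;
}

int main()
{
    NamesAndTypes ordinary = {{"id", "UInt64"}, {"_path", "String"}};   // the user redefined _path
    NamesAndTypes virtuals = {{"_path", "LowCardinality(String)"}, {"_file", "LowCardinality(String)"}};

    for (const auto & [name, type] : appendVirtuals(ordinary, virtuals))
        std::cout << name << ' ' << type << '\n';   // id, _path (user's type), _file
}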
- if (!virtual_columns.empty()) + NameSet column_names; + for (const auto & column : all_columns) + column_names.insert(column.name); + + auto virtuals_list = virtual_columns->getNamesAndTypesList(options.virtuals_kind); + for (const auto & column : virtuals_list) { - for (const auto & column : all_columns) - column_names.insert(column.name); + if (column_names.contains(column.name)) + continue; - for (const auto & [name, type] : virtual_columns) - if (!column_names.contains(name)) - all_columns.emplace_back(name, type); + all_columns.emplace_back(column.name, column.type); } } - if (options.with_system_columns) - { - if (!system_columns.empty() && column_names.empty()) - { - for (const auto & column : all_columns) - column_names.insert(column.name); - } - - for (const auto & [name, type] : system_columns) - if (!column_names.contains(name)) - all_columns.emplace_back(name, type); - } - return all_columns; } @@ -100,18 +125,11 @@ std::optional StorageSnapshot::tryGetColumn(const GetColumnsOpt return object_column; } - if (options.with_virtuals) + if (options.virtuals_kind != VirtualsKind::None) { - auto it = virtual_columns.find(column_name); - if (it != virtual_columns.end()) - return NameAndTypePair(column_name, it->second); - } - - if (options.with_system_columns) - { - auto it = system_columns.find(column_name); - if (it != system_columns.end()) - return NameAndTypePair(column_name, it->second); + auto virtual_column = virtual_columns->tryGet(column_name, options.virtuals_kind); + if (virtual_column) + return NameAndTypePair{virtual_column->name, virtual_column->type}; } return {}; @@ -126,6 +144,47 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } +CompressionCodecPtr StorageSnapshot::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec + ? CompressionCodecFactory::instance().get(column_desc.codec, column_desc.type, default_codec) + : default_codec; + }; + + const auto & columns = metadata->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + + return default_codec; +} + +CompressionCodecPtr StorageSnapshot::getCodecOrDefault(const String & column_name) const +{ + return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); +} + +ASTPtr StorageSnapshot::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec ? 
column_desc.codec : default_codec->getFullCodecDesc(); + }; + + const auto & columns = metadata->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + + return default_codec->getFullCodecDesc(); +} + Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; @@ -143,11 +202,11 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) cons { res.insert({object_column->type->createColumn(), object_column->type, column_name}); } - else if (auto it = virtual_columns.find(column_name); it != virtual_columns.end()) + else if (auto virtual_column = virtual_columns->tryGet(column_name)) { /// Virtual columns must be appended after ordinary, because user can /// override them. - const auto & type = it->second; + const auto & type = virtual_column->type; res.insert({type->createColumn(), type, column_name}); } else @@ -175,12 +234,11 @@ ColumnsDescription StorageSnapshot::getDescriptionForColumns(const Names & colum { res.add(*object_column, "", false, false); } - else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) + else if (auto virtual_column = virtual_columns->tryGet(name)) { /// Virtual columns must be appended after ordinary, because user can /// override them. - const auto & type = it->second; - res.add({name, type}); + res.add({name, virtual_column->type}); } else { @@ -216,7 +274,7 @@ void StorageSnapshot::check(const Names & column_names) const { bool has_column = columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, name) || object_columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, name) - || virtual_columns.contains(name); + || virtual_columns->has(name); if (!has_column) { diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index d62e118e1f2..89e97f2abb8 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -1,10 +1,14 @@ #pragma once #include +#include namespace DB { class IStorage; +class ICompressionCodec; + +using CompressionCodecPtr = std::shared_ptr; /// Snapshot of storage that fixes set columns that can be read in query. /// There are 3 sources of columns: regular columns from metadata, @@ -13,6 +17,7 @@ struct StorageSnapshot { const IStorage & storage; const StorageMetadataPtr metadata; + const VirtualsDescriptionPtr virtual_columns; const ColumnsDescription object_columns; /// Additional data, on which set of columns may depend. 
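The codec helpers added to StorageSnapshot above share one lookup order: the ordinary column description first, then the virtual column description, then the default codec. The sketch below mirrors that order with plain structs; ColumnDesc and the codec strings are illustrative, not the CompressionCodecFactory types.

#include <iostream>
#include <map>
#include <optional>
#include <string>

struct ColumnDesc { std::optional<std::string> codec; };

std::string getCodecOrDefault(
    const std::map<std::string, ColumnDesc> & ordinary,
    const std::map<std::string, ColumnDesc> & virtuals,
    const std::string & column_name,
    const std::string & default_codec)
{
    auto pick = [&](const ColumnDesc & desc) { return desc.codec ? *desc.codec : default_codec; };

    if (auto it = ordinary.find(column_name); it != ordinary.end())
        return pick(it->second);

    if (auto it = virtuals.find(column_name); it != virtuals.end())
        return pick(it->second);

    return default_codec;
}

int main()
{
    std::map<std::string, ColumnDesc> ordinary = {{"value", {std::string("ZSTD(3)")}}, {"raw", {std::nullopt}}};
    std::map<std::string, ColumnDesc> virtuals = {{"_block_number", {std::string("Delta, LZ4")}}};

    std::cout << getCodecOrDefault(ordinary, virtuals, "value", "LZ4") << '\n';          // explicit codec wins
    std::cout << getCodecOrDefault(ordinary, virtuals, "raw", "LZ4") << '\n';            // no codec declared -> default
    std::cout << getCodecOrDefault(ordinary, virtuals, "_block_number", "LZ4") << '\n';  // virtual column's codec
}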
@@ -30,38 +35,29 @@ struct StorageSnapshot StorageSnapshot( const IStorage & storage_, - StorageMetadataPtr metadata_) - : storage(storage_), metadata(std::move(metadata_)) - { - init(); - } + StorageMetadataPtr metadata_); StorageSnapshot( const IStorage & storage_, StorageMetadataPtr metadata_, - ColumnsDescription object_columns_) - : storage(storage_) - , metadata(std::move(metadata_)) - , object_columns(std::move(object_columns_)) - { - init(); - } + VirtualsDescriptionPtr virtual_columns_); + + StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + ColumnsDescription object_columns_); StorageSnapshot( const IStorage & storage_, StorageMetadataPtr metadata_, ColumnsDescription object_columns_, - DataPtr data_) - : storage(storage_) - , metadata(std::move(metadata_)) - , object_columns(std::move(object_columns_)) - , data(std::move(data_)) - { - init(); - } + DataPtr data_); std::shared_ptr clone(DataPtr data_) const; + /// Get columns description + ColumnsDescription getAllColumnsDescription() const; + /// Get all available columns with types according to options. NamesAndTypesList getColumns(const GetColumnsOptions & options) const; @@ -72,6 +68,10 @@ struct StorageSnapshot std::optional tryGetColumn(const GetColumnsOptions & options, const String & column_name) const; NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; + CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + CompressionCodecPtr getCodecOrDefault(const String & column_name) const; + ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + /// Block with ordinary + materialized + aliases + virtuals + subcolumns. Block getSampleBlockForColumns(const Names & column_names) const; @@ -87,15 +87,6 @@ struct StorageSnapshot /// If we have a projection then we should use its metadata. StorageMetadataPtr getMetadataForQuery() const { return projection ? projection->metadata : metadata; } - -private: - void init(); - - std::unordered_map virtual_columns; - - /// System columns are not visible in the schema but might be persisted in the data. - /// One example of such column is lightweight delete mask '_row_exists'. 
- std::unordered_map system_columns; }; using StorageSnapshotPtr = std::shared_ptr; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 359f142949f..f47eeb60918 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -267,7 +267,7 @@ StorageStripeLog::StorageStripeLog( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - bool attach, + LoadingStrictnessLevel mode, ContextMutablePtr context_) : IStorage(table_id_) , WithMutableContext(context_) @@ -295,7 +295,7 @@ StorageStripeLog::StorageStripeLog( file_checker.setEmpty(index_file_path); } - if (!attach) + if (mode < LoadingStrictnessLevel::ATTACH) { /// create directories if they do not exist disk->createDirectories(table_path); @@ -698,7 +698,7 @@ void registerStorageStripeLog(StorageFactory & factory) args.columns, args.constraints, args.comment, - args.attach, + args.mode, args.getContext()); }, features); } diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index c7f3e7e21e6..dc2f8d8be4a 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -33,7 +33,7 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - bool attach, + LoadingStrictnessLevel mode, ContextMutablePtr context_); ~StorageStripeLog() override; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index ce9b0cd366b..adcb83218fd 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include @@ -101,7 +101,7 @@ static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) IStorageURLBase::IStorageURLBase( const String & uri_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & table_id_, const String & format_name_, const std::optional & format_settings_, @@ -123,16 +123,26 @@ IStorageURLBase::IStorageURLBase( , partition_by(partition_by_) , distributed_processing(distributed_processing_) { - FormatFactory::instance().checkFormatName(format_name); + if (format_name != "auto") + FormatFactory::instance().checkFormatName(format_name); + StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromData(format_name, uri, compression_method, headers, format_settings, context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri, compression_method, headers, format_settings, context_); + else + columns = getTableStructureFromData(format_name, uri, compression_method, headers, format_settings, context_); + storage_metadata.setColumns(columns); } else { + if (format_name == "auto") + format_name = getTableStructureAndFormatFromData(uri, compression_method, headers, format_settings, context_).second; + /// We don't allow special columns in URL storage. 
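The constructor change above resolves the "auto" format in three ways depending on what the user supplied: detect structure and format together, detect only the structure for a known format, or detect only the format when columns were given. A minimal sketch of that branching, with detectStructure/detectStructureAndFormat as hypothetical stand-ins for the storage's schema-reading calls:

#include <iostream>
#include <optional>
#include <string>
#include <utility>

using Columns = std::string;  // stand-in for ColumnsDescription

std::pair<Columns, std::string> detectStructureAndFormat() { return {"c1 UInt64, c2 String", "CSV"}; }
Columns detectStructure(const std::string & /*format*/) { return "c1 UInt64, c2 String"; }

std::pair<Columns, std::string> resolve(std::optional<Columns> user_columns, std::string format)
{
    if (!user_columns)
    {
        if (format == "auto")
            return detectStructureAndFormat();        // one pass detects both
        return {detectStructure(format), format};     // only the schema is inferred
    }

    if (format == "auto")
        format = detectStructureAndFormat().second;   // columns are known, only the format is missing

    return {*user_columns, format};
}

int main()
{
    auto [columns, format] = resolve(std::nullopt, "auto");
    std::cout << columns << " / " << format << '\n';
}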
if (!columns_.hasOnlyOrdinary()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine URL doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); @@ -142,8 +152,7 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } @@ -257,7 +266,7 @@ StorageURLSource::StorageURLSource( const String & format_, const std::optional & format_settings_, String name_, - ContextPtr context_, + const ContextPtr & context_, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, @@ -451,22 +460,20 @@ std::pair> StorageURLSource: try { - auto res = std::make_unique( - request_uri, - http_method, - callback, - timeouts, - credentials, - settings.max_http_get_redirects, - settings.max_read_buffer_size, - read_settings, - headers, - &context_->getRemoteHostFilter(), - delay_initialization, - /* use_external_buffer */ false, - /* skip_url_not_found_error */ skip_url_not_found_error, - /* file_info */ std::nullopt, - proxy_config); + auto res = BuilderRWBufferFromHTTP(request_uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(http_method) + .withProxy(proxy_config) + .withSettings(read_settings) + .withTimeouts(timeouts) + .withHostFilter(&context_->getRemoteHostFilter()) + .withBufSize(settings.max_read_buffer_size) + .withRedirects(settings.max_http_get_redirects) + .withOutCallback(callback) + .withSkipNotFound(skip_url_not_found_error) + .withHeaders(headers) + .withDelayInit(delay_initialization) + .create(credentials); if (context_->getSettingsRef().engine_url_skip_empty_files && res->eof() && option != std::prev(end)) { @@ -525,7 +532,7 @@ StorageURLSink::StorageURLSink( const String & format, const std::optional & format_settings, const Block & sample_block, - ContextPtr context, + const ContextPtr & context, const ConnectionTimeouts & timeouts, const CompressionMethod compression_method, const HTTPHeaderEntries & headers, @@ -538,7 +545,7 @@ StorageURLSink::StorageURLSink( auto proxy_config = getProxyConfiguration(http_method); auto write_buffer = std::make_unique( - Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config + HTTPConnectionGroupType::STORAGE, Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config ); const auto & settings = context->getSettingsRef(); @@ -668,7 +675,7 @@ std::vector> IStorageURLBase::getReadURIPara const Names & /*column_names*/, const StorageSnapshotPtr & /*storage_snapshot*/, const SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + const ContextPtr & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { @@ -679,7 +686,7 @@ std::function IStorageURLBase::getReadPOSTDataCallback( const Names & /*column_names*/, const ColumnsDescription & /* columns_description */, const SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + const ContextPtr & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { @@ -693,28 +700,48 @@ namespace public: ReadBufferIterator( const std::vector & urls_to_check_, - const String & format_, + 
std::optional format_, const CompressionMethod & compression_method_, const HTTPHeaderEntries & headers_, const std::optional & format_settings_, const ContextPtr & context_) - : WithContext(context_), format(format_), compression_method(compression_method_), headers(headers_), format_settings(format_settings_) + : WithContext(context_), format(std::move(format_)), compression_method(compression_method_), headers(headers_), format_settings(format_settings_) { url_options_to_check.reserve(urls_to_check_.size()); for (const auto & url : urls_to_check_) url_options_to_check.push_back(getFailoverOptions(url, getContext()->getSettingsRef().glob_expansion_max_elements)); } - std::pair, std::optional> next() override + Data next() override { bool is_first = (current_index == 0); - /// For default mode check cached columns for all urls on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + if (is_first) { - for (const auto & options : url_options_to_check) + /// If format is unknown we iterate through all url options on first iteration and + /// try to determine format by file name. + if (!format) { - if (auto cached_columns = tryGetColumnsFromCache(options)) - return {nullptr, cached_columns}; + for (const auto & options : url_options_to_check) + { + for (const auto & url : options) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url)) + { + format = format_from_file_name; + break; + } + } + } + } + + /// For default mode check cached columns for all urls on first iteration. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + for (const auto & options : url_options_to_check) + { + if (auto cached_columns = tryGetColumnsFromCache(options)) + return {nullptr, cached_columns, format}; + } } } @@ -724,20 +751,30 @@ namespace if (current_index == url_options_to_check.size()) { if (is_first) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because all files are empty. " + "You can specify table structure manually", + *format); + throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. 
" - "You must specify table structure manually", - format); - return {nullptr, std::nullopt}; + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "You can specify the format manually"); + + } + + return {nullptr, std::nullopt, format}; } if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) { - if (auto cached_columns = tryGetColumnsFromCache(url_options_to_check[current_index])) + if (auto cached_schema = tryGetColumnsFromCache(url_options_to_check[current_index])) { ++current_index; - return {nullptr, cached_columns}; + return {nullptr, cached_schema, format}; } } @@ -762,7 +799,7 @@ namespace return {wrapReadBufferWithCompressionMethod( std::move(uri_and_buf.second), compression_method, - static_cast(getContext()->getSettingsRef().zstd_window_log_max)), std::nullopt}; + static_cast(getContext()->getSettingsRef().zstd_window_log_max)), std::nullopt, format}; } void setNumRowsToLastFile(size_t num_rows) override @@ -770,7 +807,7 @@ namespace if (!getContext()->getSettingsRef().schema_inference_use_cache_for_url) return; - auto key = getKeyForSchemaCache(current_url_option, format, format_settings, getContext()); + auto key = getKeyForSchemaCache(current_url_option, *format, format_settings, getContext()); StorageURL::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -780,7 +817,7 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) return; - auto key = getKeyForSchemaCache(current_url_option, format, format_settings, getContext()); + auto key = getKeyForSchemaCache(current_url_option, *format, format_settings, getContext()); StorageURL::getSchemaCache(getContext()).addColumns(key, columns); } @@ -792,17 +829,45 @@ namespace for (const auto & options : url_options_to_check) { - auto keys = getKeysForSchemaCache(options, format, format_settings, getContext()); + auto keys = getKeysForSchemaCache(options, *format, format_settings, getContext()); StorageURL::getSchemaCache(getContext()).addManyColumns(keys, columns); } } + void setFormatName(const String & format_name) override + { + format = format_name; + } + String getLastFileName() const override { return current_url_option; } + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_index > 0 && current_index <= url_options_to_check.size()); + auto first_option = url_options_to_check[current_index - 1].cbegin(); + auto uri_and_buf = StorageURLSource::getFirstAvailableURIAndReadBuffer( + first_option, + url_options_to_check[current_index - 1].cend(), + getContext(), + {}, + Poco::Net::HTTPRequest::HTTP_GET, + {}, + getHTTPTimeouts(getContext()), + credentials, + headers, + false, + false); + + return wrapReadBufferWithCompressionMethod(std::move(uri_and_buf.second), compression_method, static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + } + private: std::optional tryGetColumnsFromCache(const Strings & urls) { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_url) + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_url) return std::nullopt; auto & schema_cache = StorageURL::getSchemaCache(getContext()); @@ -810,7 +875,7 @@ namespace { auto get_last_mod_time = [&]() -> std::optional { - auto last_mod_time = StorageURL::tryGetLastModificationTime(url, headers, credentials, getContext()); + auto last_mod_time = 
StorageURL::tryGetLastModificationTime(url, headers, credentials, context); /// Some URLs could not have Last-Modified header, in this case we cannot be sure that /// data wasn't changed after adding it's schema to cache. Use schema from cache only if /// special setting for this case is enabled. @@ -819,10 +884,27 @@ namespace return last_mod_time; }; - auto cache_key = getKeyForSchemaCache(url, format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + if (format) + { + auto cache_key = getKeyForSchemaCache(url, *format, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(url, format_name, format_settings, context); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -831,7 +913,7 @@ namespace std::vector> url_options_to_check; size_t current_index = 0; String current_url_option; - const String & format; + std::optional format; const CompressionMethod & compression_method; const HTTPHeaderEntries & headers; Poco::Net::HTTPBasicCredentials credentials; @@ -839,13 +921,13 @@ namespace }; } -ColumnsDescription IStorageURLBase::getTableStructureFromData( - const String & format, +std::pair IStorageURLBase::getTableStructureAndFormatFromDataImpl( + std::optional format, const String & uri, CompressionMethod compression_method, const HTTPHeaderEntries & headers, const std::optional & format_settings, - ContextPtr context) + const ContextPtr & context) { context->getRemoteHostFilter().checkURL(Poco::URI(uri)); @@ -858,7 +940,30 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( urls_to_check = {uri}; ReadBufferIterator read_buffer_iterator(urls_to_check, format, compression_method, headers, format_settings, context); - return readSchemaFromFormat(format, format_settings, read_buffer_iterator, urls_to_check.size() > 1, context); + if (format) + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); +} + +ColumnsDescription IStorageURLBase::getTableStructureFromData( + const String & format, + const String & uri, + CompressionMethod compression_method, + const HTTPHeaderEntries & headers, + const std::optional & format_settings, + const ContextPtr & context) +{ + return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, headers, format_settings, context).first; +} + +std::pair IStorageURLBase::getTableStructureAndFormatFromData( + const String & uri, + CompressionMethod compression_method, + const HTTPHeaderEntries & headers, + const std::optional & format_settings, + const ContextPtr & context) +{ + return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, headers, format_settings, context); } bool IStorageURLBase::supportsSubsetOfColumns(const ContextPtr & context) const @@ -881,9 +986,13 @@ class ReadFromURL : public SourceStepWithFilter public: std::string 
getName() const override { return "ReadFromURL"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; ReadFromURL( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, std::shared_ptr storage_, std::vector * uri_options_, @@ -891,17 +1000,15 @@ public: const bool need_only_count_, std::vector> read_uri_params_, std::function read_post_data_callback_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , storage(std::move(storage_)) , uri_options(uri_options_) , info(std::move(info_)) , need_only_count(need_only_count_) , read_uri_params(std::move(read_uri_params_)) , read_post_data_callback(std::move(read_post_data_callback_)) - , context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) , max_num_streams(num_streams_) @@ -917,8 +1024,6 @@ private: std::vector> read_uri_params; std::function read_post_data_callback; - ContextPtr context; - size_t max_block_size; size_t num_streams; const size_t max_num_streams; @@ -930,9 +1035,9 @@ private: void createIterator(const ActionsDAG::Node * predicate); }; -void ReadFromURL::applyFilters() +void ReadFromURL::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -951,7 +1056,7 @@ void IStorageURLBase::read( size_t num_streams) { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -967,6 +1072,10 @@ void IStorageURLBase::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, read_from_format_info.source_header, std::move(this_ptr), nullptr, @@ -974,7 +1083,6 @@ void IStorageURLBase::read( need_only_count, std::move(params), std::move(read_post_data_callback), - local_context, max_block_size, num_streams); @@ -1016,7 +1124,7 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate) else if (is_url_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uri, max_addresses, predicate, storage->virtual_columns, context); + auto glob_iterator = std::make_shared(storage->uri, max_addresses, predicate, storage->getVirtualsList(), context); /// check if we filtered out all the paths if (glob_iterator->size() == 0) @@ -1085,7 +1193,7 @@ void 
ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const Buil is_url_with_globs, need_only_count); - source->setKeyCondition(filter_nodes.nodes, context); + source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); } @@ -1119,7 +1227,7 @@ void StorageURLWithFailover::read( size_t num_streams) { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -1135,6 +1243,10 @@ void StorageURLWithFailover::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, read_from_format_info.source_header, std::move(this_ptr), &uri_options, @@ -1142,7 +1254,6 @@ void StorageURLWithFailover::read( need_only_count, std::move(params), std::move(read_post_data_callback), - local_context, max_block_size, num_streams); @@ -1189,16 +1300,6 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad } } -NamesAndTypesList IStorageURLBase::getVirtuals() const -{ - return virtual_columns; -} - -Names IStorageURLBase::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - SchemaCache & IStorageURLBase::getSchemaCache(const ContextPtr & context) { static SchemaCache schema_cache(context->getConfigRef().getUInt("schema_inference_cache_max_elements_for_url", DEFAULT_SCHEMA_CACHE_ELEMENTS)); @@ -1217,24 +1318,17 @@ std::optional IStorageURLBase::tryGetLastModificationTime( auto proxy_config = getProxyConfiguration(uri.getScheme()); - ReadWriteBufferFromHTTP buf( - uri, - Poco::Net::HTTPRequest::HTTP_GET, - {}, - getHTTPTimeouts(context), - credentials, - settings.max_http_get_redirects, - settings.max_read_buffer_size, - context->getReadSettings(), - headers, - &context->getRemoteHostFilter(), - true, - false, - false, - std::nullopt, - proxy_config); + auto buf = BuilderRWBufferFromHTTP(uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withSettings(context->getReadSettings()) + .withTimeouts(getHTTPTimeouts(context)) + .withHostFilter(&context->getRemoteHostFilter()) + .withBufSize(settings.max_read_buffer_size) + .withRedirects(settings.max_http_get_redirects) + .withHeaders(headers) + .create(credentials); - return buf.tryGetLastModificationTime(); + return buf->tryGetLastModificationTime(); } StorageURL::StorageURL( @@ -1245,7 +1339,7 @@ StorageURL::StorageURL( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_, const HTTPHeaderEntries & headers_, const String & http_method_, @@ -1278,7 +1372,7 @@ StorageURLWithFailover::StorageURLWithFailover( const std::optional & format_settings_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_) : StorageURL("", table_id_, format_name_, format_settings_, 
columns_, constraints_, String{}, context_, compression_method_) { @@ -1327,7 +1421,7 @@ FormatSettings StorageURL::getFormatSettingsFromArgs(const StorageFactory::Argum } size_t StorageURL::evalArgsAndCollectHeaders( - ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context) + ASTs & url_function_args, HTTPHeaderEntries & header_entries, const ContextPtr & context) { ASTs::iterator headers_it = url_function_args.end(); @@ -1403,7 +1497,7 @@ void StorageURL::processNamedCollectionResult(Configuration & configuration, con && configuration.http_method != Poco::Net::HTTPRequest::HTTP_PUT) throw Exception( ErrorCodes::BAD_ARGUMENTS, - "Http method can be POST or PUT (current: {}). For insert default is POST, for select GET", + "HTTP method can be POST or PUT (current: {}). For insert default is POST, for select GET", configuration.http_method); configuration.format = collection.getOrDefault("format", "auto"); @@ -1411,7 +1505,7 @@ void StorageURL::processNamedCollectionResult(Configuration & configuration, con configuration.structure = collection.getOrDefault("structure", "auto"); } -StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, ContextPtr local_context) +StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, const ContextPtr & local_context) { StorageURL::Configuration configuration; @@ -1435,7 +1529,7 @@ StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, ContextPtr l } if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(configuration.url).getPath(), true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url).getPath()).value_or("auto"); for (const auto & [header, value] : configuration.headers) { diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index c8b8d0942f4..294b1f828bb 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -48,16 +48,21 @@ public: bool supportsPartitionBy() const override { return true; } - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - static ColumnsDescription getTableStructureFromData( const String & format, const String & uri, CompressionMethod compression_method, const HTTPHeaderEntries & headers, const std::optional & format_settings, - ContextPtr context); + const ContextPtr & context); + + static std::pair getTableStructureAndFormatFromData( + const String & uri, + CompressionMethod compression_method, + const HTTPHeaderEntries & headers, + const std::optional & format_settings, + const ContextPtr & context); + static SchemaCache & getSchemaCache(const ContextPtr & context); @@ -72,7 +77,7 @@ protected: IStorageURLBase( const String & uri_, - ContextPtr context_, + const ContextPtr & context_, const StorageID & id_, const String & format_name_, const std::optional & format_settings_, @@ -98,15 +103,13 @@ protected: ASTPtr partition_by; bool distributed_processing; - NamesAndTypesList virtual_columns; - virtual std::string getReadMethod() const; virtual std::vector> getReadURIParams( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info, - ContextPtr context, + const ContextPtr & context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; @@ -114,7 +117,7 @@ protected: const Names & column_names, const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, - ContextPtr context, + const 
ContextPtr & context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; @@ -127,6 +130,14 @@ protected: bool supportsTrivialCountOptimization() const override { return true; } private: + static std::pair getTableStructureAndFormatFromDataImpl( + std::optional format, + const String & uri, + CompressionMethod compression_method, + const HTTPHeaderEntries & headers, + const std::optional & format_settings, + const ContextPtr & context); + virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0; }; @@ -160,7 +171,7 @@ public: const String & format, const std::optional & format_settings, String name_, - ContextPtr context, + const ContextPtr & context, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, @@ -172,9 +183,9 @@ public: String getName() const override { return name; } - void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override + void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override { - setKeyConditionImpl(nodes, context_, block_for_format); + setKeyConditionImpl(filter_actions_dag, context_, block_for_format); } Chunk generate() override; @@ -231,7 +242,7 @@ public: const String & format, const std::optional & format_settings, const Block & sample_block, - ContextPtr context, + const ContextPtr & context, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, const HTTPHeaderEntries & headers = {}, @@ -263,7 +274,7 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_, const HTTPHeaderEntries & headers_ = {}, const String & method_ = "", @@ -292,12 +303,12 @@ public: std::string addresses_expr; }; - static Configuration getConfiguration(ASTs & args, ContextPtr context); + static Configuration getConfiguration(ASTs & args, const ContextPtr & context); /// Does evaluateConstantExpressionOrIdentifierAsLiteral() on all arguments. /// If `headers(...)` argument is present, parses it and moves it to the end of the array. /// Returns number of arguments excluding `headers(...)`. 
- static size_t evalArgsAndCollectHeaders(ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context); + static size_t evalArgsAndCollectHeaders(ASTs & url_function_args, HTTPHeaderEntries & header_entries, const ContextPtr & context); static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); }; @@ -314,7 +325,7 @@ public: const std::optional & format_settings_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, + const ContextPtr & context_, const String & compression_method_); void read( diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 2365887983d..2e7c63d0097 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -35,55 +35,62 @@ namespace ErrorCodes } StorageURLCluster::StorageURLCluster( - ContextPtr context_, + const ContextPtr & context, const String & cluster_name_, const String & uri_, const String & format_, - const String & compression_method_, + const String & compression_method, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const StorageURL::Configuration & configuration_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageURLCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) - , uri(uri_) + const StorageURL::Configuration & configuration_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageURLCluster (" + table_id_.table_name + ")")) + , uri(uri_), format_name(format_) { - context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); - context_->getHTTPHeaderFilter().checkHeaders(configuration_.headers); + context->getRemoteHostFilter().checkURL(Poco::URI(uri)); + context->getHTTPHeaderFilter().checkHeaders(configuration_.headers); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = StorageURL::getTableStructureFromData(format_, - uri, - chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method_), - configuration_.headers, - std::nullopt, - context_); + ColumnsDescription columns; + if (format_name == "auto") + std::tie(columns, format_name) = StorageURL::getTableStructureAndFormatFromData( + uri, chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method), configuration_.headers, std::nullopt, context); + else + columns = StorageURL::getTableStructureFromData( + format_, uri, chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method), configuration_.headers, std::nullopt, context); + storage_metadata.setColumns(columns); } else + { + if (format_name == "auto") + format_name = StorageURL::getTableStructureAndFormatFromData( + uri, chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method), configuration_.headers, std::nullopt, context).second; + storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } -void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const 
ContextPtr & context) { ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function urlCluster, got '{}'", queryToString(query)); - TableFunctionURLCluster::addColumnsStructureToArguments(expression_list->children, structure, context); + TableFunctionURLCluster::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context); } RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements, predicate, virtual_columns, context); + auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements, predicate, getVirtualsList(), context); auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 07978040029..dce2e0106ea 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -19,21 +19,18 @@ class StorageURLCluster : public IStorageCluster { public: StorageURLCluster( - ContextPtr context_, + const ContextPtr & context, const String & cluster_name_, const String & uri_, const String & format_, - const String & compression_method_, + const String & compression_method, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const StorageURL::Configuration & configuration_, - bool structure_argument_was_provided_); + const StorageURL::Configuration & configuration_); std::string getName() const override { return "URLCluster"; } - NamesAndTypesList getVirtuals() const override { return virtual_columns; } - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -41,12 +38,10 @@ public: bool supportsTrivialCountOptimization() const override { return true; } private: - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; String uri; String format_name; - String compression_method; - NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 300b11b7346..a930ffd1307 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -12,12 +12,13 @@ StorageValues::StorageValues( const StorageID & table_id_, const ColumnsDescription & columns_, const Block & res_block_, - const NamesAndTypesList & virtuals_) - : IStorage(table_id_), res_block(res_block_), virtuals(virtuals_) + VirtualColumnsDescription virtuals_) + : IStorage(table_id_), res_block(res_block_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(std::move(virtuals_)); } Pipe StorageValues::read( diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index af1c134077b..ebb182ab667 100644 --- 
a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -11,8 +11,14 @@ namespace DB class StorageValues final : public IStorage { public: + /// Why we may have virtual columns in the storage from a single block? + /// Because it used as tmp storage for pushing blocks into views, and some + /// views may contain virtual columns from original storage. StorageValues( - const StorageID & table_id_, const ColumnsDescription & columns_, const Block & res_block_, const NamesAndTypesList & virtuals_ = {}); + const StorageID & table_id_, + const ColumnsDescription & columns_, + const Block & res_block_, + VirtualColumnsDescription virtuals_ = {}); std::string getName() const override { return "Values"; } @@ -25,13 +31,6 @@ public: size_t max_block_size, size_t num_streams) override; - /// Why we may have virtual columns in the storage from a single block? - /// Because it used as tmp storage for pushing blocks into views, and some - /// views may contain virtual columns from original storage. - NamesAndTypesList getVirtuals() const override - { - return virtuals; - } /// FIXME probably it should return false, but StorageValues is used in ExecutingInnerQueryFromViewTransform (whatever it is) bool supportsTransactions() const override { return true; } @@ -40,7 +39,6 @@ public: private: Block res_block; - NamesAndTypesList virtuals; }; } diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index f0f9b9540de..673ca61cd50 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -35,6 +36,7 @@ namespace ErrorCodes { extern const int INCORRECT_QUERY; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } @@ -90,10 +92,10 @@ bool hasJoin(const ASTSelectWithUnionQuery & ast) /** There are no limits on the maximum size of the result for the view. * Since the result of the view is not the result of the entire query. 
*/ -ContextPtr getViewContext(ContextPtr context) +ContextPtr getViewContext(ContextPtr context, const StorageSnapshotPtr & storage_snapshot) { - auto view_context = Context::createCopy(context); - Settings view_settings = context->getSettings(); + auto view_context = storage_snapshot->metadata->getSQLSecurityOverriddenContext(context); + Settings view_settings = view_context->getSettings(); view_settings.max_result_rows = 0; view_settings.max_result_bytes = 0; view_settings.extremes = false; @@ -112,8 +114,18 @@ StorageView::StorageView( : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(columns_); + if (!is_parameterized_view_) + { + /// If CREATE query is to create parameterized view, then we dont want to set columns + if (!query.isParameterizedView()) + storage_metadata.setColumns(columns_); + } + else + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); + if (query.sql_security) + storage_metadata.setSQLSecurity(query.sql_security->as()); if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); @@ -152,13 +164,13 @@ void StorageView::read( if (context->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(current_inner_query, getViewContext(context), options); + InterpreterSelectQueryAnalyzer interpreter(current_inner_query, getViewContext(context, storage_snapshot), options); interpreter.addStorageLimits(*query_info.storage_limits); query_plan = std::move(interpreter).extractQueryPlan(); } else { - InterpreterSelectWithUnionQuery interpreter(current_inner_query, getViewContext(context), options, column_names); + InterpreterSelectWithUnionQuery interpreter(current_inner_query, getViewContext(context, storage_snapshot), options, column_names); interpreter.addStorageLimits(*query_info.storage_limits); interpreter.buildQueryPlan(query_plan); } @@ -199,12 +211,12 @@ void StorageView::read( static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_query) { if (!select_query.tables() || select_query.tables()->children.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no table expression in view select AST"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No table expression in view select AST"); auto * select_element = select_query.tables()->children[0]->as(); if (!select_element->table_expression) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect table expression"); return select_element->table_expression->as(); } @@ -235,7 +247,7 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ } if (!table_expression->database_and_table_name) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect table expression"); } DatabaseAndTableWithAlias db_table(table_expression->database_and_table_name); @@ -243,8 +255,7 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ view_name = table_expression->database_and_table_name; table_expression->database_and_table_name = {}; - table_expression->subquery = std::make_shared(); - table_expression->subquery->children.push_back(view_query); + table_expression->subquery = std::make_shared(view_query); table_expression->subquery->setAlias(alias); for (auto & child : table_expression->children) @@ -263,7 +274,7 @@ 
ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr ASTTableExpression * table_expression = getFirstTableExpression(select_query); if (!table_expression->subquery) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect table expression"); ASTPtr subquery = table_expression->subquery; table_expression->subquery = {}; @@ -275,6 +286,15 @@ ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr return subquery->children[0]; } +void StorageView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /* local_context */) const +{ + for (const auto & command : commands) + { + if (!command.isCommentAlter() && command.type != AlterCommand::MODIFY_SQL_SECURITY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName()); + } +} + void registerStorageView(StorageFactory & factory) { factory.registerStorage("View", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index b8bf5585c0f..4d265eed86b 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -26,6 +26,8 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } + void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override; + void read( QueryPlan & query_plan, const Names & column_names, diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 259abefb00f..fb8fa2d6da4 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -59,7 +59,7 @@ std::vector> StorageXDBC::getReadURIParams( const Names & /* column_names */, const StorageSnapshotPtr & /*storage_snapshot*/, const SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + const ContextPtr & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t max_block_size) const { @@ -70,7 +70,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( const Names & column_names, const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, - ContextPtr local_context, + const ContextPtr & local_context, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index cba15a83226..7cec7266760 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -55,7 +55,7 @@ private: const Names & column_names, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info, - ContextPtr context, + const ContextPtr & context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const override; @@ -63,7 +63,7 @@ private: const Names & column_names, const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, - ContextPtr context, + const ContextPtr & context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const override; diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp new file mode 100644 index 00000000000..53399654c8d --- /dev/null +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -0,0 +1,99 @@ +#include +// #include +// #include +// #include +// #include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class ReadFromSystemOneBlock : public 
SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemOneBlock"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemOneBlock( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::shared_ptr storage; + std::vector columns_mask; + const ActionsDAG::Node * predicate = nullptr; +}; + +void IStorageSystemOneBlock::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + size_t /*num_streams*/) +{ + storage_snapshot->check(column_names); + Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); + std::vector columns_mask; + + if (supportsColumnsMask()) + { + auto [columns_mask_, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + columns_mask = std::move(columns_mask_); + sample_block = std::move(header); + } + + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(sample_block), std::move(this_ptr), std::move(columns_mask)); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + const auto & sample_block = getOutputStream().header; + MutableColumns res_columns = sample_block.cloneEmptyColumns(); + storage->fillData(res_columns, context, predicate, std::move(columns_mask)); + + UInt64 num_rows = res_columns.at(0)->size(); + Chunk chunk(std::move(res_columns), num_rows); + + pipeline.init(Pipe(std::make_shared(sample_block, std::move(chunk)))); +} + +void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +} diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 3b2807965a4..a20434fd97e 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -1,13 +1,6 @@ #pragma once -#include -#include -#include #include -#include -#include -#include -#include namespace DB { @@ -17,8 +10,8 @@ class Context; /** IStorageSystemOneBlock is base class for system tables whose all columns can be synchronously fetched. * - * Client class need to provide static method static NamesAndTypesList getNamesAndTypes() that will return list of column names and - * their types. IStorageSystemOneBlock during read will create result columns in same order as result of getNamesAndTypes + * Client class need to provide columns_description. 
+ * IStorageSystemOneBlock during read will create result columns in same order as in columns_description * and pass it with fillData method. * * Client also must override fillData and fill result columns. @@ -26,49 +19,32 @@ class Context; * If subclass want to support virtual columns, it should override getVirtuals method of IStorage interface. * IStorageSystemOneBlock will add virtuals columns at the end of result columns of fillData method. */ -template class IStorageSystemOneBlock : public IStorage { protected: - virtual void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const = 0; + virtual void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const = 0; virtual bool supportsColumnsMask() const { return false; } + friend class ReadFromSystemOneBlock; + public: - explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) + explicit IStorageSystemOneBlock(const StorageID & table_id_, ColumnsDescription columns_description) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(Self::getColumnsDescription()); + storage_metadata.setColumns(std::move(columns_description)); setInMemoryMetadata(storage_metadata); } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - size_t /*num_streams*/) override - { - storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); - - if (supportsColumnsMask()) - { - auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); - query_info.columns_mask = std::move(columns_mask); - sample_block = std::move(header); - } - - MutableColumns res_columns = sample_block.cloneEmptyColumns(); - fillData(res_columns, context, query_info); - - UInt64 num_rows = res_columns.at(0)->size(); - Chunk chunk(std::move(res_columns), num_rows); - - return Pipe(std::make_shared(sample_block, std::move(chunk))); - } + size_t /*num_streams*/) override; bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp b/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp index 8e32a137fcb..7f3994528a9 100644 --- a/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp +++ b/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp @@ -13,7 +13,7 @@ ColumnsDescription StorageSystemAggregateFunctionCombinators::getColumnsDescript }; } -void StorageSystemAggregateFunctionCombinators::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemAggregateFunctionCombinators::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { const auto & combinators = AggregateFunctionCombinatorFactory::instance().getAllAggregateFunctionCombinators(); for (const auto & pair : combinators) diff --git a/src/Storages/System/StorageSystemAggregateFunctionCombinators.h b/src/Storages/System/StorageSystemAggregateFunctionCombinators.h index 6f3f90b58af..45036043636 100644 --- a/src/Storages/System/StorageSystemAggregateFunctionCombinators.h +++ b/src/Storages/System/StorageSystemAggregateFunctionCombinators.h @@ -6,10 +6,10 @@ namespace DB { 
-class StorageSystemAggregateFunctionCombinators final : public IStorageSystemOneBlock +class StorageSystemAggregateFunctionCombinators final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; public: diff --git a/src/Storages/System/StorageSystemAsyncLoader.cpp b/src/Storages/System/StorageSystemAsyncLoader.cpp index c56a3c3ce78..a7ffa282429 100644 --- a/src/Storages/System/StorageSystemAsyncLoader.cpp +++ b/src/Storages/System/StorageSystemAsyncLoader.cpp @@ -74,7 +74,7 @@ ColumnsDescription StorageSystemAsyncLoader::getColumnsDescription() }; } -void StorageSystemAsyncLoader::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemAsyncLoader::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { TimePoint now = std::chrono::system_clock::now(); diff --git a/src/Storages/System/StorageSystemAsyncLoader.h b/src/Storages/System/StorageSystemAsyncLoader.h index fa0ce11efe3..685db264a10 100644 --- a/src/Storages/System/StorageSystemAsyncLoader.h +++ b/src/Storages/System/StorageSystemAsyncLoader.h @@ -10,7 +10,7 @@ namespace DB class Context; /// system.asynchronous_loader table. Takes data from context.getAsyncLoader() -class StorageSystemAsyncLoader final : public IStorageSystemOneBlock +class StorageSystemAsyncLoader final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemAsyncLoader"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 20ba4d1cdfb..b5f413f2e20 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -30,7 +30,7 @@ ColumnsDescription StorageSystemAsynchronousInserts::getColumnsDescription() }; } -void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { using namespace std::chrono; diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.h b/src/Storages/System/StorageSystemAsynchronousInserts.h index 891494ffbeb..748937cffb8 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.h +++ b/src/Storages/System/StorageSystemAsynchronousInserts.h @@ -8,7 +8,7 @@ namespace DB /** Implements the system table `asynhronous_inserts`, * which contains information about pending asynchronous inserts in queue. 
*/ -class StorageSystemAsynchronousInserts final : public IStorageSystemOneBlock +class StorageSystemAsynchronousInserts final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemAsynchronousInserts"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemAsynchronousMetrics.cpp b/src/Storages/System/StorageSystemAsynchronousMetrics.cpp index 58940a7e52e..a54b577d72b 100644 --- a/src/Storages/System/StorageSystemAsynchronousMetrics.cpp +++ b/src/Storages/System/StorageSystemAsynchronousMetrics.cpp @@ -19,11 +19,11 @@ ColumnsDescription StorageSystemAsynchronousMetrics::getColumnsDescription() StorageSystemAsynchronousMetrics::StorageSystemAsynchronousMetrics(const StorageID & table_id_, const AsynchronousMetrics & async_metrics_) - : IStorageSystemOneBlock(table_id_), async_metrics(async_metrics_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()), async_metrics(async_metrics_) { } -void StorageSystemAsynchronousMetrics::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemAsynchronousMetrics::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { auto async_metrics_values = async_metrics.getValues(); for (const auto & name_value : async_metrics_values) diff --git a/src/Storages/System/StorageSystemAsynchronousMetrics.h b/src/Storages/System/StorageSystemAsynchronousMetrics.h index 026377c77a0..3543b7684d7 100644 --- a/src/Storages/System/StorageSystemAsynchronousMetrics.h +++ b/src/Storages/System/StorageSystemAsynchronousMetrics.h @@ -11,7 +11,7 @@ class Context; /** Implements system table asynchronous_metrics, which allows to get values of periodically (asynchronously) updated metrics. */ -class StorageSystemAsynchronousMetrics final : public IStorageSystemOneBlock +class StorageSystemAsynchronousMetrics final : public IStorageSystemOneBlock { public: StorageSystemAsynchronousMetrics(const StorageID & table_id_, const AsynchronousMetrics & async_metrics_); @@ -24,7 +24,7 @@ private: const AsynchronousMetrics & async_metrics; protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index 17fb56e0a92..fec92229556 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -22,6 +22,7 @@ ColumnsDescription StorageSystemBackups::getColumnsDescription() {"id", std::make_shared(), "Operation ID, can be either passed via SETTINGS id=... 
or be randomly generated UUID."}, {"name", std::make_shared(), "Operation name, a string like `Disk('backups', 'my_backup')`"}, {"base_backup_name", std::make_shared(), "Base Backup Operation name, a string like `Disk('backups', 'my_base_backup')`"}, + {"query_id", std::make_shared(), "Query ID of a query that started backup."}, {"status", std::make_shared(getBackupStatusEnumValues()), "Status of backup or restore operation."}, {"error", std::make_shared(), "The error message if any."}, {"start_time", std::make_shared(), "The time when operation started."}, @@ -38,12 +39,13 @@ ColumnsDescription StorageSystemBackups::getColumnsDescription() } -void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { size_t column_index = 0; auto & column_id = assert_cast(*res_columns[column_index++]); auto & column_name = assert_cast(*res_columns[column_index++]); auto & column_base_backup_name = assert_cast(*res_columns[column_index++]); + auto & column_query_id = assert_cast(*res_columns[column_index++]); auto & column_status = assert_cast(*res_columns[column_index++]); auto & column_error = assert_cast(*res_columns[column_index++]); auto & column_start_time = assert_cast(*res_columns[column_index++]); @@ -62,6 +64,7 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con column_id.insertData(info.id.data(), info.id.size()); column_name.insertData(info.name.data(), info.name.size()); column_base_backup_name.insertData(info.base_backup_name.data(), info.base_backup_name.size()); + column_query_id.insertData(info.query_id.data(), info.query_id.size()); column_status.insertValue(static_cast(info.status)); column_error.insertData(info.error_message.data(), info.error_message.size()); column_start_time.insertValue(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); diff --git a/src/Storages/System/StorageSystemBackups.h b/src/Storages/System/StorageSystemBackups.h index a081bd52b4c..48e03d12e18 100644 --- a/src/Storages/System/StorageSystemBackups.h +++ b/src/Storages/System/StorageSystemBackups.h @@ -7,7 +7,7 @@ namespace DB { /// Implements `grants` system table, which allows you to get information about grants. 
-class StorageSystemBackups final : public IStorageSystemOneBlock +class StorageSystemBackups final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemBackups"; } @@ -15,7 +15,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp b/src/Storages/System/StorageSystemBuildOptions.cpp index c12935078af..a7144299eca 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp +++ b/src/Storages/System/StorageSystemBuildOptions.cpp @@ -17,7 +17,7 @@ ColumnsDescription StorageSystemBuildOptions::getColumnsDescription() }; } -void StorageSystemBuildOptions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemBuildOptions::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (auto * it = auto_config_build; *it; it += 2) { diff --git a/src/Storages/System/StorageSystemBuildOptions.h b/src/Storages/System/StorageSystemBuildOptions.h index 7c0bbf6b5fd..dbe651d7513 100644 --- a/src/Storages/System/StorageSystemBuildOptions.h +++ b/src/Storages/System/StorageSystemBuildOptions.h @@ -11,10 +11,10 @@ class Context; /** System table "build_options" with many params used for clickhouse building */ -class StorageSystemBuildOptions final : public IStorageSystemOneBlock +class StorageSystemBuildOptions final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index c372e4b32b0..5ec683e1784 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -19,19 +19,18 @@ namespace DB ColumnsDescription StorageSystemCertificates::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"version", std::make_shared>()}, - {"serial_number", std::make_shared(std::make_shared())}, - {"signature_algo", std::make_shared(std::make_shared())}, - {"issuer", std::make_shared(std::make_shared())}, - {"not_before", std::make_shared(std::make_shared())}, - {"not_after", std::make_shared(std::make_shared())}, - {"subject", std::make_shared(std::make_shared())}, - {"pkey_algo", std::make_shared(std::make_shared())}, - {"path", std::make_shared()}, - {"default", std::make_shared>()} + {"version", std::make_shared>(), "Version of the certificate. 
Values are 0 for v1, 1 for v2, 2 for v3."}, + {"serial_number", std::make_shared(std::make_shared()), "Serial Number of the certificate assigned by the issuer."}, + {"signature_algo", std::make_shared(std::make_shared()), "Signature Algorithm - an algorithm used by the issuer to sign this certificate."}, + {"issuer", std::make_shared(std::make_shared()), "Issuer - an unique identifier for the Certificate Authority issuing this certificate."}, + {"not_before", std::make_shared(std::make_shared()), "The beginning of the time window when this certificate is valid."}, + {"not_after", std::make_shared(std::make_shared()), "The end of the time window when this certificate is valid."}, + {"subject", std::make_shared(std::make_shared()), "Subject - identifies the owner of the public key."}, + {"pkey_algo", std::make_shared(std::make_shared()), "Public Key Algorithm defines the algorithm the public key can be used with."}, + {"path", std::make_shared(), "Path to the file or directory containing this certificate."}, + {"default", std::make_shared>(), "Certificate is in the default certificate location."} }; } @@ -169,7 +168,7 @@ static void enumCertificates(const std::string & dir, bool def, MutableColumns & #endif -void StorageSystemCertificates::fillData([[maybe_unused]] MutableColumns & res_columns, ContextPtr/* context*/, const SelectQueryInfo &) const +void StorageSystemCertificates::fillData([[maybe_unused]] MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { #if USE_SSL const auto & ca_paths = Poco::Net::SSLManager::instance().defaultServerContext()->getCAPaths(); diff --git a/src/Storages/System/StorageSystemCertificates.h b/src/Storages/System/StorageSystemCertificates.h index f8c8477c998..bc3fef83853 100644 --- a/src/Storages/System/StorageSystemCertificates.h +++ b/src/Storages/System/StorageSystemCertificates.h @@ -13,7 +13,7 @@ class Cluster; * that allows to obtain information about available certificates * and their sources. 
*/ -class StorageSystemCertificates final : public IStorageSystemOneBlock +class StorageSystemCertificates final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemCertificates"; } @@ -23,7 +23,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 6172d4329cd..3c01b4717cc 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemClusters::getColumnsDescription() return description; } -void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { for (const auto & name_and_cluster : context->getClusters()) writeCluster(res_columns, name_and_cluster, {}); diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 7b568641cb2..0f7c792261d 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -15,7 +15,7 @@ class Cluster; * that allows to obtain information about available clusters * (which may be specified in Distributed tables). */ -class StorageSystemClusters final : public IStorageSystemOneBlock +class StorageSystemClusters final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemClusters"; } @@ -26,7 +26,7 @@ protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; using NameAndCluster = std::pair>; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const std::vector & is_active); }; diff --git a/src/Storages/System/StorageSystemCollations.cpp b/src/Storages/System/StorageSystemCollations.cpp index 2e5d11943af..5ef075e53be 100644 --- a/src/Storages/System/StorageSystemCollations.cpp +++ b/src/Storages/System/StorageSystemCollations.cpp @@ -1,6 +1,8 @@ #include #include +#include #include +#include namespace DB { @@ -14,7 +16,7 @@ ColumnsDescription StorageSystemCollations::getColumnsDescription() }; } -void StorageSystemCollations::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemCollations::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto & [locale, lang]: AvailableCollationLocales::instance().getAvailableCollations()) { diff --git a/src/Storages/System/StorageSystemCollations.h b/src/Storages/System/StorageSystemCollations.h index 1fc0ff0e024..449e0d8ac06 100644 --- a/src/Storages/System/StorageSystemCollations.h +++ b/src/Storages/System/StorageSystemCollations.h @@ -5,10 +5,10 @@ namespace DB { -class StorageSystemCollations final : public IStorageSystemOneBlock +class StorageSystemCollations final : public IStorageSystemOneBlock { 
protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index ad65f6f5476..5c96c6502af 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -16,6 +16,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -28,27 +31,32 @@ StorageSystemColumns::StorageSystemColumns(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "name", std::make_shared() }, - { "type", std::make_shared() }, - { "position", std::make_shared() }, - { "default_kind", std::make_shared() }, - { "default_expression", std::make_shared() }, - { "data_compressed_bytes", std::make_shared() }, - { "data_uncompressed_bytes", std::make_shared() }, - { "marks_bytes", std::make_shared() }, - { "comment", std::make_shared() }, - { "is_in_partition_key", std::make_shared() }, - { "is_in_sorting_key", std::make_shared() }, - { "is_in_primary_key", std::make_shared() }, - { "is_in_sampling_key", std::make_shared() }, - { "compression_codec", std::make_shared() }, - { "character_octet_length", std::make_shared(std::make_shared()) }, - { "numeric_precision", std::make_shared(std::make_shared()) }, - { "numeric_precision_radix", std::make_shared(std::make_shared()) }, - { "numeric_scale", std::make_shared(std::make_shared()) }, - { "datetime_precision", std::make_shared(std::make_shared()) }, + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "name", std::make_shared(), "Column name."}, + { "type", std::make_shared(), "Column type."}, + { "position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + { "default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + { "default_expression", std::make_shared(), "Expression for the default value, or an empty string if it is not defined."}, + { "data_compressed_bytes", std::make_shared(), "The size of compressed data, in bytes."}, + { "data_uncompressed_bytes", std::make_shared(), "The size of decompressed data, in bytes."}, + { "marks_bytes", std::make_shared(), "The size of marks, in bytes."}, + { "comment", std::make_shared(), "Comment on the column, or an empty string if it is not defined."}, + { "is_in_partition_key", std::make_shared(), "Flag that indicates whether the column is in the partition expression."}, + { "is_in_sorting_key", std::make_shared(), "Flag that indicates whether the column is in the sorting key expression."}, + { "is_in_primary_key", std::make_shared(), "Flag that indicates whether the column is in the primary key expression."}, + { "is_in_sampling_key", std::make_shared(), "Flag that indicates whether the column is in the sampling key expression."}, + { "compression_codec", std::make_shared(), "Compression codec name."}, + { "character_octet_length", std::make_shared(std::make_shared()), + "Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for FixedString data type. 
Otherwise, the NULL value is returned."}, + { "numeric_precision", std::make_shared(std::make_shared()), + "Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for Decimal types. Otherwise, the NULL value is returned."}, + { "numeric_precision_radix", std::make_shared(std::make_shared()), + "The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for Decimal types. Otherwise, the NULL value is returned."}, + { "numeric_scale", std::make_shared(std::make_shared()), + "The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for Decimal types. Otherwise, the NULL value is returned."}, + { "datetime_precision", std::make_shared(std::make_shared()), + "Decimal precision of DateTime64 data type. For other data types, the NULL value is returned."}, })); setInMemoryMetadata(storage_metadata); @@ -291,8 +299,51 @@ private: std::chrono::milliseconds lock_acquire_timeout; }; +class ReadFromSystemColumns : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemColumns"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; -Pipe StorageSystemColumns::read( + ReadFromSystemColumns( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::shared_ptr storage; + std::vector columns_mask; + const size_t max_block_size; + const ActionsDAG::Node * predicate = nullptr; +}; + +void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemColumns::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -306,9 +357,22 @@ Pipe StorageSystemColumns::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(this_ptr), std::move(columns_mask), max_block_size); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ Block block_to_filter; Storages storages; Pipes pipes; + auto header = getOutputStream().header; { /// Add `database` column. @@ -338,12 +402,13 @@ Pipe StorageSystemColumns::read( block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared(), "database")); /// Filter block with `database` column. 
- VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); if (!block_to_filter.rows()) { - pipes.emplace_back(std::make_shared(header)); - return Pipe::unitePipes(std::move(pipes)); + pipes.emplace_back(std::make_shared(std::move(header))); + pipeline.init(Pipe::unitePipes(std::move(pipes))); + return; } ColumnPtr & database_column = block_to_filter.getByName("database").column; @@ -384,12 +449,13 @@ Pipe StorageSystemColumns::read( } /// Filter block with `database` and `table` columns. - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); if (!block_to_filter.rows()) { - pipes.emplace_back(std::make_shared(header)); - return Pipe::unitePipes(std::move(pipes)); + pipes.emplace_back(std::make_shared(std::move(header))); + pipeline.init(Pipe::unitePipes(std::move(pipes))); + return; } ColumnPtr filtered_database_column = block_to_filter.getByName("database").column; @@ -400,7 +466,7 @@ Pipe StorageSystemColumns::read( std::move(filtered_database_column), std::move(filtered_table_column), std::move(storages), context)); - return Pipe::unitePipes(std::move(pipes)); + pipeline.init(Pipe::unitePipes(std::move(pipes))); } } diff --git a/src/Storages/System/StorageSystemColumns.h b/src/Storages/System/StorageSystemColumns.h index 7b4b5dd8fb3..22b2541d93f 100644 --- a/src/Storages/System/StorageSystemColumns.h +++ b/src/Storages/System/StorageSystemColumns.h @@ -17,7 +17,8 @@ public: std::string getName() const override { return "SystemColumns"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, diff --git a/src/Storages/System/StorageSystemContributors.cpp b/src/Storages/System/StorageSystemContributors.cpp index 860a96c4388..f928337e1bd 100644 --- a/src/Storages/System/StorageSystemContributors.cpp +++ b/src/Storages/System/StorageSystemContributors.cpp @@ -17,7 +17,7 @@ ColumnsDescription StorageSystemContributors::getColumnsDescription() }; } -void StorageSystemContributors::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemContributors::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { std::vector contributors; for (auto * it = auto_contributors; *it; ++it) diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index b5a985fec9b..0ad22082863 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -12,6 +12,7 @@ const char * auto_contributors[] { "7vikpeculiar", "821008736@qq.com", "94rain", + "9611008+johnnymatthews@users.noreply.github.com", "AN", "ANDREI STAROVEROV", "AVMusorin", @@ -102,6 +103,7 @@ const char * auto_contributors[] { "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", + "AlexeyGrezz", "Alexsey Shestakov", "AlfVII", "Alfonso Martinez", @@ -177,6 +179,7 @@ const char * auto_contributors[] { "Antonio Bonuccelli", "Aram Peres", "Ariel Robaldo", + "Aris Tritas", "Arsen Hakobyan", "Arslan G", "ArtCorp", @@ -251,6 +254,7 @@ const char * auto_contributors[] { "Chang Chen", "Chao Ma", "Chao Wang", + "Charlie", "CheSema", "Chebarykov Pavel", "Chen Lixiang", @@ -397,6 +401,7 @@ const 
char * auto_contributors[] { "Filipe Caixeta", "Filipp Ozinov", "Filippov Denis", + "Fille", "Flowyi", "Francisco Barón", "Frank Chen", @@ -454,6 +459,7 @@ const char * auto_contributors[] { "Hongbin", "Hongbin Ma", "Hosun Lee", + "HowePa", "HuFuwang", "Hui Wang", "ILya Limarenko", @@ -549,7 +555,9 @@ const char * auto_contributors[] { "Joris Giovannangeli", "Jose", "Josh Taylor", + "Joshua Hildred", "João Figueiredo", + "Juan Madurga", "Julia Kartseva", "Julian Gilyadov", "Julian Maicher", @@ -577,6 +585,7 @@ const char * auto_contributors[] { "Kirill Danshin", "Kirill Ershov", "Kirill Malev", + "Kirill Nikiforov", "Kirill Shvakov", "KitKatKKK", "Koblikov Mihail", @@ -767,6 +776,7 @@ const char * auto_contributors[] { "Nick-71", "Nickita", "Nickita Taranov", + "Nickolaj Jepsen", "Nickolay Yastrebov", "Nico Mandery", "Nico Piderman", @@ -787,6 +797,7 @@ const char * auto_contributors[] { "Nikita Tikhomirov", "Nikita Vasilev", "NikitaEvs", + "Nikolai Fedorovskikh", "Nikolai Kochetov", "Nikolai Sorokin", "Nikolay", @@ -823,6 +834,7 @@ const char * auto_contributors[] { "PHO", "Pablo Alegre", "Pablo Marcos", + "Pablo Musa", "Palash Goel", "Paramtamtam", "Patrick Zippenfenig", @@ -899,6 +911,7 @@ const char * auto_contributors[] { "Roman Vasin", "Roman Vlasenko", "Roman Zhukov", + "Ronald Bradford", "Rory Crispin", "Roy Bellingan", "Ruslan", @@ -1146,6 +1159,7 @@ const char * auto_contributors[] { "Yağızcan DeÄŸirmenci", "Yegor Andreenko", "Yegor Levankov", + "YenchangChan", "Yingchun Lai", "Yingfan Chen", "Yinzheng-Sun", @@ -1288,6 +1302,7 @@ const char * auto_contributors[] { "cnmade", "comunodi", "congbaoyangrou", + "conicliu", "copperybean", "coraxster", "cwkyaoyao", @@ -1434,6 +1449,7 @@ const char * auto_contributors[] { "jianmei zhang", "jinjunzh", "jiyoungyoooo", + "jktng", "jkuklis", "joelynch", "johanngan", @@ -1555,6 +1571,7 @@ const char * auto_contributors[] { "miha-g", "mikael", "mikepop7", + "mikhnenko", "millb", "minhthucdao", "mlkui", @@ -1644,6 +1661,7 @@ const char * auto_contributors[] { "robot-clickhouse-ci-2", "robot-metrika-test", "rodrigargar", + "rogeryk", "roman", "romanzhukov", "rondo_1895", @@ -1721,6 +1739,7 @@ const char * auto_contributors[] { "turbo jason", "tyrionhuang", "ubuntu", + "una", "unbyte", "unegare", "unknown", @@ -1834,6 +1853,7 @@ const char * auto_contributors[] { "Иванов Евгений", "Ð˜Ð»ÑŒÑ Ð˜Ñаев", "Ð˜Ð»ÑŒÑ ÐšÐ¾Ñ€Ð³ÑƒÐ½", + "Кирилл Гарбар", "Коренберг Марк", "Коренберг â˜¢ï¸ ÐœÐ°Ñ€Ðº", "Павел Литвиненко", diff --git a/src/Storages/System/StorageSystemContributors.h b/src/Storages/System/StorageSystemContributors.h index ed983c5e61f..f8d082542d2 100644 --- a/src/Storages/System/StorageSystemContributors.h +++ b/src/Storages/System/StorageSystemContributors.h @@ -9,10 +9,10 @@ class Context; /** System table "contributors" with list of clickhouse contributors */ -class StorageSystemContributors final : public IStorageSystemOneBlock +class StorageSystemContributors final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemCurrentRoles.cpp b/src/Storages/System/StorageSystemCurrentRoles.cpp index 88bdf088175..bfa3a7c2e18 100644 --- a/src/Storages/System/StorageSystemCurrentRoles.cpp +++ 
b/src/Storages/System/StorageSystemCurrentRoles.cpp @@ -22,7 +22,7 @@ ColumnsDescription StorageSystemCurrentRoles::getColumnsDescription() } -void StorageSystemCurrentRoles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemCurrentRoles::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto roles_info = context->getRolesInfo(); auto user = context->getUser(); diff --git a/src/Storages/System/StorageSystemCurrentRoles.h b/src/Storages/System/StorageSystemCurrentRoles.h index 4cc9b11d3f4..db1245e0ea7 100644 --- a/src/Storages/System/StorageSystemCurrentRoles.h +++ b/src/Storages/System/StorageSystemCurrentRoles.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `current_roles` system table, which allows you to get information about current roles. -class StorageSystemCurrentRoles final : public IStorageSystemOneBlock +class StorageSystemCurrentRoles final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemCurrentRoles"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index c133a1b597c..ac5dd6c05d0 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -204,7 +204,7 @@ static void fillStatusColumns(MutableColumns & res_columns, size_t & col, } -void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto& ddl_worker = context->getDDLWorker(); fs::path ddl_zookeeper_path = ddl_worker.getQueueDir(); diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.h b/src/Storages/System/StorageSystemDDLWorkerQueue.h index 871bb706f94..eaffb488322 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.h +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.h @@ -11,10 +11,10 @@ class Context; /** System table "distributed_ddl_queue" with list of queries that are currently in the DDL worker queue. 
*/ -class StorageSystemDDLWorkerQueue final : public IStorageSystemOneBlock +class StorageSystemDDLWorkerQueue final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemDNSCache.cpp b/src/Storages/System/StorageSystemDNSCache.cpp new file mode 100644 index 00000000000..6fb83597908 --- /dev/null +++ b/src/Storages/System/StorageSystemDNSCache.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include +#include +#include "StorageSystemDNSCache.h" + +namespace DB +{ + +static DataTypePtr getIPFamilyEnumType() +{ + return std::make_shared( + DataTypeEnum8::Values + { + {"IPv4", static_cast(Poco::Net::AddressFamily::IPv4)}, + {"IPv6", static_cast(Poco::Net::AddressFamily::IPv6)}, + {"UNIX_LOCAL", static_cast(Poco::Net::AddressFamily::UNIX_LOCAL)}, + }); +} + +ColumnsDescription StorageSystemDNSCache::getColumnsDescription() +{ + return ColumnsDescription + { + {"hostname", std::make_shared(), "Hostname."}, + {"ip_address", std::make_shared(), "IP address."}, + {"ip_family", getIPFamilyEnumType(), "IP address family."}, + {"cached_at", std::make_shared(), "Record cached timestamp."}, + }; +} + +void StorageSystemDNSCache::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const +{ + using HostIPPair = std::pair; + std::unordered_set>> reported_elements; + + for (const auto & [hostname, entry] : DNSResolver::instance().cacheEntries()) + { + for (const auto &address : entry.addresses) + { + std::string ip = address.toString(); + + // Cache might report the same ip address multiple times. Report only one of them. + if (reported_elements.contains(HostIPPair(hostname, ip))) + continue; + + reported_elements.insert(HostIPPair(hostname, ip)); + + size_t i = 0; + res_columns[i++]->insert(hostname); + res_columns[i++]->insert(ip); + res_columns[i++]->insert(address.family()); + res_columns[i++]->insert(static_cast(std::chrono::system_clock::to_time_t(entry.cached_at))); + } + } +} + +} diff --git a/src/Storages/System/StorageSystemDNSCache.h b/src/Storages/System/StorageSystemDNSCache.h new file mode 100644 index 00000000000..dd24d2f35f6 --- /dev/null +++ b/src/Storages/System/StorageSystemDNSCache.h @@ -0,0 +1,25 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + +/// system.dns_cache table. 
+class StorageSystemDNSCache final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemDNSCache"; } + + static ColumnsDescription getColumnsDescription(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemDashboards.cpp b/src/Storages/System/StorageSystemDashboards.cpp index 7c9e8b73519..23d8fcfc481 100644 --- a/src/Storages/System/StorageSystemDashboards.cpp +++ b/src/Storages/System/StorageSystemDashboards.cpp @@ -22,7 +22,7 @@ String trim(const char * text) return String(view); } -void StorageSystemDashboards::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDashboards::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { static const std::vector> dashboards { diff --git a/src/Storages/System/StorageSystemDashboards.h b/src/Storages/System/StorageSystemDashboards.h index 83a8664ad27..f3e957e06c5 100644 --- a/src/Storages/System/StorageSystemDashboards.h +++ b/src/Storages/System/StorageSystemDashboards.h @@ -12,7 +12,7 @@ namespace DB class Context; -class StorageSystemDashboards final : public IStorageSystemOneBlock +class StorageSystemDashboards final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemDashboards"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 0c4eb197efd..ff782647c79 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -12,7 +12,10 @@ #include #include #include +#include +#include #include +#include namespace DB @@ -23,16 +26,16 @@ StorageSystemDataSkippingIndices::StorageSystemDataSkippingIndices(const Storage StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "name", std::make_shared() }, - { "type", std::make_shared() }, - { "type_full", std::make_shared() }, - { "expr", std::make_shared() }, - { "granularity", std::make_shared() }, - { "data_compressed_bytes", std::make_shared() }, - { "data_uncompressed_bytes", std::make_shared() }, - { "marks", std::make_shared()} + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "name", std::make_shared(), "Index name."}, + { "type", std::make_shared(), "Index type."}, + { "type_full", std::make_shared(), "Index type expression from create statement."}, + { "expr", std::make_shared(), "Expression for the index calculation."}, + { "granularity", std::make_shared(), "The number of granules in the block."}, + { "data_compressed_bytes", std::make_shared(), "The size of compressed data, in bytes."}, + { "data_uncompressed_bytes", std::make_shared(), "The size of decompressed data, in bytes."}, + { "marks", std::make_shared(), "The size of marks, in bytes."} })); 
setInMemoryMetadata(storage_metadata); } @@ -176,7 +179,51 @@ private: DatabaseTablesIteratorPtr tables_it; }; -Pipe StorageSystemDataSkippingIndices::read( +class ReadFromSystemDataSkippingIndices : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemDataSkippingIndices"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemDataSkippingIndices( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::shared_ptr storage; + std::vector columns_mask; + const size_t max_block_size; + const ActionsDAG::Node * predicate = nullptr; +}; + +void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemDataSkippingIndices::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -190,6 +237,17 @@ Pipe StorageSystemDataSkippingIndices::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(this_ptr), std::move(columns_mask), max_block_size); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemDataSkippingIndices::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ MutableColumnPtr column = ColumnString::create(); const auto databases = DatabaseCatalog::instance().getDatabases(); @@ -207,11 +265,11 @@ Pipe StorageSystemDataSkippingIndices::read( /// Condition on "database" in a query acts like an index. 
Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); ColumnPtr & filtered_databases = block.getByPosition(0).column; - return Pipe(std::make_shared( - std::move(columns_mask), std::move(header), max_block_size, std::move(filtered_databases), context)); + pipeline.init(Pipe(std::make_shared( + std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases), context))); } } diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.h b/src/Storages/System/StorageSystemDataSkippingIndices.h index 8a1e8c159b4..8bf1da98368 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.h +++ b/src/Storages/System/StorageSystemDataSkippingIndices.h @@ -14,7 +14,8 @@ public: std::string getName() const override { return "SystemDataSkippingIndices"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, diff --git a/src/Storages/System/StorageSystemDataTypeFamilies.cpp b/src/Storages/System/StorageSystemDataTypeFamilies.cpp index 821a43ed530..da607ade410 100644 --- a/src/Storages/System/StorageSystemDataTypeFamilies.cpp +++ b/src/Storages/System/StorageSystemDataTypeFamilies.cpp @@ -17,7 +17,7 @@ ColumnsDescription StorageSystemDataTypeFamilies::getColumnsDescription() }; } -void StorageSystemDataTypeFamilies::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDataTypeFamilies::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { const auto & factory = DataTypeFactory::instance(); auto names = factory.getAllRegisteredNames(); diff --git a/src/Storages/System/StorageSystemDataTypeFamilies.h b/src/Storages/System/StorageSystemDataTypeFamilies.h index 2cb834f6931..6322c3bcfdd 100644 --- a/src/Storages/System/StorageSystemDataTypeFamilies.h +++ b/src/Storages/System/StorageSystemDataTypeFamilies.h @@ -5,10 +5,10 @@ namespace DB { -class StorageSystemDataTypeFamilies final : public IStorageSystemOneBlock +class StorageSystemDataTypeFamilies final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemDatabaseEngines.cpp b/src/Storages/System/StorageSystemDatabaseEngines.cpp index 514ca6d0ab3..59ba60fa871 100644 --- a/src/Storages/System/StorageSystemDatabaseEngines.cpp +++ b/src/Storages/System/StorageSystemDatabaseEngines.cpp @@ -14,7 +14,7 @@ ColumnsDescription StorageSystemDatabaseEngines::getColumnsDescription() }; } -void StorageSystemDatabaseEngines::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDatabaseEngines::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto & [engine, _] : DatabaseFactory::instance().getDatabaseEngines()) { diff --git a/src/Storages/System/StorageSystemDatabaseEngines.h b/src/Storages/System/StorageSystemDatabaseEngines.h index 16b517c91e6..3af13598c17 100644 --- a/src/Storages/System/StorageSystemDatabaseEngines.h +++ 
b/src/Storages/System/StorageSystemDatabaseEngines.h @@ -6,10 +6,10 @@ namespace DB { -class StorageSystemDatabaseEngines final : public IStorageSystemOneBlock +class StorageSystemDatabaseEngines final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 51ecb8f17ca..f5537b508ba 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -72,7 +72,7 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database return engine_full; } -static ColumnPtr getFilteredDatabases(const Databases & databases, const SelectQueryInfo & query_info, ContextPtr context) +static ColumnPtr getFilteredDatabases(const Databases & databases, const ActionsDAG::Node * predicate, ContextPtr context) { MutableColumnPtr name_column = ColumnString::create(); MutableColumnPtr engine_column = ColumnString::create(); @@ -94,17 +94,17 @@ static ColumnPtr getFilteredDatabases(const Databases & databases, const SelectQ ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine"), ColumnWithTypeAndName(std::move(uuid_column), std::make_shared(), "uuid") }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); return block.getByPosition(0).column; } -void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES); const auto databases = DatabaseCatalog::instance().getDatabases(); - ColumnPtr filtered_databases_column = getFilteredDatabases(databases, query_info, context); + ColumnPtr filtered_databases_column = getFilteredDatabases(databases, predicate, context); for (size_t i = 0; i < filtered_databases_column->size(); ++i) { @@ -120,7 +120,6 @@ void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr c size_t src_index = 0; size_t res_index = 0; - const auto & columns_mask = query_info.columns_mask; if (columns_mask[src_index++]) res_columns[res_index++]->insert(database_name); if (columns_mask[src_index++]) diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 2fd9ccdc970..fa55f0aea32 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -11,7 +11,7 @@ class Context; /** Implements `databases` system table, which allows you to get information about all databases. 
*/ -class StorageSystemDatabases final : public IStorageSystemOneBlock +class StorageSystemDatabases final : public IStorageSystemOneBlock { public: std::string getName() const override @@ -26,7 +26,7 @@ protected: bool supportsColumnsMask() const override { return true; } - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const override; }; } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index a9cd5f2610a..1eb79744022 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -11,7 +11,9 @@ #include #include #include -#include +#include +#include +#include #include @@ -269,23 +271,69 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription{{ - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"partition_id", std::make_shared(std::make_shared())}, - {"name", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"modification_time",std::make_shared()}, - {"disk", std::make_shared()}, - {"path", std::make_shared()}, - {"reason", std::make_shared(std::make_shared())}, - {"min_block_number", std::make_shared(std::make_shared())}, - {"max_block_number", std::make_shared(std::make_shared())}, - {"level", std::make_shared(std::make_shared())}, + {"database", std::make_shared(), "The name of the database this part belongs to."}, + {"table", std::make_shared(), "The name of the table this part belongs to."}, + {"partition_id", std::make_shared(std::make_shared()), "The identifier of the partition this part belongs to."}, + {"name", std::make_shared(), "The name of the part."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"modification_time",std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time when detach happened."}, + {"disk", std::make_shared(), "The name of the disk that stores this data part."}, + {"path", std::make_shared(), "The path of the disk to the file of this data part."}, + {"reason", std::make_shared(std::make_shared()), "The explanation why this part was detached."}, + {"min_block_number", std::make_shared(std::make_shared()), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(std::make_shared()), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(std::make_shared()), "Depth of the merge tree. 
Zero means that the current part was created by insert rather than by merging other parts."}, }}); setInMemoryMetadata(storage_metadata); } -Pipe StorageSystemDetachedParts::read( +class ReadFromSystemDetachedParts : public SourceStepWithFilter +{ +public: + ReadFromSystemDetachedParts( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + size_t max_block_size_, + size_t num_streams_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + {} + + std::string getName() const override { return "ReadFromSystemDetachedParts"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +protected: + std::shared_ptr storage; + std::vector columns_mask; + + const ActionsDAG::Node * predicate = nullptr; + const size_t max_block_size; + const size_t num_streams; +}; + +void ReadFromSystemDetachedParts::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemDetachedParts::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -299,17 +347,28 @@ Pipe StorageSystemDetachedParts::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); - auto state = std::make_shared(StoragesInfoStream(query_info, context)); + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(this_ptr), std::move(columns_mask), max_block_size, num_streams); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemDetachedParts::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto state = std::make_shared(StoragesInfoStream(predicate, context)); Pipe pipe; for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size); + auto source = std::make_shared(getOutputStream().header, state, columns_mask, max_block_size); pipe.addSource(std::move(source)); } - return pipe; + pipeline.init(std::move(pipe)); } } diff --git a/src/Storages/System/StorageSystemDetachedParts.h b/src/Storages/System/StorageSystemDetachedParts.h index 20ac69f0eea..52b964e3b3c 100644 --- a/src/Storages/System/StorageSystemDetachedParts.h +++ b/src/Storages/System/StorageSystemDetachedParts.h @@ -20,14 +20,15 @@ public: bool isSystemStorage() const override { return true; } protected: - Pipe read( - const Names & /* column_names */, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t /*max_block_size*/, - size_t /*num_streams*/) override; + void read( + QueryPlan & query_plan, + const Names & /* column_names */, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & 
query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + size_t /*num_streams*/) override; }; } diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index c2ed35c5510..8b528b4a298 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -16,7 +16,6 @@ #include #include -#include namespace DB { @@ -52,6 +51,14 @@ catch (const DB::Exception &) } +StorageSystemDictionaries::StorageSystemDictionaries(const StorageID & storage_id_, ColumnsDescription columns_description_) + : IStorageSystemOneBlock(storage_id_, std::move(columns_description_)) +{ + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("key", std::make_shared(), ""); + setVirtuals(std::move(virtuals)); +} + ColumnsDescription StorageSystemDictionaries::getColumnsDescription() { return ColumnsDescription @@ -75,7 +82,7 @@ ColumnsDescription StorageSystemDictionaries::getColumnsDescription() {"attribute.names", std::make_shared(std::make_shared()), "Array of attribute names provided by the dictionary."}, {"attribute.types", std::make_shared(std::make_shared()), "Corresponding array of attribute types provided by the dictionary."}, {"bytes_allocated", std::make_shared(), "Amount of RAM allocated for the dictionary."}, - {"hierarchical_index_bytes_allocated", std::make_shared(), ""}, + {"hierarchical_index_bytes_allocated", std::make_shared(), "Amount of RAM allocated for hierarchical index."}, {"query_count", std::make_shared(), "Number of queries since the dictionary was loaded or since the last successful reboot."}, {"hit_rate", std::make_shared(), "For cache dictionaries, the percentage of uses for which the value was in the cache."}, {"found_rate", std::make_shared(), "The percentage of uses for which the value was found."}, @@ -92,14 +99,7 @@ ColumnsDescription StorageSystemDictionaries::getColumnsDescription() }; } -NamesAndTypesList StorageSystemDictionaries::getVirtuals() const -{ - return { - {"key", std::make_shared()} - }; -} - -void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & /*query_info*/) const +void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_dictionaries = access->isGranted(AccessType::SHOW_DICTIONARIES); diff --git a/src/Storages/System/StorageSystemDictionaries.h b/src/Storages/System/StorageSystemDictionaries.h index 792b3c0dd30..058b8b163d9 100644 --- a/src/Storages/System/StorageSystemDictionaries.h +++ b/src/Storages/System/StorageSystemDictionaries.h @@ -9,19 +9,17 @@ namespace DB class Context; -class StorageSystemDictionaries final : public IStorageSystemOneBlock +class StorageSystemDictionaries final : public IStorageSystemOneBlock { public: + StorageSystemDictionaries(const StorageID & storage_id_, ColumnsDescription columns_description_); + std::string getName() const override { return "SystemDictionaries"; } static ColumnsDescription getColumnsDescription(); - NamesAndTypesList getVirtuals() const override; - protected: - using IStorageSystemOneBlock::IStorageSystemOneBlock; - - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, 
std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 30d64156b22..eecc889f86b 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -18,19 +18,21 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"name", std::make_shared()}, - {"path", std::make_shared()}, - {"free_space", std::make_shared()}, - {"total_space", std::make_shared()}, - {"unreserved_space", std::make_shared()}, - {"keep_free_space", std::make_shared()}, - {"type", std::make_shared()}, - {"is_encrypted", std::make_shared()}, - {"is_read_only", std::make_shared()}, - {"is_write_once", std::make_shared()}, - {"is_remote", std::make_shared()}, - {"is_broken", std::make_shared()}, - {"cache_path", std::make_shared()}, + {"name", std::make_shared(), "Name of a disk in the server configuration."}, + {"path", std::make_shared(), "Path to the mount point in the file system."}, + {"free_space", std::make_shared(), "Free space on disk in bytes."}, + {"total_space", std::make_shared(), "Disk volume in bytes."}, + {"unreserved_space", std::make_shared(), "Free space which is not taken by reservations (free_space minus the size of reservations taken by merges, inserts, and other disk write operations currently running)."}, + {"keep_free_space", std::make_shared(), "Amount of disk space that should stay free on disk in bytes. Defined in the keep_free_space_bytes parameter of disk configuration."}, + {"type", std::make_shared(), "The disk type which tells where this disk stores the data - RAM, local drive or remote storage."}, + {"object_storage_type", std::make_shared(), "Type of object storage if disk type is object_storage"}, + {"metadata_type", std::make_shared(), "Type of metadata storage if disk type is object_storage"}, + {"is_encrypted", std::make_shared(), "Flag which shows whether this disk ecrypts the underlying data. "}, + {"is_read_only", std::make_shared(), "Flag which indicates that you can only perform read operations with this disk."}, + {"is_write_once", std::make_shared(), "Flag which indicates if disk is write-once. Which means that it does support BACKUP to this disk, but does not support INSERT into MergeTree table on this disk."}, + {"is_remote", std::make_shared(), "Flag which indicated what operations with this disk involve network interaction."}, + {"is_broken", std::make_shared(), "Flag which indicates if disk is broken. 
Broken disks will have 0 space and cannot be used."}, + {"cache_path", std::make_shared(), "The path to the cache directory on local drive in case when the disk supports caching."}, })); setInMemoryMetadata(storage_metadata); } @@ -53,6 +55,8 @@ Pipe StorageSystemDisks::read( MutableColumnPtr col_unreserved = ColumnUInt64::create(); MutableColumnPtr col_keep = ColumnUInt64::create(); MutableColumnPtr col_type = ColumnString::create(); + MutableColumnPtr col_object_storage_type = ColumnString::create(); + MutableColumnPtr col_metadata_type = ColumnString::create(); MutableColumnPtr col_is_encrypted = ColumnUInt8::create(); MutableColumnPtr col_is_read_only = ColumnUInt8::create(); MutableColumnPtr col_is_write_once = ColumnUInt8::create(); @@ -69,7 +73,9 @@ Pipe StorageSystemDisks::read( col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); - col_type->insert(data_source_description.toString()); + col_type->insert(magic_enum::enum_name(data_source_description.type)); + col_object_storage_type->insert(magic_enum::enum_name(data_source_description.object_storage_type)); + col_metadata_type->insert(magic_enum::enum_name(data_source_description.metadata_type)); col_is_encrypted->insert(data_source_description.is_encrypted); col_is_read_only->insert(disk_ptr->isReadOnly()); col_is_write_once->insert(disk_ptr->isWriteOnce()); @@ -91,6 +97,8 @@ Pipe StorageSystemDisks::read( res_columns.emplace_back(std::move(col_unreserved)); res_columns.emplace_back(std::move(col_keep)); res_columns.emplace_back(std::move(col_type)); + res_columns.emplace_back(std::move(col_object_storage_type)); + res_columns.emplace_back(std::move(col_metadata_type)); res_columns.emplace_back(std::move(col_is_encrypted)); res_columns.emplace_back(std::move(col_is_read_only)); res_columns.emplace_back(std::move(col_is_write_once)); diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index f0247275a8b..50c6436f316 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -107,7 +107,7 @@ ColumnsDescription StorageSystemDistributionQueue::getColumnsDescription() } -void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -159,7 +159,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, Cont { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemDistributionQueue.h b/src/Storages/System/StorageSystemDistributionQueue.h index 477a9d6e245..159a86bf082 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.h +++ b/src/Storages/System/StorageSystemDistributionQueue.h @@ -11,7 +11,7 @@ class Context; /** Implements the `distribution_queue` system table, which allows you to view the INSERT 
queues for the Distributed tables. */ -class StorageSystemDistributionQueue final : public IStorageSystemOneBlock +class StorageSystemDistributionQueue final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemDistributionQueue"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDroppedTables.cpp b/src/Storages/System/StorageSystemDroppedTables.cpp index a95127847f8..528f4d8995d 100644 --- a/src/Storages/System/StorageSystemDroppedTables.cpp +++ b/src/Storages/System/StorageSystemDroppedTables.cpp @@ -29,7 +29,7 @@ ColumnsDescription StorageSystemDroppedTables::getColumnsDescription() } -void StorageSystemDroppedTables::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemDroppedTables::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); diff --git a/src/Storages/System/StorageSystemDroppedTables.h b/src/Storages/System/StorageSystemDroppedTables.h index d7c3569eb62..4fc620ab962 100644 --- a/src/Storages/System/StorageSystemDroppedTables.h +++ b/src/Storages/System/StorageSystemDroppedTables.h @@ -6,7 +6,7 @@ namespace DB { -class StorageSystemDroppedTables final : public IStorageSystemOneBlock +class StorageSystemDroppedTables final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMarkedDroppedTables"; } @@ -14,7 +14,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index bbe99c688c9..20baeee1d3b 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -10,7 +10,7 @@ namespace DB { -StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context) +StoragesDroppedInfoStream::StoragesDroppedInfoStream(const ActionsDAG::Node * predicate, ContextPtr context) : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. @@ -73,7 +73,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & que if (block_to_filter.rows()) { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. 
- VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); rows = block_to_filter.rows(); } diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.h b/src/Storages/System/StorageSystemDroppedTablesParts.h index f548697a6a9..a44abea7285 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.h +++ b/src/Storages/System/StorageSystemDroppedTablesParts.h @@ -9,7 +9,7 @@ namespace DB class StoragesDroppedInfoStream : public StoragesInfoStreamBase { public: - StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context); + StoragesDroppedInfoStream(const ActionsDAG::Node * predicate, ContextPtr context); protected: bool tryLockTable(StoragesInfo &) override { @@ -30,9 +30,9 @@ public: std::string getName() const override { return "SystemDroppedTablesParts"; } protected: - std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) override + std::unique_ptr getStoragesInfoStream(const ActionsDAG::Node * predicate, ContextPtr context) override { - return std::make_unique(query_info, context); + return std::make_unique(predicate, context); } }; diff --git a/src/Storages/System/StorageSystemEnabledRoles.cpp b/src/Storages/System/StorageSystemEnabledRoles.cpp index 6dbb6f18488..42806a832cf 100644 --- a/src/Storages/System/StorageSystemEnabledRoles.cpp +++ b/src/Storages/System/StorageSystemEnabledRoles.cpp @@ -23,7 +23,7 @@ ColumnsDescription StorageSystemEnabledRoles::getColumnsDescription() } -void StorageSystemEnabledRoles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemEnabledRoles::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto roles_info = context->getRolesInfo(); auto user = context->getUser(); diff --git a/src/Storages/System/StorageSystemEnabledRoles.h b/src/Storages/System/StorageSystemEnabledRoles.h index 5367b2ccbea..1b11d025367 100644 --- a/src/Storages/System/StorageSystemEnabledRoles.h +++ b/src/Storages/System/StorageSystemEnabledRoles.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `enabled_roles` system table, which allows you to get information about enabled roles. 
-class StorageSystemEnabledRoles final : public IStorageSystemOneBlock +class StorageSystemEnabledRoles final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemEnabledRoles"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemErrors.cpp b/src/Storages/System/StorageSystemErrors.cpp index 730e4cf05da..2da268305f8 100644 --- a/src/Storages/System/StorageSystemErrors.cpp +++ b/src/Storages/System/StorageSystemErrors.cpp @@ -25,7 +25,7 @@ ColumnsDescription StorageSystemErrors::getColumnsDescription() } -void StorageSystemErrors::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemErrors::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto add_row = [&](std::string_view name, size_t code, const auto & error, bool remote) { diff --git a/src/Storages/System/StorageSystemErrors.h b/src/Storages/System/StorageSystemErrors.h index 9e8ec628bac..bc86c085ff1 100644 --- a/src/Storages/System/StorageSystemErrors.h +++ b/src/Storages/System/StorageSystemErrors.h @@ -13,7 +13,7 @@ class Context; * Implements the `errors` system table, which shows the error code and the number of times it happens * (i.e. Exception with this code had been thrown). */ -class StorageSystemErrors final : public IStorageSystemOneBlock +class StorageSystemErrors final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemErrors"; } @@ -23,7 +23,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemEvents.cpp b/src/Storages/System/StorageSystemEvents.cpp index 5d9bc3a773a..822d5c77788 100644 --- a/src/Storages/System/StorageSystemEvents.cpp +++ b/src/Storages/System/StorageSystemEvents.cpp @@ -23,7 +23,7 @@ ColumnsDescription StorageSystemEvents::getColumnsDescription() return description; } -void StorageSystemEvents::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemEvents::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { diff --git a/src/Storages/System/StorageSystemEvents.h b/src/Storages/System/StorageSystemEvents.h index cbd92f90d7a..9217fdfb53e 100644 --- a/src/Storages/System/StorageSystemEvents.h +++ b/src/Storages/System/StorageSystemEvents.h @@ -10,7 +10,7 @@ class Context; /** Implements `events` system table, which allows you to obtain information for profiling. 
*/ -class StorageSystemEvents final : public IStorageSystemOneBlock +class StorageSystemEvents final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemEvents"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index d2bd085f934..cfb388bc232 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -17,30 +17,30 @@ ColumnsDescription StorageSystemFilesystemCache::getColumnsDescription() /// TODO: Fill in all the comments. return ColumnsDescription { - {"cache_name", std::make_shared()}, - {"cache_base_path", std::make_shared()}, - {"cache_path", std::make_shared()}, - {"key", std::make_shared()}, - {"file_segment_range_begin", std::make_shared()}, - {"file_segment_range_end", std::make_shared()}, - {"size", std::make_shared()}, - {"state", std::make_shared()}, - {"cache_hits", std::make_shared()}, - {"references", std::make_shared()}, - {"downloaded_size", std::make_shared()}, - {"kind", std::make_shared()}, - {"unbound", std::make_shared>()}, - {"user_id", std::make_shared()}, - {"file_size", std::make_shared(std::make_shared())}, + {"cache_name", std::make_shared(), "Name of the cache object. Can be used in `SYSTEM DESCRIBE FILESYSTEM CACHE `, `SYSTEM DROP FILESYSTEM CACHE ` commands"}, + {"cache_base_path", std::make_shared(), "Path to the base directory where all caches files (of a cache identidied by `cache_name`) are stored."}, + {"cache_path", std::make_shared(), "Path to a particular cache file, corresponding to a file segment in a source file"}, + {"key", std::make_shared(), "Cache key of the file segment"}, + {"file_segment_range_begin", std::make_shared(), "Offset corresponding to the beginning of the file segment range"}, + {"file_segment_range_end", std::make_shared(), "Offset corresponding to the (including) end of the file segment range"}, + {"size", std::make_shared(), "Size of the file segment"}, + {"state", std::make_shared(), "File segment state (DOWNLOADED, DOWNLOADING, PARTIALLY_DOWNLOADED, ...)"}, + {"cache_hits", std::make_shared(), "Number of cache hits of corresponding file segment"}, + {"references", std::make_shared(), "Number of references to corresponding file segment. 
Value 1 means that nobody uses it at the moment (the only existing reference is in cache storage itself)"}, + {"downloaded_size", std::make_shared(), "Downloaded size of the file segment"}, + {"kind", std::make_shared(), "File segment kind (used to distringuish between file segments added as a part of 'Temporary data in cache')"}, + {"unbound", std::make_shared>(), "Internal implementation flag"}, + {"user_id", std::make_shared(), "User id of the user which created the file segment"}, + {"file_size", std::make_shared(std::make_shared()), "File size of the file to which current file segment belongs"}, }; } StorageSystemFilesystemCache::StorageSystemFilesystemCache(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) { } -void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { auto caches = FileCacheFactory::instance().getAll(); diff --git a/src/Storages/System/StorageSystemFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h index 4b13b375f95..ea49fd16ba2 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.h +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -29,7 +29,7 @@ namespace DB * FORMAT Vertical */ -class StorageSystemFilesystemCache final : public IStorageSystemOneBlock +class StorageSystemFilesystemCache final : public IStorageSystemOneBlock { public: explicit StorageSystemFilesystemCache(const StorageID & table_id_); @@ -39,7 +39,7 @@ public: static ColumnsDescription getColumnsDescription(); protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp index a360971e1f7..3c6a19c4f95 100644 --- a/src/Storages/System/StorageSystemFormats.cpp +++ b/src/Storages/System/StorageSystemFormats.cpp @@ -18,12 +18,13 @@ ColumnsDescription StorageSystemFormats::getColumnsDescription() }; } -void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { const auto & formats = FormatFactory::instance().getAllFormats(); for (const auto & pair : formats) { - const auto & [format_name, creators] = pair; + const auto & [name, creators] = pair; + String format_name = creators.name; UInt64 has_input_format(creators.input_creator != nullptr || creators.random_access_input_creator != nullptr); UInt64 has_output_format(creators.output_creator != nullptr); UInt64 supports_parallel_parsing(creators.file_segmentation_engine_creator != nullptr || creators.random_access_input_creator != nullptr); diff --git a/src/Storages/System/StorageSystemFormats.h b/src/Storages/System/StorageSystemFormats.h index 9f9d1df1bde..f93641ee8a8 100644 --- a/src/Storages/System/StorageSystemFormats.h +++ b/src/Storages/System/StorageSystemFormats.h @@ -4,10 +4,10 @@ namespace DB { -class StorageSystemFormats final : public IStorageSystemOneBlock +class StorageSystemFormats final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, 
ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index 45c00e6de27..967132e4d4a 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes extern const int ACCESS_DENIED; }; -enum class FunctionOrigin : Int8 +enum class FunctionOrigin : int8_t { SYSTEM = 0, SQL_USER_DEFINED = 1, @@ -133,7 +133,7 @@ ColumnsDescription StorageSystemFunctions::getColumnsDescription() }; } -void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto & functions_factory = FunctionFactory::instance(); const auto & function_names = functions_factory.getAllRegisteredNames(); diff --git a/src/Storages/System/StorageSystemFunctions.h b/src/Storages/System/StorageSystemFunctions.h index ac1129e8127..c6f85d436fc 100644 --- a/src/Storages/System/StorageSystemFunctions.h +++ b/src/Storages/System/StorageSystemFunctions.h @@ -12,7 +12,7 @@ class Context; /** Implements `functions`system table, which allows you to get a list * all normal and aggregate functions. */ -class StorageSystemFunctions final : public IStorageSystemOneBlock +class StorageSystemFunctions final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemFunctions"; } @@ -25,7 +25,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp index f5f3fa07e53..afa49536983 100644 --- a/src/Storages/System/StorageSystemGrants.cpp +++ b/src/Storages/System/StorageSystemGrants.cpp @@ -38,7 +38,7 @@ ColumnsDescription StorageSystemGrants::getColumnsDescription() } -void StorageSystemGrants::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemGrants::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemGrants.h b/src/Storages/System/StorageSystemGrants.h index 2202b52ad5f..6bf3793c3dc 100644 --- a/src/Storages/System/StorageSystemGrants.h +++ b/src/Storages/System/StorageSystemGrants.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `grants` system table, which allows you to get information about grants. 
-class StorageSystemGrants final : public IStorageSystemOneBlock +class StorageSystemGrants final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemGrants"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index ffeb1b6c890..eaa386763c2 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -11,7 +11,11 @@ ColumnsDescription StorageSystemGraphite::getColumnsDescription() return ColumnsDescription { {"config_name", std::make_shared(), "graphite_rollup parameter name."}, - {"rule_type", std::make_shared(), ""}, + {"rule_type", std::make_shared(), + "The rule type. Possible values: RuleTypeAll = 0 - default, with regex, compatible with old scheme; " + "RuleTypePlain = 1 - plain metrics, with regex, compatible with old scheme; " + "RuleTypeTagged = 2 - tagged metrics, with regex, compatible with old scheme; " + "RuleTypeTagList = 3 - tagged metrics, with regex (converted to RuleTypeTagged from string like 'retention=10min ; env=(staging|prod)')"}, {"regexp", std::make_shared(), "A pattern for the metric name."}, {"function", std::make_shared(), "The name of the aggregating function."}, {"age", std::make_shared(), "The minimum age of the data in seconds."}, @@ -75,7 +79,7 @@ static StorageSystemGraphite::Configs getConfigs(ContextPtr context) return graphite_configs; } -void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { Configs graphite_configs = getConfigs(context); diff --git a/src/Storages/System/StorageSystemGraphite.h b/src/Storages/System/StorageSystemGraphite.h index be101181cf7..78379afac9d 100644 --- a/src/Storages/System/StorageSystemGraphite.h +++ b/src/Storages/System/StorageSystemGraphite.h @@ -10,7 +10,7 @@ namespace DB { /// Provides information about Graphite configuration. 
-class StorageSystemGraphite final : public IStorageSystemOneBlock +class StorageSystemGraphite final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemGraphite"; } @@ -30,7 +30,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp index 15543208dd9..b55e32c479c 100644 --- a/src/Storages/System/StorageSystemJemalloc.cpp +++ b/src/Storages/System/StorageSystemJemalloc.cpp @@ -115,7 +115,7 @@ Pipe StorageSystemJemallocBins::read( { storage_snapshot->check(column_names); - auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); MutableColumns res_columns = header.cloneEmptyColumns(); fillJemallocBins(res_columns); diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index 95962d8de8b..86713632339 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -41,13 +41,13 @@ ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() {"last_rebalance_time", std::make_shared(), "Timestamp of the most recent Kafka rebalance."}, {"num_rebalance_revocations", std::make_shared(), "Number of times the consumer was revoked its partitions."}, {"num_rebalance_assignments", std::make_shared(), "Number of times the consumer was assigned to Kafka cluster."}, - {"is_currently_used", std::make_shared(), "Consumer is in use."}, - {"last_used", std::make_shared(6)}, + {"is_currently_used", std::make_shared(), "The flag which shows whether the consumer is in use."}, + {"last_used", std::make_shared(6), "The last time this consumer was in use."}, {"rdkafka_stat", std::make_shared(), "Library internal statistic. 
Set statistics_interval_ms to 0 disable, default is 3000 (once in three seconds)."}, }; } -void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); diff --git a/src/Storages/System/StorageSystemKafkaConsumers.h b/src/Storages/System/StorageSystemKafkaConsumers.h index ae2c726849d..8d1fd504810 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.h +++ b/src/Storages/System/StorageSystemKafkaConsumers.h @@ -11,7 +11,7 @@ namespace DB { -class StorageSystemKafkaConsumers final : public IStorageSystemOneBlock +class StorageSystemKafkaConsumers final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemKafkaConsumers"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemLicenses.cpp b/src/Storages/System/StorageSystemLicenses.cpp index c5c04b4eb94..2faf772aca4 100644 --- a/src/Storages/System/StorageSystemLicenses.cpp +++ b/src/Storages/System/StorageSystemLicenses.cpp @@ -19,7 +19,7 @@ ColumnsDescription StorageSystemLicenses::getColumnsDescription() }; } -void StorageSystemLicenses::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemLicenses::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto * it = library_licenses; *it; it += 4) { diff --git a/src/Storages/System/StorageSystemLicenses.h b/src/Storages/System/StorageSystemLicenses.h index 57a3ff201a2..ab74a590dea 100644 --- a/src/Storages/System/StorageSystemLicenses.h +++ b/src/Storages/System/StorageSystemLicenses.h @@ -10,10 +10,10 @@ class Context; /** System table "licenses" with list of licenses of 3rd party libraries */ -class StorageSystemLicenses final : public IStorageSystemOneBlock +class StorageSystemLicenses final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemMacros.cpp b/src/Storages/System/StorageSystemMacros.cpp index 6c1a24d152a..82408fd5a7e 100644 --- a/src/Storages/System/StorageSystemMacros.cpp +++ b/src/Storages/System/StorageSystemMacros.cpp @@ -15,7 +15,7 @@ ColumnsDescription StorageSystemMacros::getColumnsDescription() }; } -void StorageSystemMacros::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemMacros::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto macros = context->getMacros(); diff --git a/src/Storages/System/StorageSystemMacros.h b/src/Storages/System/StorageSystemMacros.h index ffbeb70796e..c272985e978 100644 --- a/src/Storages/System/StorageSystemMacros.h +++ b/src/Storages/System/StorageSystemMacros.h @@ 
-12,7 +12,7 @@ class Context; /** Information about macros for introspection. */ -class StorageSystemMacros final : public IStorageSystemOneBlock +class StorageSystemMacros final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMacros"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/src/Storages/System/StorageSystemMergeTreeSettings.cpp index 85caa572edd..7781e3789a4 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.cpp +++ b/src/Storages/System/StorageSystemMergeTreeSettings.cpp @@ -31,7 +31,7 @@ ColumnsDescription SystemMergeTreeSettings::getColumnsDescription() } template -void SystemMergeTreeSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void SystemMergeTreeSettings::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto & settings = replicated ? context->getReplicatedMergeTreeSettings() : context->getMergeTreeSettings(); auto constraints_and_current_profiles = context->getSettingsConstraintsAndCurrentProfiles(); diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.h b/src/Storages/System/StorageSystemMergeTreeSettings.h index 48e83f0a880..e2913a7e55b 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.h +++ b/src/Storages/System/StorageSystemMergeTreeSettings.h @@ -14,7 +14,7 @@ class Context; * which allows to get information about the current MergeTree settings. */ template -class SystemMergeTreeSettings final : public IStorageSystemOneBlock> +class SystemMergeTreeSettings final : public IStorageSystemOneBlock { public: std::string getName() const override { return replicated ? 
"SystemReplicatedMergeTreeSettings" : "SystemMergeTreeSettings"; } @@ -22,9 +22,9 @@ public: static ColumnsDescription getColumnsDescription(); protected: - using IStorageSystemOneBlock>::IStorageSystemOneBlock; + using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index 4129e4c235b..0fca5dc84a2 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -16,12 +16,12 @@ ColumnsDescription StorageSystemMerges::getColumnsDescription() {"elapsed", std::make_shared(), "The time elapsed (in seconds) since the merge started."}, {"progress", std::make_shared(), "The percentage of completed work from 0 to 1."}, {"num_parts", std::make_shared(), "The number of parts to be merged."}, - {"source_part_names", std::make_shared(std::make_shared()), ""}, + {"source_part_names", std::make_shared(std::make_shared()), "The list of source parts names."}, {"result_part_name", std::make_shared(), "The name of the part that will be formed as the result of merging."}, - {"source_part_paths", std::make_shared(std::make_shared()), ""}, - {"result_part_path", std::make_shared(), ""}, - {"partition_id", std::make_shared()}, - {"partition", std::make_shared()}, + {"source_part_paths", std::make_shared(std::make_shared()), "The list of paths for each source part."}, + {"result_part_path", std::make_shared(), "The path of the part that will be formed as the result of merging."}, + {"partition_id", std::make_shared(), "The identifier of the partition where the merge is happening."}, + {"partition", std::make_shared(), "The name of the partition"}, {"is_mutation", std::make_shared(), "1 if this process is a part mutation."}, {"total_size_bytes_compressed", std::make_shared(), "The total size of the compressed data in the merged chunks."}, {"total_size_bytes_uncompressed", std::make_shared(), "The total size of compressed data in the merged chunks."}, @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemMerges::getColumnsDescription() } -void StorageSystemMerges::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemMerges::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); diff --git a/src/Storages/System/StorageSystemMerges.h b/src/Storages/System/StorageSystemMerges.h index 961d28daf9a..fd9077c56d5 100644 --- a/src/Storages/System/StorageSystemMerges.h +++ b/src/Storages/System/StorageSystemMerges.h @@ -12,7 +12,7 @@ namespace DB class Context; -class StorageSystemMerges final : public IStorageSystemOneBlock +class StorageSystemMerges final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMerges"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git 
a/src/Storages/System/StorageSystemMetrics.cpp b/src/Storages/System/StorageSystemMetrics.cpp index 30544d66070..ae34a04cd87 100644 --- a/src/Storages/System/StorageSystemMetrics.cpp +++ b/src/Storages/System/StorageSystemMetrics.cpp @@ -25,7 +25,7 @@ ColumnsDescription StorageSystemMetrics::getColumnsDescription() return description; } -void StorageSystemMetrics::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemMetrics::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) { diff --git a/src/Storages/System/StorageSystemMetrics.h b/src/Storages/System/StorageSystemMetrics.h index ec0c67cf6b7..829fc231a79 100644 --- a/src/Storages/System/StorageSystemMetrics.h +++ b/src/Storages/System/StorageSystemMetrics.h @@ -11,7 +11,7 @@ class Context; /** Implements `metrics` system table, which provides information about the operation of the server. */ -class StorageSystemMetrics final : public IStorageSystemOneBlock +class StorageSystemMetrics final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMetrics"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemModels.cpp b/src/Storages/System/StorageSystemModels.cpp index 03b659d79bd..8960d0625ba 100644 --- a/src/Storages/System/StorageSystemModels.cpp +++ b/src/Storages/System/StorageSystemModels.cpp @@ -13,17 +13,16 @@ namespace DB ColumnsDescription StorageSystemModels::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - { "model_path", std::make_shared() }, - { "type", std::make_shared() }, - { "loading_start_time", std::make_shared() }, - { "loading_duration", std::make_shared() }, + { "model_path", std::make_shared(), "Path to trained model."}, + { "type", std::make_shared(), "Model type. 
Now catboost only."}, + { "loading_start_time", std::make_shared(), "The time when the loading of the model started."}, + { "loading_duration", std::make_shared(), "How much time did it take to load the model."}, }; } -void StorageSystemModels::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemModels::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto bridge_helper = std::make_unique(context); ExternalModelInfos infos = bridge_helper->listModels(); diff --git a/src/Storages/System/StorageSystemModels.h b/src/Storages/System/StorageSystemModels.h index 91fa3761743..419b623dac0 100644 --- a/src/Storages/System/StorageSystemModels.h +++ b/src/Storages/System/StorageSystemModels.h @@ -9,7 +9,7 @@ namespace DB class Context; -class StorageSystemModels final : public IStorageSystemOneBlock +class StorageSystemModels final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemModels"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemMoves.cpp b/src/Storages/System/StorageSystemMoves.cpp index 9b749218283..2c43043d820 100644 --- a/src/Storages/System/StorageSystemMoves.cpp +++ b/src/Storages/System/StorageSystemMoves.cpp @@ -23,7 +23,7 @@ ColumnsDescription StorageSystemMoves::getColumnsDescription() } -void StorageSystemMoves::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemMoves::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); diff --git a/src/Storages/System/StorageSystemMoves.h b/src/Storages/System/StorageSystemMoves.h index acdd9642f8f..45b1e9c6121 100644 --- a/src/Storages/System/StorageSystemMoves.h +++ b/src/Storages/System/StorageSystemMoves.h @@ -12,7 +12,7 @@ namespace DB class Context; -class StorageSystemMoves final : public IStorageSystemOneBlock +class StorageSystemMoves final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemMoves"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index 50545a55c7f..60b80e0b0ad 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -38,7 +38,7 @@ ColumnsDescription StorageSystemMutations::getColumnsDescription() "1 if the mutation is completed, " "0 if the mutation is still in process. 
" }, - { "is_killed", std::make_shared() }, + { "is_killed", std::make_shared(), "Only available in ClickHouse Cloud."}, { "latest_failed_part", std::make_shared(), "The name of the most recent part that could not be mutated."}, { "latest_fail_time", std::make_shared(), "The date and time of the most recent part mutation failure."}, { "latest_fail_reason", std::make_shared(), "The exception message that caused the most recent part mutation failure."}, @@ -46,7 +46,7 @@ ColumnsDescription StorageSystemMutations::getColumnsDescription() } -void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -100,7 +100,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr c { col_table, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemMutations.h b/src/Storages/System/StorageSystemMutations.h index 2db6e0c17f1..c60157cd853 100644 --- a/src/Storages/System/StorageSystemMutations.h +++ b/src/Storages/System/StorageSystemMutations.h @@ -11,7 +11,7 @@ class Context; /// Implements the `mutations` system table, which provides information about the status of mutations /// in the MergeTree tables. -class StorageSystemMutations final : public IStorageSystemOneBlock +class StorageSystemMutations final : public IStorageSystemOneBlock { public: String getName() const override { return "SystemMutations"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index 25401bb751b..156fa5e5a9b 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -25,11 +25,11 @@ ColumnsDescription StorageSystemNamedCollections::getColumnsDescription() } StorageSystemNamedCollections::StorageSystemNamedCollections(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) { } -void StorageSystemNamedCollections::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemNamedCollections::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto & access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemNamedCollections.h b/src/Storages/System/StorageSystemNamedCollections.h index 596df99be83..ab302b400fc 100644 --- a/src/Storages/System/StorageSystemNamedCollections.h +++ b/src/Storages/System/StorageSystemNamedCollections.h @@ -5,7 +5,7 @@ namespace DB { -class StorageSystemNamedCollections final : public IStorageSystemOneBlock +class 
StorageSystemNamedCollections final : public IStorageSystemOneBlock { public: explicit StorageSystemNamedCollections(const StorageID & table_id_); @@ -15,7 +15,7 @@ public: static ColumnsDescription getColumnsDescription(); protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index b100be7cdf4..10898f79d10 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -19,6 +19,8 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult : IStorage(table_id), multithreaded(multithreaded_), limit(limit_), offset(offset_) { StorageInMemoryMetadata storage_metadata; + /// This column doesn't have a comment, because otherwise it will be added to all the tables which were created via + /// CREATE TABLE test as numbers(5) storage_metadata.setColumns(ColumnsDescription({{"number", std::make_shared()}})); setInMemoryMetadata(storage_metadata); } @@ -34,7 +36,7 @@ void StorageSystemNumbers::read( size_t num_streams) { query_plan.addStep(std::make_unique( - column_names, shared_from_this(), storage_snapshot, query_info, std::move(context), max_block_size, num_streams)); + column_names, query_info, storage_snapshot, context, shared_from_this(), max_block_size, num_streams)); } } diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 3091ffdb51a..936d55e61a0 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -15,6 +15,8 @@ StorageSystemOne::StorageSystemOne(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; + /// This column doesn't have a comment, because otherwise it will be added to all tables created via: + /// CREATE TABLE test (dummy UInt8) ENGINE = Distributed(`default`, `system.one`) storage_metadata.setColumns(ColumnsDescription({{"dummy", std::make_shared()}})); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp index 1a2646d3295..9cba92bca12 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp @@ -18,33 +18,32 @@ namespace DB ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription() { - /// TODO: Fill in all the comments return ColumnsDescription { /// Table properties. - { "database", std::make_shared() }, - { "table", std::make_shared() }, + { "database", std::make_shared(), "The name of the database where move is performed."}, + { "table", std::make_shared(), "The name of the table where move is performed."}, /// Constant element properties. 
- { "task_name", std::make_shared() }, - { "task_uuid", std::make_shared() }, - { "create_time", std::make_shared() }, - { "part_name", std::make_shared() }, - { "part_uuid", std::make_shared() }, - { "to_shard", std::make_shared() }, - { "dst_part_name", std::make_shared() }, + { "task_name", std::make_shared(), "The name of the moving task."}, + { "task_uuid", std::make_shared(), "The identifier of the moving task."}, + { "create_time", std::make_shared(), "The time when the task was created."}, + { "part_name", std::make_shared(), "The name of the part which is in a process of moving."}, + { "part_uuid", std::make_shared(), "The UUID of the part which is in a process of moving."}, + { "to_shard", std::make_shared(), "The name of the destination shard."}, + { "dst_part_name", std::make_shared(), "The result part name."}, /// Processing status of item. - { "update_time", std::make_shared() }, - { "state", std::make_shared() }, - { "rollback", std::make_shared() }, - { "num_tries", std::make_shared() }, - { "last_exception", std::make_shared() }, + { "update_time", std::make_shared(), "The last time update was performed."}, + { "state", std::make_shared(), "The current state of the move."}, + { "rollback", std::make_shared(), "The flag which indicated whether the operation was rolled back."}, + { "num_tries", std::make_shared(), "The number of tries to complete the operation."}, + { "last_exception", std::make_shared(), "The last exception name if any."}, }; } -void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -95,7 +94,7 @@ void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.h b/src/Storages/System/StorageSystemPartMovesBetweenShards.h index 93a26bcd1b7..6a859d4de80 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.h +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.h @@ -9,7 +9,7 @@ namespace DB class Context; -class StorageSystemPartMovesBetweenShards final : public IStorageSystemOneBlock +class StorageSystemPartMovesBetweenShards final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemShardMoves"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index b6e4ee4161e..1b800fd64a9 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -94,33 +94,33 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"move_ttl_info.min", 
std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the minimum key value for a TTL MOVE rule."}, {"move_ttl_info.max", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the maximum key value for a TTL MOVE rule."}, - {"default_compression_codec", std::make_shared()}, + {"default_compression_codec", std::make_shared(), "The name of the codec used to compress this data part (in case when there is no explicit codec for columns)."}, - {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, + {"recompression_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"recompression_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"recompression_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.max", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"group_by_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"group_by_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, + {"rows_where_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"rows_where_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"rows_where_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. 
Used to understand whether we have all rows with expired TTL."}, - {"projections", std::make_shared(std::make_shared())}, + {"projections", std::make_shared(std::make_shared()), "The list of projection names calculated for this part."}, - {"visible", std::make_shared()}, - {"creation_tid", getTransactionIDDataType()}, - {"removal_tid_lock", std::make_shared()}, - {"removal_tid", getTransactionIDDataType()}, - {"creation_csn", std::make_shared()}, - {"removal_csn", std::make_shared()}, + {"visible", std::make_shared(), "Flag indicating whether this part is visible for SELECT queries."}, + {"creation_tid", getTransactionIDDataType(), "ID of transaction that has created/is trying to create this object."}, + {"removal_tid_lock", std::make_shared(), "Hash of removal_tid, used to lock an object for removal."}, + {"removal_tid", getTransactionIDDataType(), "ID of transaction that has removed/is trying to remove this object."}, + {"creation_csn", std::make_shared(), "CSN of transaction that has created this object."}, + {"removal_csn", std::make_shared(), "CSN of transaction that has removed this object."}, - {"has_lightweight_delete", std::make_shared()}, + {"has_lightweight_delete", std::make_shared(), "The flag indicating whether the part has a lightweight delete mask."}, - {"last_removal_attempt_time", std::make_shared()}, - {"removal_state", std::make_shared()}, + {"last_removal_attempt_time", std::make_shared(), "The last time the server tried to delete this part."}, + {"removal_state", std::make_shared(), "The current state of the part removal process."}, } ) { diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 48dab8c4777..6bdfdd357e8 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -2,6 +2,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -79,7 +82,7 @@ StoragesInfo::getProjectionParts(MergeTreeData::DataPartStateVector & state, boo return data->getProjectionPartsVectorForInternalUsage({State::Active}, &state); } -StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) +StoragesInfoStream::StoragesInfoStream(const ActionsDAG::Node * predicate, ContextPtr context) : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. @@ -111,7 +114,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte std::move(database_column_mut), std::make_shared(), "database")); /// Filter block_to_filter with column 'database'. - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); rows = block_to_filter.rows(); /// Block contains new columns, update database_column. @@ -138,7 +141,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte String engine_name = storage->getName(); UUID storage_uuid = storage->getStorageID().uuid; - if (database->getEngineName() == "Ordinary") + if (storage_uuid == UUIDHelpers::Nil) { SipHash hash; hash.update(database_name); @@ -190,7 +193,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte if (rows) { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. 
- VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); rows = block_to_filter.rows(); } @@ -200,8 +203,61 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte storage_uuid_column = block_to_filter.getByName("uuid").column; } +class ReadFromSystemPartsBase : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemPartsBase"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; -Pipe StorageSystemPartsBase::read( + ReadFromSystemPartsBase( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + bool has_state_column_); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +protected: + std::shared_ptr storage; + std::vector columns_mask; + const bool has_state_column; + const ActionsDAG::Node * predicate = nullptr; +}; + +ReadFromSystemPartsBase::ReadFromSystemPartsBase( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::shared_ptr storage_, + std::vector columns_mask_, + bool has_state_column_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , storage(std::move(storage_)) + , columns_mask(std::move(columns_mask_)) + , has_state_column(has_state_column_) +{ +} + +void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemPartsBase::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -212,29 +268,39 @@ Pipe StorageSystemPartsBase::read( { bool has_state_column = hasStateColumn(column_names, storage_snapshot); - auto stream = getStoragesInfoStream(query_info, context); - /// Create the result. 
Block sample = storage_snapshot->metadata->getSampleBlock(); auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample, column_names); - MutableColumns res_columns = header.cloneEmptyColumns(); if (has_state_column) - res_columns.push_back(ColumnString::create()); + header.insert(ColumnWithTypeAndName(std::make_shared(), "_state")); + + auto this_ptr = std::static_pointer_cast(shared_from_this()); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(this_ptr), std::move(columns_mask), has_state_column); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemPartsBase::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto stream = storage->getStoragesInfoStream(predicate, context); + auto header = getOutputStream().header; + + MutableColumns res_columns = header.cloneEmptyColumns(); while (StoragesInfo info = stream->next()) { - processNextStorage(context, res_columns, columns_mask, info, has_state_column); + storage->processNextStorage(context, res_columns, columns_mask, info, has_state_column); } - if (has_state_column) - header.insert(ColumnWithTypeAndName(std::make_shared(), "_state")); - UInt64 num_rows = res_columns.at(0)->size(); Chunk chunk(std::move(res_columns), num_rows); - return Pipe(std::make_shared(std::move(header), std::move(chunk))); + pipeline.init(Pipe(std::make_shared(std::move(header), std::move(chunk)))); } @@ -259,12 +325,10 @@ StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, Colu StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); setInMemoryMetadata(storage_metadata); + + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("_state", std::make_shared(), ""); + setVirtuals(std::move(virtuals)); } -NamesAndTypesList StorageSystemPartsBase::getVirtuals() const -{ - return NamesAndTypesList{ - NameAndTypePair("_state", std::make_shared()) - }; -} } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 0a45d0f9dfe..0527a99b841 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -37,7 +38,7 @@ struct StoragesInfo class StoragesInfoStreamBase { public: - StoragesInfoStreamBase(ContextPtr context) + explicit StoragesInfoStreamBase(ContextPtr context) : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef()), next_row(0), rows(0) {} @@ -114,7 +115,7 @@ protected: class StoragesInfoStream : public StoragesInfoStreamBase { public: - StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context); + StoragesInfoStream(const ActionsDAG::Node * predicate, ContextPtr context); }; /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. 
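The predicate push-down pattern shown in the hunks above is the same one applied to every other system storage in this patch: the query plan step collects the pushed-down filter nodes, builds a single ActionsDAG from them, and hands its first output to the storage, which then narrows candidate rows with VirtualColumnUtils::filterBlockWithPredicate instead of re-analysing query_info.query. The sketch below illustrates a system table written against the new interface; it is a minimal, hypothetical example (the class name, its single column, and the std::vector<UInt8> element type of the columns mask are assumptions, not part of this patch).

#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Storages/System/IStorageSystemOneBlock.h>
#include <Storages/VirtualColumnUtils.h>

namespace DB
{

/// Hypothetical example only: a one-column system table using the new fillData() contract.
class StorageSystemExample final : public IStorageSystemOneBlock
{
public:
    std::string getName() const override { return "SystemExample"; }

    static ColumnsDescription getColumnsDescription()
    {
        return ColumnsDescription
        {
            {"database", std::make_shared<DataTypeString>(), "Name of the database."},
        };
    }

protected:
    using IStorageSystemOneBlock::IStorageSystemOneBlock;

    /// The predicate is already built by the planner from the WHERE clause;
    /// the trailing std::vector<UInt8> (columns mask) element type is assumed here.
    void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector<UInt8>) const override
    {
        /// Put the values the predicate may refer to into a small block ...
        MutableColumnPtr database_column = ColumnString::create();
        for (const auto & db : DatabaseCatalog::instance().getDatabases())
            database_column->insert(db.first);

        Block filtered_block
        {
            {std::move(database_column), std::make_shared<DataTypeString>(), "database"},
        };

        /// ... and let the pushed-down predicate narrow it down.
        VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context);
        if (!filtered_block.rows())
            return;

        /// Emit only the rows that survived the filter.
        const auto & filtered_databases = *filtered_block.getByName("database").column;
        for (size_t i = 0; i < filtered_databases.size(); ++i)
            res_columns[0]->insert(filtered_databases[i]);
    }
};

}

Registration of the table is unchanged; only the filtering contract moves from SelectQueryInfo to a pre-built ActionsDAG predicate, which is why the filterBlockWithQuery(query_info.query, ...) calls throughout this patch become filterBlockWithPredicate(predicate, ...).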
@@ -122,7 +123,8 @@ public: class StorageSystemPartsBase : public IStorage { public: - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -131,21 +133,21 @@ public: size_t max_block_size, size_t num_streams) override; - NamesAndTypesList getVirtuals() const override; - bool isSystemStorage() const override { return true; } private: static bool hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot); protected: + friend class ReadFromSystemPartsBase; + const FormatSettings format_settings = {}; StorageSystemPartsBase(const StorageID & table_id_, ColumnsDescription && columns); - virtual std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) + virtual std::unique_ptr getStoragesInfoStream(const ActionsDAG::Node * predicate, ContextPtr context) { - return std::make_unique(query_info, context); + return std::make_unique(predicate, context); } virtual void diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 833a5e1ec16..f34b0e0cfda 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -22,59 +22,60 @@ namespace DB StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"uuid", std::make_shared(), "The parts UUID."}, + {"part_type", std::make_shared(), "The data part storing format. " + "Possible values: Wide — Each column is stored in a separate file in a filesystem, Compact — All columns are stored in one file in a filesystem."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging."}, + {"marks", std::make_shared(), "The number of marks. To get the approximate number of rows in a data part, multiply marks by the index granularity (usually 8192) (this hint does not work for adaptive granularity)."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. 
All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(),"The size of the file with marks."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value for the Date column if that is included in the partition key."}, + {"max_date", std::make_shared(), "The maximum value for the Date column if that is included in the partition key."}, + {"min_time", std::make_shared(), "The minimum value for the DateTime column if that is included in the partition key."}, + {"max_time", std::make_shared(), "The maximum value for the DateTime column if that is included in the partition key."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"column", std::make_shared()}, - {"type", std::make_shared()}, - {"column_position", std::make_shared()}, - {"default_kind", std::make_shared()}, - {"default_expression", std::make_shared()}, - {"column_bytes_on_disk", std::make_shared()}, - {"column_data_compressed_bytes", std::make_shared()}, - {"column_data_uncompressed_bytes", std::make_shared()}, - {"column_marks_bytes", std::make_shared()}, - {"column_modification_time", std::make_shared(std::make_shared())}, + {"column", std::make_shared(), "Name of the column."}, + {"type", std::make_shared(), "Column type."}, + {"column_position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + {"default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + {"default_expression", std::make_shared(), 
"Expression for the default value, or an empty string if it is not defined."}, + {"column_bytes_on_disk", std::make_shared(), "Total size of the column in bytes."}, + {"column_data_compressed_bytes", std::make_shared(), "Total size of the compressed data in the column, in bytes."}, + {"column_data_uncompressed_bytes", std::make_shared(), "Total size of the decompressed data in the column, in bytes."}, + {"column_marks_bytes", std::make_shared(), "The size of the marks for column, in bytes."}, + {"column_modification_time", std::make_shared(std::make_shared()), "The last time the column was modified."}, - {"serialization_kind", std::make_shared()}, - {"substreams", std::make_shared(std::make_shared())}, - {"filenames", std::make_shared(std::make_shared())}, - {"subcolumns.names", std::make_shared(std::make_shared())}, - {"subcolumns.types", std::make_shared(std::make_shared())}, - {"subcolumns.serializations", std::make_shared(std::make_shared())}, - {"subcolumns.bytes_on_disk", std::make_shared(std::make_shared())}, - {"subcolumns.data_compressed_bytes", std::make_shared(std::make_shared())}, - {"subcolumns.data_uncompressed_bytes", std::make_shared(std::make_shared())}, - {"subcolumns.marks_bytes", std::make_shared(std::make_shared())}, + {"serialization_kind", std::make_shared(), "Kind of serialization of a column"}, + {"substreams", std::make_shared(std::make_shared()), "Names of substreams to which column is serialized"}, + {"filenames", std::make_shared(std::make_shared()), "Names of files for each substream of a column respectively"}, + {"subcolumns.names", std::make_shared(std::make_shared()), "Names of subcolumns of a column"}, + {"subcolumns.types", std::make_shared(std::make_shared()), "Types of subcolumns of a column"}, + {"subcolumns.serializations", std::make_shared(std::make_shared()), "Kinds of serialization of subcolumns of a column"}, + {"subcolumns.bytes_on_disk", std::make_shared(std::make_shared()), "Sizes in bytes for each subcolumn"}, + {"subcolumns.data_compressed_bytes", std::make_shared(std::make_shared()), "Sizes of the compressed data for each subcolumn, in bytes"}, + {"subcolumns.data_uncompressed_bytes", std::make_shared(std::make_shared()), "Sizes of the decompressed data for each subcolumn, in bytes"}, + {"subcolumns.marks_bytes", std::make_shared(std::make_shared()), "Sizes of the marks for each subcolumn of a column, in bytes"}, } ) { diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index f45f3c6ed01..58dcf62115e 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -29,6 +29,7 @@ namespace VIEW, COLUMN, NAMED_COLLECTION, + USER_NAME, }; DataTypeEnum8::Values getLevelEnumValues() @@ -41,6 +42,7 @@ namespace enum_values.emplace_back("VIEW", static_cast(VIEW)); enum_values.emplace_back("COLUMN", static_cast(COLUMN)); enum_values.emplace_back("NAMED_COLLECTION", static_cast(NAMED_COLLECTION)); + enum_values.emplace_back("USER_NAME", static_cast(USER_NAME)); return enum_values; } } @@ -66,18 +68,26 @@ const std::vector> & StorageSystemPrivileges::getAccess ColumnsDescription StorageSystemPrivileges::getColumnsDescription() { - /// TODO: Fill in all the comments. 
- return ColumnsDescription - { - {"privilege", std::make_shared(getAccessTypeEnumValues())}, - {"aliases", std::make_shared(std::make_shared())}, - {"level", std::make_shared(std::make_shared(getLevelEnumValues()))}, - {"parent_group", std::make_shared(std::make_shared(getAccessTypeEnumValues()))}, + return ColumnsDescription{ + {"privilege", + std::make_shared(getAccessTypeEnumValues()), + "Name of a privilege which can be used in the GRANT command."}, + {"aliases", + std::make_shared(std::make_shared()), + "List of aliases which can be used instead of the name of the privilege."}, + {"level", + std::make_shared(std::make_shared(getLevelEnumValues())), + "Level of the privilege. GLOBAL privileges can be granted only globally (ON *.*), DATABASE privileges can be granted " + "on a specific database (ON .*) or globally (ON *.*), TABLE privileges can be granted either on a specific table or " + "on a specific database or globally, and COLUMN privileges can be granted like TABLE privileges but also allow to specify columns."}, + {"parent_group", std::make_shared(std::make_shared(getAccessTypeEnumValues())), + "Parent privilege - if the parent privilege is granted then all its children privileges are considered as granted too." + }, }; } -void StorageSystemPrivileges::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemPrivileges::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { size_t column_index = 0; auto & column_access_type = assert_cast(*res_columns[column_index++]).getData(); diff --git a/src/Storages/System/StorageSystemPrivileges.h b/src/Storages/System/StorageSystemPrivileges.h index 4441cf78d5c..eaef7f0db6d 100644 --- a/src/Storages/System/StorageSystemPrivileges.h +++ b/src/Storages/System/StorageSystemPrivileges.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `privileges` system table, which allows you to get information about access types. -class StorageSystemPrivileges final : public IStorageSystemOneBlock +class StorageSystemPrivileges final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemPrivileges"; } @@ -17,7 +17,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index 6702e68b81e..bef98e59687 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -21,35 +21,35 @@ ColumnsDescription StorageSystemProcesses::getColumnsDescription() { auto description = ColumnsDescription { - {"is_initial_query", std::make_shared()}, + {"is_initial_query", std::make_shared(), "Whether this query comes directly from user or was issues by ClickHouse server in a scope of distributed query execution."}, {"user", std::make_shared(), "The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the default user. The field contains the username for a specific query, not for a query that this query initiated."}, {"query_id", std::make_shared(), "Query ID, if defined."}, - {"address", DataTypeFactory::instance().get("IPv6"), "The IP address the request was made from. 
The same for distributed processing. To track where a distributed query was originally made from, look at system.processes on the query requestor server."}, - {"port", std::make_shared()}, + {"address", DataTypeFactory::instance().get("IPv6"), "The IP address the query was made from. The same for distributed processing. To track where a distributed query was originally made from, look at system.processes on the query requestor server."}, + {"port", std::make_shared(), "The client port the query was made from."}, - {"initial_user", std::make_shared()}, - {"initial_query_id", std::make_shared()}, - {"initial_address", DataTypeFactory::instance().get("IPv6")}, - {"initial_port", std::make_shared()}, + {"initial_user", std::make_shared(), "Name of the user who ran the initial query (for distributed query execution)."}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"initial_address", DataTypeFactory::instance().get("IPv6"), "IP address that the parent query was launched from."}, + {"initial_port", std::make_shared(), "The client port that was used to make the parent query."}, - {"interface", std::make_shared()}, + {"interface", std::make_shared(), "The interface which was used to send the query. TCP = 1, HTTP = 2, GRPC = 3, MYSQL = 4, POSTGRESQL = 5, LOCAL = 6, TCP_INTERSERVER = 7."}, - {"os_user", std::make_shared()}, - {"client_hostname", std::make_shared()}, - {"client_name", std::make_shared()}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, + {"os_user", std::make_shared(), "Operating system username who runs clickhouse-client."}, + {"client_hostname", std::make_shared(), "Hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", std::make_shared(), "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "Major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "Minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, - {"http_method", std::make_shared()}, - {"http_user_agent", std::make_shared()}, - {"http_referer", std::make_shared()}, - {"forwarded_for", std::make_shared()}, + {"http_method", std::make_shared(), "HTTP method that initiated the query. Possible values: 0 — The query was launched from the TCP interface. 1 — GET method was used. 
2 — POST method was used."}, + {"http_user_agent", std::make_shared(), "HTTP header UserAgent passed in the HTTP query."}, + {"http_referer", std::make_shared(), "HTTP header Referer passed in the HTTP query (contains an absolute or partial address of the page making the query)."}, + {"forwarded_for", std::make_shared(), "HTTP header X-Forwarded-For passed in the HTTP query."}, - {"quota_key", std::make_shared()}, - {"distributed_depth", std::make_shared()}, + {"quota_key", std::make_shared(), "The quota key specified in the quotas setting (see keyed)."}, + {"distributed_depth", std::make_shared(), "The number of times query was retransmitted between server nodes internally."}, {"elapsed", std::make_shared(), "The time in seconds since request execution started."}, {"is_cancelled", std::make_shared(), "Was query cancelled."}, @@ -57,18 +57,18 @@ ColumnsDescription StorageSystemProcesses::getColumnsDescription() {"read_rows", std::make_shared(), "The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, {"read_bytes", std::make_shared(), "The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, {"total_rows_approx", std::make_shared(), "The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known."}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, + {"written_rows", std::make_shared(), "The amount of rows written to the storage."}, + {"written_bytes", std::make_shared(), "The amount of bytes written to the storage."}, {"memory_usage", std::make_shared(), "Amount of RAM the query uses. It might not include some types of dedicated memory"}, - {"peak_memory_usage", std::make_shared()}, + {"peak_memory_usage", std::make_shared(), "The current peak of memory usage."}, {"query", std::make_shared(), "The query text. 
For INSERT, it does not include the data to insert."}, - {"query_kind", std::make_shared()}, + {"query_kind", std::make_shared(), "The type of the query - SELECT, INSERT, etc."}, - {"thread_ids", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"Settings", std::make_shared(std::make_shared(), std::make_shared())}, + {"thread_ids", std::make_shared(std::make_shared()), "The list of identificators of all threads which executed this query."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "ProfileEvents calculated for this query."}, + {"Settings", std::make_shared(std::make_shared(), std::make_shared()), "The list of modified user-level settings."}, - {"current_database", std::make_shared()}, + {"current_database", std::make_shared(), "The name of the current database."}, }; description.setAliases({ @@ -81,7 +81,7 @@ ColumnsDescription StorageSystemProcesses::getColumnsDescription() return description; } -void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { ProcessList::Info info = context->getProcessList().getInfo(true, true, true); diff --git a/src/Storages/System/StorageSystemProcesses.h b/src/Storages/System/StorageSystemProcesses.h index 3017f9fd367..eb241d4b1ae 100644 --- a/src/Storages/System/StorageSystemProcesses.h +++ b/src/Storages/System/StorageSystemProcesses.h @@ -11,7 +11,7 @@ class Context; /** Implements `processes` system table, which allows you to get information about the queries that are currently executing. */ -class StorageSystemProcesses final : public IStorageSystemOneBlock +class StorageSystemProcesses final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemProcesses"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index b1494f2ba98..4bdcea67313 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -19,75 +19,71 @@ namespace DB StorageSystemProjectionParts::StorageSystemProjectionParts(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"part_type", std::make_shared()}, - {"parent_name", std::make_shared()}, - {"parent_uuid", std::make_shared()}, - {"parent_part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"parent_marks", std::make_shared()}, - {"parent_rows", std::make_shared()}, - {"parent_bytes_on_disk", std::make_shared()}, - {"parent_data_compressed_bytes", std::make_shared()}, - {"parent_data_uncompressed_bytes", std::make_shared()}, - {"parent_marks_bytes", std::make_shared()}, 
- {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, - {"is_frozen", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"part_type", std::make_shared(), "The data part storing format. Possible Values: Wide (a file per column) and Compact (a single file for all columns)."}, + {"parent_name", std::make_shared(), "The name of the source (parent) data part."}, + {"parent_uuid", std::make_shared(), "The UUID of the source (parent) data part."}, + {"parent_part_type", std::make_shared(), "The source (parent) data part storing format."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's about to be deleted. Inactive data parts appear after merging and mutating operations."}, + {"marks", std::make_shared(), "The number of marks. To get the approximate number of rows in a data part, multiply marks by the index granularity (usually 8192) (this hint does not work for adaptive granularity)."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"parent_marks", std::make_shared(), "The number of marks in the source (parent) part."}, + {"parent_rows", std::make_shared(), "The number of rows in the source (parent) part."}, + {"parent_bytes_on_disk", std::make_shared(), "Total size of all the source (parent) data part files in bytes."}, + {"parent_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the source (parent) data part."}, + {"parent_data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the source (parent) data part."}, + {"parent_marks_bytes", std::make_shared(), "The size of the file with marks in the source (parent) data part."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. 
A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value of the date key in the data part."}, + {"max_date", std::make_shared(), "The maximum value of the date key in the data part."}, + {"min_time", std::make_shared(), "The minimum value of the date and time key in the data part."}, + {"max_time", std::make_shared(), "The maximum value of the date and time key in the data part."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, + {"is_frozen", std::make_shared(), "Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. "}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"hash_of_all_files", std::make_shared()}, - {"hash_of_uncompressed_files", std::make_shared()}, - {"uncompressed_hash_of_compressed_files", std::make_shared()}, + {"hash_of_all_files", std::make_shared(), "sipHash128 of compressed files."}, + {"hash_of_uncompressed_files", std::make_shared(), "sipHash128 of uncompressed files (files with marks, index file etc.)."}, + {"uncompressed_hash_of_compressed_files", std::make_shared(), "sipHash128 of data in the compressed files as if they were uncompressed."}, - {"delete_ttl_info_min", std::make_shared()}, - {"delete_ttl_info_max", std::make_shared()}, + {"delete_ttl_info_min", std::make_shared(), "The minimum value of the date and time key for TTL DELETE rule."}, + {"delete_ttl_info_max", std::make_shared(), "The maximum value of the date and time key for TTL DELETE rule."}, - {"move_ttl_info.expression", std::make_shared(std::make_shared())}, - {"move_ttl_info.min", std::make_shared(std::make_shared())}, - {"move_ttl_info.max", std::make_shared(std::make_shared())}, + {"move_ttl_info.expression", std::make_shared(std::make_shared()), "Array of expressions. Each expression defines a TTL MOVE rule."}, + {"move_ttl_info.min", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the minimum key value for a TTL MOVE rule."}, + {"move_ttl_info.max", std::make_shared(std::make_shared()), "Array of date and time values. 
Each element describes the maximum key value for a TTL MOVE rule."}, - {"default_compression_codec", std::make_shared()}, + {"default_compression_codec", std::make_shared(), "The name of the codec used to compress this data part (in case when there is no explicit codec for columns)."}, - {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, + {"recompression_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"recompression_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"recompression_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.max", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"group_by_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"group_by_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, - - {"is_broken", std::make_shared()}, - {"exception_code", std::make_shared()}, - {"exception", std::make_shared()}, + {"rows_where_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"rows_where_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"rows_where_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. 
Used to understand whether we have all rows with expired TTL."}, } ) { @@ -276,38 +272,12 @@ void StorageSystemProjectionParts::processNextStorage( add_ttl_info_map(part->ttl_infos.moves_ttl); if (columns_mask[src_index++]) - { - if (part->default_codec) - columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc())); - else - columns[res_index++]->insertDefault(); - } + columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc())); add_ttl_info_map(part->ttl_infos.recompression_ttl); add_ttl_info_map(part->ttl_infos.group_by_ttl); add_ttl_info_map(part->ttl_infos.rows_where_ttl); - { - if (columns_mask[src_index++]) - columns[res_index++]->insert(part->is_broken.load(std::memory_order_relaxed)); - - if (part->is_broken) - { - std::lock_guard lock(part->broken_reason_mutex); - if (columns_mask[src_index++]) - columns[res_index++]->insert(part->exception_code); - if (columns_mask[src_index++]) - columns[res_index++]->insert(part->exception); - } - else - { - if (columns_mask[src_index++]) - columns[res_index++]->insertDefault(); - if (columns_mask[src_index++]) - columns[res_index++]->insertDefault(); - } - } - /// _state column should be the latest. /// Do not use part->getState*, it can be changed from different thread if (has_state_column) diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index 2ff25f86366..9521605688d 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -19,56 +19,56 @@ namespace DB StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"part_type", std::make_shared()}, - {"parent_name", std::make_shared()}, - {"parent_uuid", std::make_shared()}, - {"parent_part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"parent_marks", std::make_shared()}, - {"parent_rows", std::make_shared()}, - {"parent_bytes_on_disk", std::make_shared()}, - {"parent_data_compressed_bytes", std::make_shared()}, - {"parent_data_uncompressed_bytes", std::make_shared()}, - {"parent_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, + {"partition", std::make_shared(), "The partition name. 
"}, + {"name", std::make_shared(), "Name of the data part."}, + {"part_type", std::make_shared(), "The data part storing format."}, + {"parent_name", std::make_shared(), "The name of the source (parent) data part."}, + {"parent_uuid", std::make_shared(), "The UUID of the source (parent) data part."}, + {"parent_part_type", std::make_shared(), "The source (parent) data part storing format."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active"}, + {"marks", std::make_shared(), "The number of marks."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"parent_marks", std::make_shared(), "The number of marks in the source (parent) part."}, + {"parent_rows", std::make_shared(), "The number of rows in the source (parent) part."}, + {"parent_bytes_on_disk", std::make_shared(), "Total size of all the source (parent) data part files in bytes."}, + {"parent_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the source (parent) data part."}, + {"parent_data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the source (parent) data part."}, + {"parent_marks_bytes", std::make_shared(), "The size of the file with marks in the source (parent) data part."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value for the Date column if that is included in the partition key."}, + {"max_date", std::make_shared(), "The maximum value for the Date column if that is included in the partition key."}, + {"min_time", std::make_shared(), "The minimum value for the DateTime column if that is included in the partition key."}, + {"max_time", std::make_shared(), "The maximum value for the DateTime column if that is included in the partition key."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. 
Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"column", std::make_shared()}, - {"type", std::make_shared()}, - {"column_position", std::make_shared()}, - {"default_kind", std::make_shared()}, - {"default_expression", std::make_shared()}, - {"column_bytes_on_disk", std::make_shared()}, - {"column_data_compressed_bytes", std::make_shared()}, - {"column_data_uncompressed_bytes", std::make_shared()}, - {"column_marks_bytes", std::make_shared()}, - {"column_modification_time", std::make_shared(std::make_shared())}, + {"column", std::make_shared(), "Name of the column."}, + {"type", std::make_shared(), "Column type."}, + {"column_position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + {"default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + {"default_expression", std::make_shared(), "Expression for the default value, or an empty string if it is not defined."}, + {"column_bytes_on_disk", std::make_shared(), "Total size of the column in bytes."}, + {"column_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the column, in bytes."}, + {"column_data_uncompressed_bytes", std::make_shared(), "Total size of the decompressed data in the column, in bytes."}, + {"column_marks_bytes", std::make_shared(), "The size of the column with marks, in bytes."}, + {"column_modification_time", std::make_shared(std::make_shared()), "The last time the column was modified."}, } ) { diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 03111755904..e454012da3a 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -24,11 +24,11 @@ ColumnsDescription StorageSystemQueryCache::getColumnsDescription() } StorageSystemQueryCache::StorageSystemQueryCache(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) { } -void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { QueryCachePtr query_cache = context->getQueryCache(); diff --git a/src/Storages/System/StorageSystemQueryCache.h b/src/Storages/System/StorageSystemQueryCache.h index 08ad30afb81..22856c2b1bb 100644 --- 
a/src/Storages/System/StorageSystemQueryCache.h +++ b/src/Storages/System/StorageSystemQueryCache.h @@ -5,7 +5,7 @@ namespace DB { -class StorageSystemQueryCache final : public IStorageSystemOneBlock +class StorageSystemQueryCache final : public IStorageSystemOneBlock { public: explicit StorageSystemQueryCache(const StorageID & table_id_); @@ -15,7 +15,7 @@ public: static ColumnsDescription getColumnsDescription(); protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index f125a990a88..65acfba0c1b 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -68,14 +68,15 @@ ColumnsDescription StorageSystemQuotaLimits::getColumnsDescription() data_type = std::make_shared(); else data_type = std::make_shared(); - result.add({column_name, std::make_shared(data_type)}); + + result.add({column_name, std::make_shared(data_type), type_info.max_allowed_usage_description}); } return result; } -void StorageSystemQuotaLimits::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQuotaLimits::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemQuotaLimits.h b/src/Storages/System/StorageSystemQuotaLimits.h index acc977d0df7..a8385e878ca 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.h +++ b/src/Storages/System/StorageSystemQuotaLimits.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `quota_limits` system table, which allows you to get information about the limits set for quotas. 
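The hunks above repeat the same mechanical rewrite for every IStorageSystemOneBlock descendant: the base class is no longer a template, the constructor receives the ColumnsDescription explicitly (see the StorageSystemQueryCache hunk), and fillData takes a predicate node plus a vector instead of SelectQueryInfo. Below is a condensed sketch of the resulting shape; it is not part of the patch, StorageSystemExample is hypothetical, and std::vector<UInt8> for the last parameter is an assumption (its element type is cut off in this extract).

// Sketch only, not part of the patch.
class StorageSystemExample final : public IStorageSystemOneBlock
{
public:
    explicit StorageSystemExample(const StorageID & table_id_)
        : IStorageSystemOneBlock(table_id_, getColumnsDescription())   /// the description is now passed to the base
    {
    }

    std::string getName() const override { return "SystemExample"; }

    static ColumnsDescription getColumnsDescription()
    {
        /// The third element of each entry is the per-column comment introduced by this patch.
        return ColumnsDescription
        {
            {"name", std::make_shared<DataTypeString>(), "Name of the object."},
            {"value", std::make_shared<DataTypeUInt64>(), "Current value of the object."},
        };
    }

protected:
    void fillData(MutableColumns & res_columns, ContextPtr context,
                  const ActionsDAG::Node * predicate, std::vector<UInt8> /*columns_mask*/) const override
    {
        /// SelectQueryInfo is gone; the pushed-down filter arrives as an ActionsDAG node.
        (void)context;
        (void)predicate;
        res_columns[0]->insert(String("example"));
        res_columns[1]->insert(UInt64(0));
    }
};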
-class StorageSystemQuotaLimits final : public IStorageSystemOneBlock +class StorageSystemQuotaLimits final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemQuotaLimits"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp index a91e8b7b2c1..da9c174b0d3 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.cpp +++ b/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -81,15 +81,15 @@ ColumnsDescription StorageSystemQuotaUsage::getColumnsDescriptionImpl(bool add_c data_type = std::make_shared(); else data_type = std::make_shared(); - description.add({column_name, std::make_shared(data_type)}); - description.add({String("max_") + column_name, std::make_shared(data_type)}); + description.add({column_name, std::make_shared(data_type), type_info.current_usage_description}); + description.add({String("max_") + column_name, std::make_shared(data_type), type_info.max_allowed_usage_description}); } return description; } -void StorageSystemQuotaUsage::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQuotaUsage::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemQuotaUsage.h b/src/Storages/System/StorageSystemQuotaUsage.h index a3109e9ca31..3100098fe87 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.h +++ b/src/Storages/System/StorageSystemQuotaUsage.h @@ -12,7 +12,7 @@ struct QuotaUsage; /** Implements the `quota_usage` system table, which allows you to get information about * how the current user uses the quota. */ -class StorageSystemQuotaUsage final : public IStorageSystemOneBlock +class StorageSystemQuotaUsage final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemQuotaUsage"; } @@ -23,7 +23,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index ee302f2f163..641bbb319d5 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -66,7 +66,7 @@ ColumnsDescription StorageSystemQuotas::getColumnsDescription() } -void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. 
const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemQuotas.h b/src/Storages/System/StorageSystemQuotas.h index cafd8b921fa..76e6f1df536 100644 --- a/src/Storages/System/StorageSystemQuotas.h +++ b/src/Storages/System/StorageSystemQuotas.h @@ -9,7 +9,7 @@ class Context; /** Implements the `quotas` system tables, which allows you to get information about quotas. */ -class StorageSystemQuotas final : public IStorageSystemOneBlock +class StorageSystemQuotas final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemQuotas"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemQuotasUsage.cpp b/src/Storages/System/StorageSystemQuotasUsage.cpp index ed22f73dd50..1587048e7e7 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.cpp +++ b/src/Storages/System/StorageSystemQuotasUsage.cpp @@ -13,7 +13,7 @@ ColumnsDescription StorageSystemQuotasUsage::getColumnsDescription() return StorageSystemQuotaUsage::getColumnsDescriptionImpl(/* add_column_is_current = */ true); } -void StorageSystemQuotasUsage::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemQuotasUsage::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemQuotasUsage.h b/src/Storages/System/StorageSystemQuotasUsage.h index ecdc62865d1..516e722f7df 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.h +++ b/src/Storages/System/StorageSystemQuotasUsage.h @@ -10,7 +10,7 @@ class Context; /** Implements the `quotas_usage` system table, which allows you to get information about * how all users use the quotas. 
*/ -class StorageSystemQuotasUsage final : public IStorageSystemOneBlock +class StorageSystemQuotasUsage final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemQuotasUsage"; } @@ -18,7 +18,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 87b7a84e8ba..7cc1951fc05 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -19,14 +20,14 @@ StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & tab StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, - {"cache_base_path", std::make_shared()}, - {"local_path", std::make_shared()}, - {"remote_path", std::make_shared()}, - {"size", std::make_shared()}, - {"common_prefix_for_blobs", std::make_shared()}, - {"cache_paths", std::make_shared(std::make_shared())}, + {"disk_name", std::make_shared(), "Disk name."}, + {"path", std::make_shared(), "Disk path."}, + {"cache_base_path", std::make_shared(), "Base directory of cache files."}, + {"local_path", std::make_shared(), "Path of ClickHouse file, also used as metadata path."}, + {"remote_path", std::make_shared(), "Blob path in object storage, with which ClickHouse file is associated with."}, + {"size", std::make_shared(), "Size of the file (compressed)."}, + {"common_prefix_for_blobs", std::make_shared(), "Common prefix for blobs in object storage."}, + {"cache_paths", std::make_shared(std::make_shared()), "Cache files for corresponding blob."}, })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index eeb3db342b4..af8d67cbc21 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -182,13 +185,6 @@ public: , requests_with_zk_fields(max_threads) {} - Pipe read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context); - -private: StatusRequestsPool requests_without_zk_fields; StatusRequestsPool requests_with_zk_fields; }; @@ -200,49 +196,98 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription({ - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "engine", std::make_shared() }, - { "is_leader", std::make_shared() }, - { "can_become_leader", std::make_shared() }, - { "is_readonly", std::make_shared() }, - { "is_session_expired", std::make_shared() }, - { "future_parts", std::make_shared() }, - { "parts_to_check", std::make_shared() }, - { "zookeeper_name", std::make_shared() }, - { "zookeeper_path", std::make_shared() }, - { "replica_name", std::make_shared() }, - { "replica_path", std::make_shared() }, - { "columns_version", std::make_shared() 
}, - { "queue_size", std::make_shared() }, - { "inserts_in_queue", std::make_shared() }, - { "merges_in_queue", std::make_shared() }, - { "part_mutations_in_queue", std::make_shared() }, - { "queue_oldest_time", std::make_shared() }, - { "inserts_oldest_time", std::make_shared() }, - { "merges_oldest_time", std::make_shared() }, - { "part_mutations_oldest_time", std::make_shared() }, - { "oldest_part_to_get", std::make_shared() }, - { "oldest_part_to_merge_to", std::make_shared() }, - { "oldest_part_to_mutate_to", std::make_shared() }, - { "log_max_index", std::make_shared() }, - { "log_pointer", std::make_shared() }, - { "last_queue_update", std::make_shared() }, - { "absolute_delay", std::make_shared() }, - { "total_replicas", std::make_shared() }, - { "active_replicas", std::make_shared() }, - { "lost_part_count", std::make_shared() }, - { "last_queue_update_exception", std::make_shared() }, - { "zookeeper_exception", std::make_shared() }, - { "replica_is_active", std::make_shared(std::make_shared(), std::make_shared()) } + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "engine", std::make_shared(), "Table engine name."}, + { "is_leader", std::make_shared(), "Whether the replica is the leader. Multiple replicas can be leaders at the same time. " + "A replica can be prevented from becoming a leader using the merge_tree setting replicated_can_become_leader. " + "The leaders are responsible for scheduling background merges. " + "Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader."}, + { "can_become_leader", std::make_shared(), "Whether the replica can be a leader."}, + { "is_readonly", std::make_shared(), "Whether the replica is in read-only mode. This mode is turned on if the config does not have sections with ClickHouse Keeper, " + "if an unknown error occurred when reinitializing sessions in ClickHouse Keeper, and during session reinitialization in ClickHouse Keeper."}, + { "is_session_expired", std::make_shared(), "Whether the session with ClickHouse Keeper has expired. Basically the same as `is_readonly`."}, + { "future_parts", std::make_shared(), "The number of data parts that will appear as the result of INSERTs or merges that haven't been done yet."}, + { "parts_to_check", std::make_shared(), "The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged."}, + { "zookeeper_name", std::make_shared(), "The name of the the [Zoo]Keeper cluster (possibly auxiliary one) where the table's metadata is stored"}, + { "zookeeper_path", std::make_shared(), "Path to table data in ClickHouse Keeper."}, + { "replica_name", std::make_shared(), "Replica name in ClickHouse Keeper. Different replicas of the same table have different names."}, + { "replica_path", std::make_shared(), "Path to replica data in ClickHouse Keeper. The same as concatenating 'zookeeper_path/replicas/replica_path'."}, + { "columns_version", std::make_shared(), "Version number of the table structure. Indicates how many times ALTER was performed. " + "If replicas have different versions, it means some replicas haven't made all of the ALTERs yet."}, + { "queue_size", std::make_shared(), "Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. 
It usually coincides with future_parts."}, + { "inserts_in_queue", std::make_shared(), "Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong."}, + { "merges_in_queue", std::make_shared(), "The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time."}, + { "part_mutations_in_queue", std::make_shared(), "The number of mutations waiting to be made."}, + { "queue_oldest_time", std::make_shared(), "If `queue_size` greater than 0, shows when the oldest operation was added to the queue."}, + { "inserts_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "merges_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "part_mutations_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "oldest_part_to_get", std::make_shared(), "The name of the part to fetch from other replicas obtained from the oldest GET_PARTS entry in the replication queue."}, + { "oldest_part_to_merge_to", std::make_shared(), "The result part name to merge to obtained from the oldest MERGE_PARTS entry in the replication queue."}, + { "oldest_part_to_mutate_to", std::make_shared(), "The result part name to mutate to obtained from the oldest MUTATE_PARTS entry in the replication queue."}, + { "log_max_index", std::make_shared(), "Maximum entry number in the log of general activity."}, + { "log_pointer", std::make_shared(), "Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. " + "If log_pointer is much smaller than log_max_index, something is wrong."}, + { "last_queue_update", std::make_shared(), "When the queue was updated last time."}, + { "absolute_delay", std::make_shared(), "How big lag in seconds the current replica has."}, + { "total_replicas", std::make_shared(), "The total number of known replicas of this table."}, + { "active_replicas", std::make_shared(), "The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas)."}, + { "lost_part_count", std::make_shared(), "The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase."}, + { "last_queue_update_exception", std::make_shared(), "When the queue contains broken entries. 
Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions."}, + { "zookeeper_exception", std::make_shared(), "The last exception message, got if the error happened when fetching the info from ClickHouse Keeper."}, + { "replica_is_active", std::make_shared(std::make_shared(), std::make_shared()), "Map between replica name and is replica active."} })); setInMemoryMetadata(storage_metadata); } StorageSystemReplicas::~StorageSystemReplicas() = default; +class ReadFromSystemReplicas : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemReplicas"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; -Pipe StorageSystemReplicas::read( + ReadFromSystemReplicas( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::map> replicated_tables_, + bool with_zk_fields_, + std::shared_ptr impl_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , replicated_tables(std::move(replicated_tables_)) + , with_zk_fields(with_zk_fields_) + , impl(std::move(impl_)) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::map> replicated_tables; + const bool with_zk_fields; + std::shared_ptr impl; + const ActionsDAG::Node * predicate = nullptr; +}; + +void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes) +{ + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); +} + +void StorageSystemReplicas::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -250,15 +295,6 @@ Pipe StorageSystemReplicas::read( QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, const size_t /*num_streams*/) -{ - return impl->read(column_names, storage_snapshot, query_info, context); -} - -Pipe StorageSystemReplicasImpl::read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context) { storage_snapshot->check(column_names); @@ -305,6 +341,18 @@ Pipe StorageSystemReplicasImpl::read( } } + auto header = storage_snapshot->metadata->getSampleBlock(); + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, + std::move(context), std::move(header), std::move(replicated_tables), with_zk_fields, impl); // /*std::move(this_ptr),*/ std::move(columns_mask), max_block_size); + + query_plan.addStep(std::move(reading)); +} + +void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto header = getOutputStream().header; + MutableColumnPtr col_database_mut = ColumnString::create(); MutableColumnPtr col_table_mut = ColumnString::create(); MutableColumnPtr col_engine_mut = ColumnString::create(); @@ -332,10 +380,14 @@ Pipe StorageSystemReplicasImpl::read( { col_engine, std::make_shared(), "engine" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) - return {}; 
+ { + auto source = std::make_shared(std::move(header)); + pipeline.init(Pipe(std::move(source))); + return; + } col_database = filtered_block.getByName("database").column; col_table = filtered_block.getByName("table").column; @@ -347,7 +399,7 @@ Pipe StorageSystemReplicasImpl::read( size_t tables_size = col_database->size(); /// Use separate queues for requests with and without ZooKeeper fields. - StatusRequestsPool & get_status_requests = with_zk_fields ? requests_with_zk_fields : requests_without_zk_fields; + StatusRequestsPool & get_status_requests = with_zk_fields ? impl->requests_with_zk_fields : impl->requests_without_zk_fields; QueryStatusPtr query_status = context ? context->getProcessListElement() : nullptr; @@ -435,7 +487,7 @@ Pipe StorageSystemReplicasImpl::read( UInt64 num_rows = fin_columns.at(0)->size(); Chunk chunk(std::move(fin_columns), num_rows); - return Pipe(std::make_shared(storage_snapshot->metadata->getSampleBlock(), std::move(chunk))); + pipeline.init(Pipe(std::make_shared(header, std::move(chunk)))); } diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index f6bdfc937ea..11c5371310f 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB @@ -20,7 +20,8 @@ public: std::string getName() const override { return "SystemReplicas"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -32,7 +33,7 @@ public: bool isSystemStorage() const override { return true; } private: - std::unique_ptr impl; + std::shared_ptr impl; }; } diff --git a/src/Storages/System/StorageSystemReplicatedFetches.cpp b/src/Storages/System/StorageSystemReplicatedFetches.cpp index e643cb9f86a..6913665a8d2 100644 --- a/src/Storages/System/StorageSystemReplicatedFetches.cpp +++ b/src/Storages/System/StorageSystemReplicatedFetches.cpp @@ -34,7 +34,7 @@ ColumnsDescription StorageSystemReplicatedFetches::getColumnsDescription() }; } -void StorageSystemReplicatedFetches::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemReplicatedFetches::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); diff --git a/src/Storages/System/StorageSystemReplicatedFetches.h b/src/Storages/System/StorageSystemReplicatedFetches.h index a176912cac0..dba9124b39d 100644 --- a/src/Storages/System/StorageSystemReplicatedFetches.h +++ b/src/Storages/System/StorageSystemReplicatedFetches.h @@ -10,7 +10,7 @@ namespace DB class Context; /// system.replicated_fetches table. 
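system.replicas is the one storage in this stretch that gets more than a signature change: the Pipe-returning read() is replaced by a read() that adds a dedicated ReadFromSystemReplicas step derived from SourceStepWithFilter, so the WHERE clause reaches the step as filter DAG nodes. A stripped-down sketch of that pattern follows; it is not part of the patch, ReadFromSystemExample and buildChunk are hypothetical, and SourceFromSingleChunk is an assumed source class (the concrete name is cut off in this extract).

// Sketch only, not part of the patch: the SourceStepWithFilter pattern used for system.replicas.
class ReadFromSystemExample : public SourceStepWithFilter
{
public:
    ReadFromSystemExample(
        const Names & column_names_,
        const SelectQueryInfo & query_info_,
        const StorageSnapshotPtr & storage_snapshot_,
        const ContextPtr & context_,
        Block sample_block)
        : SourceStepWithFilter(
            DataStream{.header = std::move(sample_block)},
            column_names_, query_info_, storage_snapshot_, context_)
    {
    }

    std::string getName() const override { return "ReadFromSystemExample"; }

    void applyFilters(ActionDAGNodes added_filter_nodes) override
    {
        /// Collapse the pushed-down filter nodes into a single DAG and keep its output
        /// as the predicate that initializePipeline will honour.
        filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes);
        if (filter_actions_dag)
            predicate = filter_actions_dag->getOutputs().at(0);
    }

    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override
    {
        auto header = getOutputStream().header;
        Chunk chunk = buildChunk(predicate);    /// hypothetical helper that fills the columns
        pipeline.init(Pipe(std::make_shared<SourceFromSingleChunk>(header, std::move(chunk))));  /// assumed source class
    }

private:
    const ActionsDAG::Node * predicate = nullptr;
    Chunk buildChunk(const ActionsDAG::Node *) { return {}; }   /// placeholder for the real work
};

The storage's own read() then shrinks to constructing this step and calling query_plan.addStep(std::move(reading)), as the StorageSystemReplicas hunk above shows.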
Takes data from context.getReplicatedFetchList() -class StorageSystemReplicatedFetches final : public IStorageSystemOneBlock +class StorageSystemReplicatedFetches final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemReplicatedFetches"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemReplicationQueue.cpp b/src/Storages/System/StorageSystemReplicationQueue.cpp index 194a2ae6fb8..14b641f46c7 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -62,7 +62,7 @@ ColumnsDescription StorageSystemReplicationQueue::getColumnsDescription() } -void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); @@ -113,7 +113,7 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, Conte { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemReplicationQueue.h b/src/Storages/System/StorageSystemReplicationQueue.h index 003e4eeb927..a9e57851be1 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.h +++ b/src/Storages/System/StorageSystemReplicationQueue.h @@ -11,7 +11,7 @@ class Context; /** Implements the `replication_queue` system table, which allows you to view the replication queues for the replicated tables. */ -class StorageSystemReplicationQueue final : public IStorageSystemOneBlock +class StorageSystemReplicationQueue final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemReplicationQueue"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemRoleGrants.cpp b/src/Storages/System/StorageSystemRoleGrants.cpp index 241481275c4..e5baeed4873 100644 --- a/src/Storages/System/StorageSystemRoleGrants.cpp +++ b/src/Storages/System/StorageSystemRoleGrants.cpp @@ -40,7 +40,7 @@ ColumnsDescription StorageSystemRoleGrants::getColumnsDescription() } -void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. 
const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemRoleGrants.h b/src/Storages/System/StorageSystemRoleGrants.h index 969f82f85d5..f82aece3f24 100644 --- a/src/Storages/System/StorageSystemRoleGrants.h +++ b/src/Storages/System/StorageSystemRoleGrants.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `role_grants` system table, which allows you to get information about granted roles. -class StorageSystemRoleGrants final : public IStorageSystemOneBlock +class StorageSystemRoleGrants final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemRoleGrants"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemRoles.cpp b/src/Storages/System/StorageSystemRoles.cpp index ec26a50db77..9bfddc25ebf 100644 --- a/src/Storages/System/StorageSystemRoles.cpp +++ b/src/Storages/System/StorageSystemRoles.cpp @@ -25,7 +25,7 @@ ColumnsDescription StorageSystemRoles::getColumnsDescription() } -void StorageSystemRoles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemRoles::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemRoles.h b/src/Storages/System/StorageSystemRoles.h index 2b4ae93a932..e96bb70d9cd 100644 --- a/src/Storages/System/StorageSystemRoles.h +++ b/src/Storages/System/StorageSystemRoles.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `roles` system table, which allows you to get information about roles. 
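Where a fillData implementation used to narrow its candidate tables through VirtualColumnUtils::filterBlockWithQuery(query_info.query, ...), it now hands the predicate node to filterBlockWithPredicate, as the system.replication_queue hunk above does. A minimal sketch of that pre-filtering step follows; it is not part of the patch, listCandidateTables is a hypothetical helper, and the surrounding class is omitted.

// Sketch only: filter the candidate (database, table) pairs with the pushed-down
// predicate before doing any expensive per-table work.
void fillDataSketch(MutableColumns & res_columns, ContextPtr context,
                    const ActionsDAG::Node * predicate, std::vector<UInt8>)
{
    MutableColumnPtr col_database_mut = ColumnString::create();
    MutableColumnPtr col_table_mut = ColumnString::create();

    for (const auto & [database, table] : listCandidateTables(context))   /// hypothetical helper
    {
        col_database_mut->insert(database);
        col_table_mut->insert(table);
    }

    ColumnPtr col_database = std::move(col_database_mut);
    ColumnPtr col_table = std::move(col_table_mut);

    Block filtered_block
    {
        { col_database, std::make_shared<DataTypeString>(), "database" },
        { col_table, std::make_shared<DataTypeString>(), "table" },
    };

    /// Replaces the old filterBlockWithQuery(query_info.query, ...) call.
    VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context);

    if (!filtered_block.rows())
        return;   /// nothing matches the WHERE clause

    col_database = filtered_block.getByName("database").column;
    col_table = filtered_block.getByName("table").column;

    /// ... iterate over the surviving rows and fill res_columns ...
    (void)res_columns;
}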
-class StorageSystemRoles final : public IStorageSystemOneBlock +class StorageSystemRoles final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemRoles"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 5a959cdf9af..93c5ba60a7f 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -38,8 +38,8 @@ ColumnsDescription StorageSystemRowPolicies::getColumnsDescription() for (auto filter_type : collections::range(RowPolicyFilterType::MAX)) { - const String & column_name = RowPolicyFilterTypeInfo::get(filter_type).name; - description.add({column_name, std::make_shared(std::make_shared())}); + const auto & filter_type_info = RowPolicyFilterTypeInfo::get(filter_type); + description.add({filter_type_info.name, std::make_shared(std::make_shared()), filter_type_info.description}); } description.add({"is_restrictive", std::make_shared(), @@ -61,7 +61,7 @@ ColumnsDescription StorageSystemRowPolicies::getColumnsDescription() } -void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemRowPolicies.h b/src/Storages/System/StorageSystemRowPolicies.h index f8aa5618126..cdbc4731000 100644 --- a/src/Storages/System/StorageSystemRowPolicies.h +++ b/src/Storages/System/StorageSystemRowPolicies.h @@ -10,7 +10,7 @@ class Context; /// Implements `row_policies` system table, which allows you to get information about row policies. 
-class StorageSystemRowPolicies final : public IStorageSystemOneBlock +class StorageSystemRowPolicies final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemRowPolicies"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 3637734b225..a6bb7da2b6e 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -25,23 +25,23 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() /// TODO: Fill in all the comments return ColumnsDescription { - {"zookeeper_path", std::make_shared()}, - {"file_name", std::make_shared()}, - {"rows_processed", std::make_shared()}, - {"status", std::make_shared()}, - {"processing_start_time", std::make_shared(std::make_shared())}, - {"processing_end_time", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"exception", std::make_shared()}, + {"zookeeper_path", std::make_shared(), "Path in zookeeper to S3Queue metadata"}, + {"file_name", std::make_shared(), "File name of a file which is being processed by S3Queue"}, + {"rows_processed", std::make_shared(), "Currently processed number of rows"}, + {"status", std::make_shared(), "Status of processing: Processed, Processing, Failed"}, + {"processing_start_time", std::make_shared(std::make_shared()), "Time at which processing of the file started"}, + {"processing_end_time", std::make_shared(std::make_shared()), "Time at which processing of the file ended"}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected during processing of the file"}, + {"exception", std::make_shared(), "Exception which happened during processing"}, }; } StorageSystemS3Queue::StorageSystemS3Queue(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) { } -void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto & [zookeeper_path, metadata] : S3QueueMetadataFactory::instance().getAll()) { diff --git a/src/Storages/System/StorageSystemS3Queue.h b/src/Storages/System/StorageSystemS3Queue.h index 1dc5c521941..c89e18f458f 100644 --- a/src/Storages/System/StorageSystemS3Queue.h +++ b/src/Storages/System/StorageSystemS3Queue.h @@ -7,7 +7,7 @@ namespace DB { -class StorageSystemS3Queue final : public IStorageSystemOneBlock +class StorageSystemS3Queue final : public IStorageSystemOneBlock { public: explicit StorageSystemS3Queue(const StorageID & table_id_); @@ -17,7 +17,7 @@ public: static ColumnsDescription getColumnsDescription(); protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index 
ba07d44dbf9..cae42011fc5 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -80,7 +80,7 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() } -void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { context->getResourceManager()->forEachNode([&] (const String & resource, const String & path, const String & type, const SchedulerNodePtr & node) { diff --git a/src/Storages/System/StorageSystemScheduler.h b/src/Storages/System/StorageSystemScheduler.h index 1de72a85e9b..c6a259e5b51 100644 --- a/src/Storages/System/StorageSystemScheduler.h +++ b/src/Storages/System/StorageSystemScheduler.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `system.scheduler` table, which allows you to get information about scheduling nodes. -class StorageSystemScheduler final : public IStorageSystemOneBlock +class StorageSystemScheduler final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemScheduler"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 1426ea83800..634089bd1cd 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -72,7 +72,7 @@ static void fillDataImpl(MutableColumns & res_columns, SchemaCache & schema_cach } } -void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { fillDataImpl(res_columns, StorageFile::getSchemaCache(context), "File"); #if USE_AWS_S3 diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.h b/src/Storages/System/StorageSystemSchemaInferenceCache.h index e6d306f8252..3e12f4b850b 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.h +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.h @@ -6,7 +6,7 @@ namespace DB { -class StorageSystemSchemaInferenceCache final : public IStorageSystemOneBlock +class StorageSystemSchemaInferenceCache final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsChanges"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index f390985546b..b75f4280877 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -55,7 +55,7 @@ ColumnsDescription 
StorageSystemServerSettings::getColumnsDescription() }; } -void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// When the server configuration file is periodically re-loaded from disk, the server components (e.g. memory tracking) are updated /// with new the setting values but the settings themselves are not stored between re-loads. As a result, if one wants to know the diff --git a/src/Storages/System/StorageSystemServerSettings.h b/src/Storages/System/StorageSystemServerSettings.h index 276f21d674b..03c363c9920 100644 --- a/src/Storages/System/StorageSystemServerSettings.h +++ b/src/Storages/System/StorageSystemServerSettings.h @@ -11,7 +11,7 @@ class Context; /** implements system table "settings", which allows to get information about the current settings. */ -class StorageSystemServerSettings final : public IStorageSystemOneBlock +class StorageSystemServerSettings final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemServerSettings"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index 43877582af6..b437108b00e 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -34,7 +34,7 @@ ColumnsDescription StorageSystemSettings::getColumnsDescription() }; } -void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const Settings & settings = context->getSettingsRef(); auto constraints_and_current_profiles = context->getSettingsConstraintsAndCurrentProfiles(); diff --git a/src/Storages/System/StorageSystemSettings.h b/src/Storages/System/StorageSystemSettings.h index 6749f9b20a4..fae0d69ada8 100644 --- a/src/Storages/System/StorageSystemSettings.h +++ b/src/Storages/System/StorageSystemSettings.h @@ -11,7 +11,7 @@ class Context; /** implements system table "settings", which allows to get information about the current settings. 
*/ -class StorageSystemSettings final : public IStorageSystemOneBlock +class StorageSystemSettings final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettings"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSettingsChanges.cpp b/src/Storages/System/StorageSystemSettingsChanges.cpp index b1942ea9ac6..de47ec52031 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.cpp +++ b/src/Storages/System/StorageSystemSettingsChanges.cpp @@ -12,7 +12,7 @@ ColumnsDescription StorageSystemSettingsChanges::getColumnsDescription() /// TODO: Fill in all the comments return ColumnsDescription { - {"version", std::make_shared()}, + {"version", std::make_shared(), "The ClickHouse server version."}, {"changes", std::make_shared(std::make_shared( DataTypes{ @@ -20,11 +20,11 @@ ColumnsDescription StorageSystemSettingsChanges::getColumnsDescription() std::make_shared(), std::make_shared(), std::make_shared()}, - Names{"name", "previous_value", "new_value", "reason"}))}, + Names{"name", "previous_value", "new_value", "reason"})), "The list of changes in settings which changed the behaviour of ClickHouse."}, }; } -void StorageSystemSettingsChanges::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemSettingsChanges::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) { diff --git a/src/Storages/System/StorageSystemSettingsChanges.h b/src/Storages/System/StorageSystemSettingsChanges.h index 3a1a8ce23d1..9d8899797fe 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.h +++ b/src/Storages/System/StorageSystemSettingsChanges.h @@ -12,7 +12,7 @@ class Context; /** Implements system table "settings_changes", which allows to get information * about the settings changes through different ClickHouse versions. */ -class StorageSystemSettingsChanges final : public IStorageSystemOneBlock +class StorageSystemSettingsChanges final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsChanges"; } @@ -22,7 +22,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.cpp b/src/Storages/System/StorageSystemSettingsProfileElements.cpp index a530bd2c1b3..2af3e6dfd05 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.cpp +++ b/src/Storages/System/StorageSystemSettingsProfileElements.cpp @@ -42,7 +42,7 @@ ColumnsDescription StorageSystemSettingsProfileElements::getColumnsDescription() {"value", std::make_shared(std::make_shared()), "Setting value."}, {"min", std::make_shared(std::make_shared()), "The minimum value of the setting. NULL if not set."}, {"max", std::make_shared(std::make_shared()), "The maximum value of the setting. 
NULL if not set."}, - {"writability", std::make_shared(std::make_shared(getSettingConstraintWritabilityEnumValues()))}, + {"writability", std::make_shared(std::make_shared(getSettingConstraintWritabilityEnumValues())), "The property which shows whether a setting can be changed or not."}, {"inherit_profile", std::make_shared(std::make_shared()), "A parent profile for this setting profile. NULL if not set. " "Setting profile will inherit all the settings' values and constraints (min, max, readonly) from its parent profiles." @@ -51,7 +51,7 @@ ColumnsDescription StorageSystemSettingsProfileElements::getColumnsDescription() } -void StorageSystemSettingsProfileElements::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemSettingsProfileElements::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.h b/src/Storages/System/StorageSystemSettingsProfileElements.h index 1dedd616c82..8b08c463071 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.h +++ b/src/Storages/System/StorageSystemSettingsProfileElements.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `settings_profile_elements` system table, which allows you to get information about elements of settings profiles. -class StorageSystemSettingsProfileElements final : public IStorageSystemOneBlock +class StorageSystemSettingsProfileElements final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsProfileElements"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index 01041bee445..795152e31f3 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -36,7 +36,7 @@ ColumnsDescription StorageSystemSettingsProfiles::getColumnsDescription() } -void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemSettingsProfiles.h b/src/Storages/System/StorageSystemSettingsProfiles.h index b0c8fc8658c..056666ae4c7 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.h +++ b/src/Storages/System/StorageSystemSettingsProfiles.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `settings_profiles` system table, which allows you to get information about profiles. 
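The recurring header change in these hunks swaps the old class-template base (its template argument, like the other angle-bracketed arguments such as the data types inside `std::make_shared()` and the element type of `std::vector`, appears to have been stripped during text extraction) for a plain `IStorageSystemOneBlock`, and `fillData()` now receives a filter predicate and a columns mask instead of the whole `SelectQueryInfo`. A minimal sketch of what an adopter of the new interface looks like; the table name, the single column, the `<DataTypeString>`/`<UInt8>` arguments and the include paths are illustrative assumptions, not part of this patch:

    #include <Storages/System/IStorageSystemOneBlock.h>
    #include <DataTypes/DataTypeString.h>

    namespace DB
    {

    class StorageSystemExample final : public IStorageSystemOneBlock
    {
    public:
        std::string getName() const override { return "SystemExample"; }

        static ColumnsDescription getColumnsDescription()
        {
            /// The third element is the per-column comment added throughout this patch.
            return ColumnsDescription
            {
                {"name", std::make_shared<DataTypeString>(), "An example column."},
            };
        }

    protected:
        using IStorageSystemOneBlock::IStorageSystemOneBlock;

        /// The predicate (first output of the filter ActionsDAG) and the mask of requested
        /// columns replace the old `const SelectQueryInfo &` parameter.
        void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node * /*predicate*/, std::vector<UInt8> /*columns_mask*/) const override
        {
            res_columns[0]->insert(String("example"));
        }
    };

    }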
-class StorageSystemSettingsProfiles final : public IStorageSystemOneBlock +class StorageSystemSettingsProfiles final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsProfiles"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 82a5fd4e33f..ba7433fb9ae 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -1,6 +1,5 @@ #ifdef OS_LINUX /// Because of 'rt_tgsigqueueinfo' functions and RT signals. -#include #include #include @@ -168,7 +167,7 @@ bool wait(int timeout_ms) continue; /// Drain delayed notifications. } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: read wrong number of bytes from pipe"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Read wrong number of bytes from pipe"); } } @@ -274,15 +273,21 @@ bool isSignalBlocked(UInt64 tid, int signal) class StackTraceSource : public ISource { public: - StackTraceSource(const Names & column_names, Block header_, ASTPtr && query_, ActionsDAGPtr && filter_dag_, ContextPtr context_, UInt64 max_block_size_, LoggerPtr log_) + StackTraceSource( + const Names & column_names, + Block header_, + ActionsDAGPtr && filter_dag_, + ContextPtr context_, + UInt64 max_block_size_, + LoggerPtr log_) : ISource(header_) , context(context_) , header(std::move(header_)) - , query(std::move(query_)) , filter_dag(std::move(filter_dag_)) , predicate(filter_dag ? filter_dag->getOutputs().at(0) : nullptr) , max_block_size(max_block_size_) - , pipe_read_timeout_ms(static_cast(context->getSettingsRef().storage_system_stack_trace_pipe_read_timeout_ms.totalMilliseconds())) + , pipe_read_timeout_ms( + static_cast(context->getSettingsRef().storage_system_stack_trace_pipe_read_timeout_ms.totalMilliseconds())) , log(log_) , proc_it("/proc/self/task") /// It shouldn't be possible to do concurrent reads from this table. 
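For `system.stack_trace` the source no longer keeps the raw `ASTPtr`; filtering is driven entirely by the filter `ActionsDAG`, whose first output is the pushed-down predicate. A hedged sketch of how such a source can apply that predicate to a block of candidate rows before doing the expensive work; `VirtualColumnUtils::filterBlockWithPredicate` is assumed to be the helper used for this (it is not shown in the hunks above), and the include paths and column name are illustrative:

    #include <Storages/VirtualColumnUtils.h>
    #include <DataTypes/DataTypesNumber.h>
    #include <Core/Block.h>

    namespace DB
    {

    /// Sketch: keep only the thread ids that satisfy the pushed-down predicate.
    static Block filterThreadIds(ColumnPtr thread_ids, const ActionsDAG::Node * predicate, ContextPtr context)
    {
        Block block_to_filter
        {
            {thread_ids, std::make_shared<DataTypeUInt64>(), "thread_id"},
        };

        /// Apply the predicate if one was pushed down; otherwise every candidate row survives.
        if (predicate)
            VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context);

        return block_to_filter;
    }

    }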
@@ -417,7 +422,6 @@ protected: private: ContextPtr context; Block header; - const ASTPtr query; const ActionsDAGPtr filter_dag; const ActionsDAG::Node * predicate; @@ -463,11 +467,9 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); Pipe pipe(std::make_shared( column_names, getOutputStream().header, - std::move(query), std::move(filter_actions_dag), context, max_block_size, @@ -477,15 +479,14 @@ public: ReadFromSystemStackTrace( const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, - ASTPtr && query_, - ContextPtr context_, size_t max_block_size_, LoggerPtr log_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , column_names(column_names_) - , query(query_) - , context(std::move(context_)) , max_block_size(max_block_size_) , log(log_) { @@ -493,8 +494,6 @@ public: private: Names column_names; - ASTPtr query; - ContextPtr context; size_t max_block_size; LoggerPtr log; }; @@ -508,11 +507,11 @@ StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription({ - { "thread_name", std::make_shared() }, - { "thread_id", std::make_shared() }, - { "query_id", std::make_shared() }, - { "trace", std::make_shared(std::make_shared()) }, - }, { /* aliases */ })); + {"thread_name", std::make_shared(), "The name of the thread."}, + {"thread_id", std::make_shared(), "The thread identifier"}, + {"query_id", std::make_shared(), "The ID of the query this thread belongs to."}, + {"trace", std::make_shared(std::make_shared()), "The stacktrace of this thread. 
Basically just an array of addresses."}, + })); setInMemoryMetadata(storage_metadata); notification_pipe.open(); @@ -548,12 +547,7 @@ void StorageSystemStackTrace::read( Block sample_block = storage_snapshot->metadata->getSampleBlock(); auto reading = std::make_unique( - column_names, - sample_block, - query_info.query->clone(), - context, - max_block_size, - log); + column_names, query_info, storage_snapshot, context, sample_block, max_block_size, log); query_plan.addStep(std::move(reading)); } diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index 39e99884e1d..21251136f7d 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -37,16 +37,16 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const StorageID & tab StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns( ColumnsDescription({ - {"policy_name", std::make_shared()}, - {"volume_name", std::make_shared()}, - {"volume_priority", std::make_shared()}, - {"disks", std::make_shared(std::make_shared())}, - {"volume_type", std::make_shared(getTypeEnumValues())}, - {"max_data_part_size", std::make_shared()}, - {"move_factor", std::make_shared()}, - {"prefer_not_to_merge", std::make_shared()}, - {"perform_ttl_move_on_insert", std::make_shared()}, - {"load_balancing", std::make_shared(getTypeEnumValues())} + {"policy_name", std::make_shared(), "The name of the storage policy."}, + {"volume_name", std::make_shared(), "The name of the volume."}, + {"volume_priority", std::make_shared(), "The priority of the volume."}, + {"disks", std::make_shared(std::make_shared()), "The list of all disks names which are a part of this storage policy."}, + {"volume_type", std::make_shared(getTypeEnumValues()), "The type of the volume - JBOD or a single disk."}, + {"max_data_part_size", std::make_shared(), "the maximum size of a part that can be stored on any of the volumes disks."}, + {"move_factor", std::make_shared(), "When the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1)."}, + {"prefer_not_to_merge", std::make_shared(), "You should not use this setting. Disables merging of data parts on this volume (this is harmful and leads to performance degradation)."}, + {"perform_ttl_move_on_insert", std::make_shared(), "Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule."}, + {"load_balancing", std::make_shared(getTypeEnumValues()), "Policy for disk balancing, `round_robin` or `least_used`."} })); // TODO: Add string column with custom volume-type-specific options setInMemoryMetadata(storage_metadata); diff --git a/src/Storages/System/StorageSystemSymbols.cpp b/src/Storages/System/StorageSystemSymbols.cpp index 56195544448..6b313407e61 100644 --- a/src/Storages/System/StorageSystemSymbols.cpp +++ b/src/Storages/System/StorageSystemSymbols.cpp @@ -22,9 +22,9 @@ StorageSystemSymbols::StorageSystemSymbols(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"symbol", std::make_shared()}, - {"address_begin", std::make_shared()}, - {"address_end", std::make_shared()}, + {"symbol", std::make_shared(), "Symbol name in the binary. It is mangled. 
You can apply demangle(symbol) to obtain a readable name."}, + {"address_begin", std::make_shared(), "Start address of the symbol in the binary."}, + {"address_end", std::make_shared(), "End address of the symbol in the binary."}, })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemTableEngines.cpp b/src/Storages/System/StorageSystemTableEngines.cpp index c0cf95423d9..e27546aa2a4 100644 --- a/src/Storages/System/StorageSystemTableEngines.cpp +++ b/src/Storages/System/StorageSystemTableEngines.cpp @@ -26,7 +26,7 @@ ColumnsDescription StorageSystemTableEngines::getColumnsDescription() }; } -void StorageSystemTableEngines::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemTableEngines::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (const auto & pair : StorageFactory::instance().getAllStorages()) { diff --git a/src/Storages/System/StorageSystemTableEngines.h b/src/Storages/System/StorageSystemTableEngines.h index 258b9d210b1..d7af471bb2d 100644 --- a/src/Storages/System/StorageSystemTableEngines.h +++ b/src/Storages/System/StorageSystemTableEngines.h @@ -6,10 +6,10 @@ namespace DB { -class StorageSystemTableEngines final : public IStorageSystemOneBlock +class StorageSystemTableEngines final : public IStorageSystemOneBlock { protected: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemTableFunctions.cpp b/src/Storages/System/StorageSystemTableFunctions.cpp index 9fb8e11e4d1..94b7d73a67e 100644 --- a/src/Storages/System/StorageSystemTableFunctions.cpp +++ b/src/Storages/System/StorageSystemTableFunctions.cpp @@ -20,7 +20,7 @@ ColumnsDescription StorageSystemTableFunctions::getColumnsDescription() }; } -void StorageSystemTableFunctions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemTableFunctions::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { const auto & factory = TableFunctionFactory::instance(); const auto & functions_names = factory.getAllRegisteredNames(); diff --git a/src/Storages/System/StorageSystemTableFunctions.h b/src/Storages/System/StorageSystemTableFunctions.h index 804c3b51940..07ef8857135 100644 --- a/src/Storages/System/StorageSystemTableFunctions.h +++ b/src/Storages/System/StorageSystemTableFunctions.h @@ -6,12 +6,12 @@ namespace DB { -class StorageSystemTableFunctions final : public IStorageSystemOneBlock +class StorageSystemTableFunctions final : public IStorageSystemOneBlock { protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; public: std::string getName() const override diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index e0d2dd03e78..639c1455b83 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -43,6 +43,7 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) {"data_paths", 
std::make_shared(std::make_shared()), "Paths to the table data in the file systems."}, {"metadata_path", std::make_shared(), "Path to the table metadata in the file system."}, {"metadata_modification_time", std::make_shared(), "Time of latest modification of the table metadata."}, + {"metadata_version", std::make_shared(), "Metadata version for ReplicatedMergeTree table, 0 for non ReplicatedMergeTree table."}, {"dependencies_database", std::make_shared(std::make_shared()), "Database dependencies."}, {"dependencies_table", std::make_shared(std::make_shared()), "Table dependencies (materialized views the current table)."}, {"create_table_query", std::make_shared(), "The query that was used to create the table."}, @@ -66,9 +67,9 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into account)." }, - {"parts", std::make_shared(std::make_shared())}, - {"active_parts", std::make_shared(std::make_shared())}, - {"total_marks", std::make_shared(std::make_shared())}, + {"parts", std::make_shared(std::make_shared()), "The total number of parts in this table."}, + {"active_parts", std::make_shared(std::make_shared()), "The number of active parts in this table."}, + {"total_marks", std::make_shared(std::make_shared()), "The total number of marks in all parts in this table."}, {"lifetime_rows", std::make_shared(std::make_shared()), "Total number of rows INSERTed since server start (only for Buffer tables)." }, @@ -287,6 +288,11 @@ protected: if (columns_mask[src_index++]) res_columns[res_index++]->insertDefault(); + // metadata_version + // Temporary tables does not support replication + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + // dependencies_database if (columns_mask[src_index++]) res_columns[res_index++]->insertDefault(); @@ -311,7 +317,7 @@ protected: while (src_index < columns_mask.size()) { // total_rows - if (src_index == 18 && columns_mask[src_index]) + if (src_index == 19 && columns_mask[src_index]) { if (auto total_rows = table.second->totalRows(settings)) res_columns[res_index++]->insert(*total_rows); @@ -319,7 +325,7 @@ protected: res_columns[res_index++]->insertDefault(); } // total_bytes - else if (src_index == 19 && columns_mask[src_index]) + else if (src_index == 20 && columns_mask[src_index]) { if (auto total_bytes = table.second->totalBytes(settings)) res_columns[res_index++]->insert(*total_bytes); @@ -418,6 +424,18 @@ protected: if (columns_mask[src_index++]) res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); + StorageMetadataPtr metadata_snapshot; + if (table) + metadata_snapshot = table->getInMemoryMetadataPtr(); + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && table->supportsReplication()) + res_columns[res_index++]->insert(metadata_snapshot->metadata_version); + else + res_columns[res_index++]->insertDefault(); + } + { Array views_table_name_array; Array views_database_name_array; @@ -482,10 +500,6 @@ protected: else src_index += 3; - StorageMetadataPtr metadata_snapshot; - if (table) - metadata_snapshot = table->getInMemoryMetadataPtr(); - ASTPtr expression_ptr; if (columns_mask[src_index++]) { @@ -682,21 +696,27 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; 
ReadFromSystemTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, - ContextPtr context_, std::vector columns_mask_, size_t max_block_size_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , context(std::move(context_)) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , columns_mask(std::move(columns_mask_)) , max_block_size(max_block_size_) { } - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; private: - ContextPtr context; std::vector columns_mask; size_t max_block_size; @@ -708,7 +728,7 @@ void StorageSystemTables::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, @@ -720,17 +740,14 @@ void StorageSystemTables::read( auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); auto reading = std::make_unique( - std::move(res_block), - context, - std::move(columns_mask), - max_block_size); + column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size); query_plan.addStep(std::move(reading)); } -void ReadFromSystemTables::applyFilters() +void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp index 14f4ce0f5de..af997c6423f 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -16,7 +16,7 @@ ColumnsDescription StorageSystemTimeZones::getColumnsDescription() }; } -void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { for (auto * it = auto_time_zones; *it; ++it) res_columns[0]->insert(String(*it)); diff --git a/src/Storages/System/StorageSystemTimeZones.h b/src/Storages/System/StorageSystemTimeZones.h index f3743a1ef09..160e8854e3e 100644 --- a/src/Storages/System/StorageSystemTimeZones.h +++ b/src/Storages/System/StorageSystemTimeZones.h @@ -10,10 +10,10 @@ class Context; /** System table "time_zones" with list of timezones pulled from /contrib/cctz/testdata/zoneinfo */ -class StorageSystemTimeZones final : public IStorageSystemOneBlock +class StorageSystemTimeZones final : public IStorageSystemOneBlock { public: - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemTransactions.cpp b/src/Storages/System/StorageSystemTransactions.cpp index 47e44688c14..295d93edf7d 100644 --- 
a/src/Storages/System/StorageSystemTransactions.cpp +++ b/src/Storages/System/StorageSystemTransactions.cpp @@ -23,18 +23,17 @@ static DataTypePtr getStateEnumType() ColumnsDescription StorageSystemTransactions::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"tid", getTransactionIDDataType()}, - {"tid_hash", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"is_readonly", std::make_shared()}, - {"state", getStateEnumType()}, + {"tid", getTransactionIDDataType(), "The identifier of the transaction."}, + {"tid_hash", std::make_shared(), "The hash of the identifier."}, + {"elapsed", std::make_shared(), "The amount of time the transaction being processed."}, + {"is_readonly", std::make_shared(), "The flag which shows whether the transaction has executed any write operation."}, + {"state", getStateEnumType(), "The state of the transaction. Possible values: RUNNING, COMMITTING, COMMITTED, ROLLED_BACK."}, }; } -void StorageSystemTransactions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemTransactions::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { auto list = TransactionLog::instance().getTransactionsList(); for (const auto & elem : list) diff --git a/src/Storages/System/StorageSystemTransactions.h b/src/Storages/System/StorageSystemTransactions.h index b5a538b7b55..1c400619c1a 100644 --- a/src/Storages/System/StorageSystemTransactions.h +++ b/src/Storages/System/StorageSystemTransactions.h @@ -8,7 +8,7 @@ namespace DB class Context; -class StorageSystemTransactions final : public IStorageSystemOneBlock +class StorageSystemTransactions final : public IStorageSystemOneBlock { public: String getName() const override { return "SystemTransactions"; } @@ -18,7 +18,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemUserDirectories.cpp b/src/Storages/System/StorageSystemUserDirectories.cpp index 620c9746a4a..1b3469e7597 100644 --- a/src/Storages/System/StorageSystemUserDirectories.cpp +++ b/src/Storages/System/StorageSystemUserDirectories.cpp @@ -11,18 +11,17 @@ namespace DB { ColumnsDescription StorageSystemUserDirectories::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"name", std::make_shared()}, - {"type", std::make_shared()}, - {"params", std::make_shared()}, - {"precedence", std::make_shared()}, + {"name", std::make_shared(), "The name of the directory."}, + {"type", std::make_shared(), "The type of the access storage e.g. users.xml or replicated or memory etc."}, + {"params", std::make_shared(), "JSON with the parameters of the access storage."}, + {"precedence", std::make_shared(), "The order in which this directory is declared in the config. 
The same order is used when ClickHouse tries to find a user or role."}, }; } -void StorageSystemUserDirectories::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemUserDirectories::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto & access_control = context->getAccessControl(); auto storages = access_control.getStorages(); diff --git a/src/Storages/System/StorageSystemUserDirectories.h b/src/Storages/System/StorageSystemUserDirectories.h index bca6a9b5aa6..3cdaa877b81 100644 --- a/src/Storages/System/StorageSystemUserDirectories.h +++ b/src/Storages/System/StorageSystemUserDirectories.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `users_directories` system table, which allows you to get information about user directories. -class StorageSystemUserDirectories final : public IStorageSystemOneBlock +class StorageSystemUserDirectories final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemUserDirectories"; } @@ -16,7 +16,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemUserProcesses.cpp b/src/Storages/System/StorageSystemUserProcesses.cpp index 4fbbd7ab54d..65fbeedf406 100644 --- a/src/Storages/System/StorageSystemUserProcesses.cpp +++ b/src/Storages/System/StorageSystemUserProcesses.cpp @@ -18,10 +18,10 @@ ColumnsDescription StorageSystemUserProcesses::getColumnsDescription() { auto description = ColumnsDescription { - {"user", std::make_shared()}, - {"memory_usage", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"user", std::make_shared(), "User name."}, + {"memory_usage", std::make_shared(), "Sum of RAM used by all processes of the user. It might not include some types of dedicated memory. See the max_memory_usage setting."}, + {"peak_memory_usage", std::make_shared(), "The peak of memory usage of the user. It can be reset when no queries are run for the user."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Summary of ProfileEvents that measure different metrics for the user. The description of them could be found in the table system.events"}, }; description.setAliases({ @@ -32,7 +32,7 @@ ColumnsDescription StorageSystemUserProcesses::getColumnsDescription() return description; } -void StorageSystemUserProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemUserProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto user_info = context->getProcessList().getUserInfo(true); diff --git a/src/Storages/System/StorageSystemUserProcesses.h b/src/Storages/System/StorageSystemUserProcesses.h index 6eb12e30559..3141eae9662 100644 --- a/src/Storages/System/StorageSystemUserProcesses.h +++ b/src/Storages/System/StorageSystemUserProcesses.h @@ -11,7 +11,7 @@ class Context; /** Implements `processes` system table, which allows you to get information about the queries that are currently executing. 
*/ -class StorageSystemUserProcesses final : public IStorageSystemOneBlock +class StorageSystemUserProcesses final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemUserProcesses"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index cf114a85645..0c34f04844d 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -69,15 +69,15 @@ ColumnsDescription StorageSystemUsers::getColumnsDescription() {"default_roles_except", std::make_shared(std::make_shared()), "All the granted roles set as default excepting of the listed ones." }, - {"grantees_any", std::make_shared()}, - {"grantees_list", std::make_shared(std::make_shared())}, - {"grantees_except", std::make_shared(std::make_shared())}, - {"default_database", std::make_shared()}, + {"grantees_any", std::make_shared(), "The flag that indicates whether a user with any grant option can grant it to anyone."}, + {"grantees_list", std::make_shared(std::make_shared()), "The list of users or roles to which this user is allowed to grant options to."}, + {"grantees_except", std::make_shared(std::make_shared()), "The list of users or roles to which this user is forbidden from grant options to."}, + {"default_database", std::make_shared(), "The name of the default database for this user."}, }; } -void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { /// If "select_from_system_db_requires_grant" is enabled the access rights were already checked in InterpreterSelectQuery. const auto & access_control = context->getAccessControl(); diff --git a/src/Storages/System/StorageSystemUsers.h b/src/Storages/System/StorageSystemUsers.h index cfa5947d370..a1a3d717b35 100644 --- a/src/Storages/System/StorageSystemUsers.h +++ b/src/Storages/System/StorageSystemUsers.h @@ -8,7 +8,7 @@ namespace DB class Context; /// Implements `users` system table, which allows you to get information about users. 
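As elsewhere in this patch, the `system.users` columns gain a third, documentation element in their `ColumnsDescription` entries. Since the extraction dropped the angle-bracketed type arguments, the following sketch restores the likely shape of two of the entries shown above; `DataTypeUInt8` and `DataTypeString` are assumptions, while the comment strings are the ones visible in the hunk:

    #include <Storages/ColumnsDescription.h>
    #include <DataTypes/DataTypeString.h>
    #include <DataTypes/DataTypesNumber.h>

    /// Sketch of documented column entries; types are assumed, comments are from the patch.
    static DB::ColumnsDescription getUsersColumnsSketch()
    {
        return DB::ColumnsDescription
        {
            {"grantees_any", std::make_shared<DB::DataTypeUInt8>(),
             "The flag that indicates whether a user with any grant option can grant it to anyone."},
            {"default_database", std::make_shared<DB::DataTypeString>(),
             "The name of the default database for this user."},
        };
    }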
-class StorageSystemUsers final : public IStorageSystemOneBlock +class StorageSystemUsers final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemUsers"; } @@ -19,7 +19,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index d2b933e65a8..30539ed6b6a 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -36,20 +36,20 @@ ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() }, {"refresh_count", std::make_shared(), "Number of successful refreshes since last server restart or table creation."}, {"progress", std::make_shared(), "Progress of the current refresh, between 0 and 1."}, - {"elapsed", std::make_shared()}, - {"read_rows", std::make_shared(), "Number of rows read by the current refresh so far."}, - {"read_bytes", std::make_shared()}, + {"elapsed", std::make_shared(), "The amount of nanoseconds the current refresh took."}, + {"read_rows", std::make_shared(), "Number of rows read during the current refresh."}, + {"read_bytes", std::make_shared(), "Number of bytes read during the current refresh."}, {"total_rows", std::make_shared(), "Estimated total number of rows that need to be read by the current refresh."}, - {"total_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"result_rows", std::make_shared()}, - {"result_bytes", std::make_shared()}, + {"total_bytes", std::make_shared(), "Estimated total number of bytes that need to be read by the current refresh."}, + {"written_rows", std::make_shared(), "Number of rows written during the current refresh."}, + {"written_bytes", std::make_shared(), "Number rof bytes written during the current refresh."}, + {"result_rows", std::make_shared(), "Estimated total number of rows in the result set of the SELECT query."}, + {"result_bytes", std::make_shared(), "Estimated total number of bytes in the result set of the SELECT query."}, }; } void StorageSystemViewRefreshes::fillData( - MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const + MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { auto access = context->getAccess(); auto valid_access = AccessType::SHOW_TABLES; diff --git a/src/Storages/System/StorageSystemViewRefreshes.h b/src/Storages/System/StorageSystemViewRefreshes.h index 02d3a39dfff..5a29f3a3bc8 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.h +++ b/src/Storages/System/StorageSystemViewRefreshes.h @@ -10,7 +10,7 @@ namespace DB { -class StorageSystemViewRefreshes final : public IStorageSystemOneBlock +class StorageSystemViewRefreshes final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemViewRefreshes"; } @@ -20,7 +20,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git 
a/src/Storages/System/StorageSystemWarnings.cpp b/src/Storages/System/StorageSystemWarnings.cpp index e2579372b4d..01e96e980a8 100644 --- a/src/Storages/System/StorageSystemWarnings.cpp +++ b/src/Storages/System/StorageSystemWarnings.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include namespace DB @@ -13,7 +15,7 @@ ColumnsDescription StorageSystemWarnings::getColumnsDescription() }; } -void StorageSystemWarnings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +void StorageSystemWarnings::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { for (const auto & warning : context->getWarnings()) res_columns[0]->insert(warning); diff --git a/src/Storages/System/StorageSystemWarnings.h b/src/Storages/System/StorageSystemWarnings.h index 42948a765ea..685fb60b430 100644 --- a/src/Storages/System/StorageSystemWarnings.h +++ b/src/Storages/System/StorageSystemWarnings.h @@ -11,7 +11,7 @@ class Context; /** Implements system.warnings table that contains warnings about server configuration * to be displayed in clickhouse-client. */ -class StorageSystemWarnings final : public IStorageSystemOneBlock +class StorageSystemWarnings final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemWarnings"; } @@ -21,6 +21,6 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index abf93bf1ac0..d1bf86ba8ef 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -32,7 +32,6 @@ #include #include #include -#include namespace DB @@ -169,29 +168,34 @@ public: /// Type of path to be fetched enum class ZkPathType { - Exact, /// Fetch all nodes under this path - Prefix, /// Fetch all nodes starting with this prefix, recursively (multiple paths may match prefix) - Recurse, /// Fatch all nodes under this path, recursively + Exact, /// Fetch all nodes under this path + Prefix, /// Fetch all nodes starting with this prefix, recursively (multiple paths may match prefix) + Recurse, /// Fetch all nodes under this path, recursively }; -/// List of paths to be feched from zookeeper -using Paths = std::deque>; +/// List of paths to be fetched from zookeeper +using Paths = std::unordered_map; class ReadFromSystemZooKeeper final : public SourceStepWithFilter { public: - ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info_, UInt64 max_block_size_, ContextPtr context_); + ReadFromSystemZooKeeper( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + const Block & header, + UInt64 max_block_size_); String getName() const override { return "ReadFromSystemZooKeeper"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; - void applyFilters() override; + void applyFilters(ActionDAGNodes added_filter_nodes) override; private: std::shared_ptr storage_limits; const UInt64 max_block_size; - ContextPtr context; Paths paths; }; @@ -222,20 +226,21 @@ private: ContextPtr context; ZooKeeperWithFaultInjection::Ptr 
zookeeper; bool started = false; + std::unordered_set visited; }; StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) : IStorage(table_id_) { - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(getColumnsDescription()); - setInMemoryMetadata(storage_metadata); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(getColumnsDescription()); + setInMemoryMetadata(storage_metadata); } void StorageSystemZooKeeper::read( QueryPlan & query_plan, - const Names & /*column_names*/, + const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, @@ -243,8 +248,14 @@ void StorageSystemZooKeeper::read( size_t max_block_size, size_t /*num_streams*/) { - auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); - auto read_step = std::make_unique(header, query_info, max_block_size, context); + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); + auto read_step = std::make_unique( + column_names, + query_info, + storage_snapshot, + context, + header, + max_block_size); query_plan.addStep(std::move(read_step)); } @@ -365,7 +376,8 @@ static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextP size_t size = values->size(); for (size_t row = 0; row < size; ++row) - res.emplace_back(values->getDataAt(row).toString(), ZkPathType::Exact); + /// Only inserted if the key doesn't exists already + res.insert({values->getDataAt(row).toString(), ZkPathType::Exact}); } else if (function_name == "equals") { @@ -385,7 +397,8 @@ static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextP if (value->column->size() != 1) return; - res.emplace_back(value->column->getDataAt(0).toString(), ZkPathType::Exact); + /// Only inserted if the key doesn't exists already + res.insert({value->column->getDataAt(0).toString(), ZkPathType::Exact}); } else if (allow_unrestricted && function_name == "like") { @@ -404,7 +417,7 @@ static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextP String pattern = value->column->getDataAt(0).toString(); bool has_metasymbol = false; - String prefix; // pattern prefix before the first metasymbol occurrence + String prefix{}; // pattern prefix before the first metasymbol occurrence for (size_t i = 0; i < pattern.size(); i++) { char c = pattern[i]; @@ -430,7 +443,7 @@ static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextP prefix.append(1, c); } - res.emplace_back(prefix, has_metasymbol ? ZkPathType::Prefix : ZkPathType::Exact); + res.insert_or_assign(prefix, has_metasymbol ? 
ZkPathType::Prefix : ZkPathType::Exact); } } @@ -443,16 +456,26 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont for (const auto * node : filter_nodes) extractPathImpl(*node, res, context, allow_unrestricted); + auto node1 = res.find("/"); + auto node2 = res.find(""); + if ((node1 != res.end() && node1->second != ZkPathType::Exact) || (node2 != res.end() && node2->second != ZkPathType::Exact)) + { + /// If we are already searching everything recursively, remove all other nodes + res.clear(); + res.insert({"/", ZkPathType::Recurse}); + } + if (res.empty() && allow_unrestricted) - res.emplace_back("/", ZkPathType::Recurse); + res.insert({"/", ZkPathType::Recurse}); return res; } -void ReadFromSystemZooKeeper::applyFilters() +void ReadFromSystemZooKeeper::applyFilters(ActionDAGNodes added_filter_nodes) { - paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + paths = extractPath(added_filter_nodes.nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); } @@ -510,7 +533,6 @@ Chunk SystemZooKeeperSource::generate() String path_part; }; std::vector list_tasks; - std::unordered_set added; while (!paths.empty()) { if (query_status) @@ -530,8 +552,9 @@ Chunk SystemZooKeeperSource::generate() std::vector paths_to_list; while (!paths.empty() && static_cast(list_tasks.size()) < max_inflight_requests) { - auto [path, path_type] = std::move(paths.front()); - paths.pop_front(); + auto node = paths.extract(paths.begin()); + auto & path = node.key(); + auto & path_type = node.mapped(); ListTask task; task.path = path; @@ -612,7 +635,7 @@ Chunk SystemZooKeeperSource::generate() // Deduplication String key = list_task.path_part + '/' + get_task.node; - if (auto [it, inserted] = added.emplace(key); !inserted) + if (auto [it, inserted] = visited.emplace(key); !inserted) continue; const Coordination::Stat & stat = res.stat; @@ -638,7 +661,7 @@ Chunk SystemZooKeeperSource::generate() if (list_task.path_type != ZkPathType::Exact && res.stat.numChildren > 0) { - paths.emplace_back(key, ZkPathType::Recurse); + paths.insert_or_assign(key, ZkPathType::Recurse); } } } @@ -646,11 +669,21 @@ Chunk SystemZooKeeperSource::generate() return Chunk(std::move(res_columns), row_count); } -ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info, UInt64 max_block_size_, ContextPtr context_) - : SourceStepWithFilter({.header = header}) +ReadFromSystemZooKeeper::ReadFromSystemZooKeeper( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + const Block & header, + UInt64 max_block_size_) + : SourceStepWithFilter( + {.header = header}, + column_names_, + query_info_, + storage_snapshot_, + context_) , storage_limits(query_info.storage_limits) , max_block_size(max_block_size_) - , context(std::move(context_)) { } diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index c165bfa217d..8041370ee92 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -39,7 +39,7 @@ ColumnsDescription StorageSystemZooKeeperConnection::getColumnsDescription() } void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, ContextPtr 
context, - const SelectQueryInfo &) const + const ActionsDAG::Node *, std::vector) const { const auto add_enabled_feature_flags = [&](const auto & zookeeper) { diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.h b/src/Storages/System/StorageSystemZooKeeperConnection.h index 2b6d3b2e516..f8263e1f1bc 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.h +++ b/src/Storages/System/StorageSystemZooKeeperConnection.h @@ -11,7 +11,7 @@ class Context; /** Implements `zookeeper_connection` system table, which allows you to get information about the connected zookeeper info. */ -class StorageSystemZooKeeperConnection final : public IStorageSystemOneBlock +class StorageSystemZooKeeperConnection final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemZooKeeperConnection"; } @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; } diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index bfe0f20fc92..3482867bbf7 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -35,8 +35,9 @@ static constexpr std::string_view schemata = R"( `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String), `DEFAULT_CHARACTER_SET_NAME` Nullable(String), `SQL_PATH` Nullable(String) - ) AS - SELECT + ) + SQL SECURITY INVOKER + AS SELECT name AS catalog_name, name AS schema_name, 'default' AS schema_owner, @@ -73,8 +74,9 @@ static constexpr std::string_view tables = R"( `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String) - ) AS - SELECT + ) + SQL SECURITY INVOKER + AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, @@ -122,8 +124,9 @@ static constexpr std::string_view views = R"( `IS_TRIGGER_UPDATABLE` Enum8('NO' = 0, 'YES' = 1), `IS_TRIGGER_DELETABLE` Enum8('NO' = 0, 'YES' = 1), `IS_TRIGGER_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1) - ) AS - SELECT + ) + SQL SECURITY INVOKER + AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, @@ -203,8 +206,9 @@ static constexpr std::string_view columns = R"( `EXTRA` Nullable(String), `COLUMN_COMMENT` String, `COLUMN_TYPE` String - ) AS - SELECT + ) + SQL SECURITY INVOKER + AS SELECT database AS table_catalog, database AS table_schema, table AS table_name, @@ -291,8 +295,9 @@ static constexpr std::string_view key_column_usage = R"( `REFERENCED_TABLE_SCHEMA` Nullable(String), `REFERENCED_TABLE_NAME` Nullable(String), `REFERENCED_COLUMN_NAME` Nullable(String) - ) AS - SELECT + ) + SQL SECURITY INVOKER + AS SELECT 'def' AS constraint_catalog, database AS constraint_schema, 'PRIMARY' AS constraint_name, @@ -346,8 +351,9 @@ static constexpr std::string_view referential_constraints = R"( `DELETE_RULE` String, `TABLE_NAME` String, `REFERENCED_TABLE_NAME` String - ) AS - SELECT + ) + SQL SECURITY INVOKER + AS SELECT '' AS constraint_catalog, NULL AS constraint_name, '' AS constraint_schema, @@ -412,8 +418,9 @@ static constexpr std::string_view statistics = R"( `INDEX_COMMENT` String, `IS_VISIBLE` String, `EXPRESSION` Nullable(String) - ) AS - SELECT + ) + SQL SECURITY INVOKER + AS SELECT '' AS 
table_catalog, '' AS table_schema, '' AS table_name, @@ -479,13 +486,13 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d ast_create.setDatabase(database.getDatabaseName()); StoragePtr view = createTableFromAST(ast_create, database.getDatabaseName(), - database.getTableDataPath(ast_create), context, true).second; + database.getTableDataPath(ast_create), context, LoadingStrictnessLevel::FORCE_RESTORE).second; database.createTable(context, ast_create.getTable(), view, ast); ASTPtr ast_upper = ast_create.clone(); auto & ast_create_upper = ast_upper->as(); ast_create_upper.setTable(Poco::toUpper(view_name)); StoragePtr view_upper = createTableFromAST(ast_create_upper, database.getDatabaseName(), - database.getTableDataPath(ast_create_upper), context, true).second; + database.getTableDataPath(ast_create_upper), context, LoadingStrictnessLevel::FORCE_RESTORE).second; database.createTable(context, ast_create_upper.getTable(), view_upper, ast_upper); diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index bf898f57833..79475b9695d 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -90,6 +90,7 @@ #include #include #include +#include #if defined(__ELF__) && !defined(OS_FREEBSD) #include @@ -117,14 +118,14 @@ namespace DB void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, bool has_zookeeper) { - attach(context, system_database, "one", "This table contains a single row with a single dummy UInt8 column containing the value 0. Used when the table is not specified explicitly, for example in queries like `SELECT 1`."); - attach(context, system_database, "numbers", "Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.", false); - attach(context, system_database, "numbers_mt", "Multithreaded version of `system.numbers`. Numbers order is not guaranteed.", true); - attach(context, system_database, "zeros", "Produces unlimited number of non-materialized zeros.", false); - attach(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); + attachNoDescription(context, system_database, "one", "This table contains a single row with a single dummy UInt8 column containing the value 0. Used when the table is not specified explicitly, for example in queries like `SELECT 1`."); + attachNoDescription(context, system_database, "numbers", "Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.", false); + attachNoDescription(context, system_database, "numbers_mt", "Multithreaded version of `system.numbers`. 
Numbers order is not guaranteed.", true); + attachNoDescription(context, system_database, "zeros", "Produces unlimited number of non-materialized zeros.", false); + attachNoDescription(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); attach(context, system_database, "databases", "Lists all databases of the current server."); - attach(context, system_database, "tables", "Lists all tables of the current server."); - attach(context, system_database, "columns", "Lists all columns from all tables of the current server."); + attachNoDescription(context, system_database, "tables", "Lists all tables of the current server."); + attachNoDescription(context, system_database, "columns", "Lists all columns from all tables of the current server."); attach(context, system_database, "functions", "Contains a list of all available ordinary and aggregate functions with their descriptions."); attach(context, system_database, "events", "Contains profiling events and their current value."); attach(context, system_database, "settings", "Contains a list of all user-level settings (which can be modified in a scope of query or session), their current and default values along with descriptions."); @@ -158,43 +159,44 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "privileges", "Contains a list of all available privileges that could be granted to a user or role."); attach(context, system_database, "errors", "Contains a list of all errors which have ever happened including the error code, last time and message with unsymbolized stacktrace."); attach(context, system_database, "warnings", "Contains warnings about server configuration to be displayed by clickhouse-client right after it connects to the server."); - attach(context, system_database, "data_skipping_indices", "Contains all the information about all the data skipping indices in tables, similar to system.columns."); + attachNoDescription(context, system_database, "data_skipping_indices", "Contains all the information about all the data skipping indices in tables, similar to system.columns."); attach(context, system_database, "licenses", "Contains licenses of third-party libraries that are located in the contrib directory of ClickHouse sources."); attach(context, system_database, "time_zones", "Contains a list of time zones that are supported by the ClickHouse server. This list of timezones might vary depending on the version of ClickHouse."); attach(context, system_database, "backups", "Contains a list of all BACKUP or RESTORE operations with their current states and other propertis. 
Note, that table is not persistent and it shows only operations executed after the last server restart."); attach(context, system_database, "schema_inference_cache", "Contains information about all cached file schemas."); attach(context, system_database, "dropped_tables", "Contains a list of tables which were dropped from Atomic databases but not completely removed yet."); - attach(context, system_database, "dropped_tables_parts", "Contains parts of system.dropped_tables tables "); + attachNoDescription(context, system_database, "dropped_tables_parts", "Contains parts of system.dropped_tables tables "); attach(context, system_database, "scheduler", "Contains information and status for scheduling nodes residing on the local server."); + attach(context, system_database, "dns_cache", "Contains information about cached DNS records."); #if defined(__ELF__) && !defined(OS_FREEBSD) - attach(context, system_database, "symbols", "Contains information for introspection of ClickHouse binary. This table is only useful for C++ experts and ClickHouse engineers."); + attachNoDescription(context, system_database, "symbols", "Contains information for introspection of ClickHouse binary. This table is only useful for C++ experts and ClickHouse engineers."); #endif #if USE_RDKAFKA attach(context, system_database, "kafka_consumers", "Contains information about Kafka consumers. Applicable for Kafka table engine (native ClickHouse integration)."); #endif #ifdef OS_LINUX - attach(context, system_database, "stack_trace", "Allows to obtain an unsymbolized stacktrace from all the threads of the server process."); + attachNoDescription(context, system_database, "stack_trace", "Allows to obtain an unsymbolized stacktrace from all the threads of the server process."); #endif #if USE_ROCKSDB attach(context, system_database, "rocksdb", "Contains a list of metrics exposed from embedded RocksDB."); #endif #if USE_MYSQL - attach(context, system_database, "mysql_binlogs", "Shows a list of active binlogs for MaterializedMySQL."); + attachNoDescription(context, system_database, "mysql_binlogs", "Shows a list of active binlogs for MaterializedMySQL."); #endif - attach(context, system_database, "parts", "Contains a list of currently existing (both active and inactive) parts of all *-MergeTree tables. Each part is represented by a single row."); - attach(context, system_database, "projection_parts", "Contains a list of currently existing projection parts (a copy of some part containing aggregated data or just sorted in different order) created for all the projections for all tables within a cluster."); - attach(context, system_database, "detached_parts", "Contains a list of all parts which are being found in /detached directory along with a reason why it was detached. ClickHouse server doesn't use such parts anyhow."); - attach(context, system_database, "parts_columns", "Contains a list of columns of all currently existing parts of all MergeTree tables. Each column is represented by a single row."); - attach(context, system_database, "projection_parts_columns", "Contains a list of columns of all currently existing projection parts of all MergeTree tables. 
Each column is represented by a single row."); - attach(context, system_database, "disks", "Contains information about disks defined in the server configuration."); - attach(context, system_database, "storage_policies", "Contains information about storage policies and volumes defined in the server configuration."); + attachNoDescription(context, system_database, "parts", "Contains a list of currently existing (both active and inactive) parts of all *-MergeTree tables. Each part is represented by a single row."); + attachNoDescription(context, system_database, "projection_parts", "Contains a list of currently existing projection parts (a copy of some part containing aggregated data or just sorted in different order) created for all the projections for all tables within a cluster."); + attachNoDescription(context, system_database, "detached_parts", "Contains a list of all parts which are being found in /detached directory along with a reason why it was detached. ClickHouse server doesn't use such parts anyhow."); + attachNoDescription(context, system_database, "parts_columns", "Contains a list of columns of all currently existing parts of all MergeTree tables. Each column is represented by a single row."); + attachNoDescription(context, system_database, "projection_parts_columns", "Contains a list of columns of all currently existing projection parts of all MergeTree tables. Each column is represented by a single row."); + attachNoDescription(context, system_database, "disks", "Contains information about disks defined in the server configuration."); + attachNoDescription(context, system_database, "storage_policies", "Contains information about storage policies and volumes defined in the server configuration."); attach(context, system_database, "processes", "Contains a list of currently executing processes (queries) with their progress."); attach(context, system_database, "metrics", "Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date."); attach(context, system_database, "merges", "Contains a list of merges currently executing merges of MergeTree tables and their progress. Each merge operation is represented by a single row."); attach(context, system_database, "moves", "Contains information about in-progress data part moves of MergeTree tables. Each data part movement is represented by a single row."); attach(context, system_database, "mutations", "Contains a list of mutations and their progress. Each mutation command is represented by a single row."); - attach(context, system_database, "replicas", "Contains information and status of all table replicas on current server. Each replica is represented by a single row."); + attachNoDescription(context, system_database, "replicas", "Contains information and status of all table replicas on current server. Each replica is represented by a single row."); attach(context, system_database, "replication_queue", "Contains information about tasks from replication queues stored in ClickHouse Keeper, or ZooKeeper, for each table replica."); attach(context, system_database, "distributed_ddl_queue", "Contains information about distributed DDL queries (ON CLUSTER clause) that were executed on a cluster."); attach(context, system_database, "distribution_queue", "Contains information about local files that are in the queue to be sent to the shards. 
These local files contain new parts that are created by inserting new data into the Distributed table in asynchronous mode."); @@ -206,21 +208,21 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "replicated_fetches", "Contains information about currently running background fetches."); attach(context, system_database, "part_moves_between_shards", "Contains information about parts which are currently in a process of moving between shards and their progress."); attach(context, system_database, "asynchronous_inserts", "Contains information about pending asynchronous inserts in queue in server's memory."); - attach(context, system_database, "filesystem_cache", "Contains information about all entries inside filesystem cache for remote objects."); - attach(context, system_database, "query_cache", "Contains information about all entries inside query cache in server's memory."); - attach(context, system_database, "remote_data_paths", "Contains a mapping from a filename on local filesystem to a blob name inside object storage."); + attachNoDescription(context, system_database, "filesystem_cache", "Contains information about all entries inside filesystem cache for remote objects."); + attachNoDescription(context, system_database, "query_cache", "Contains information about all entries inside query cache in server's memory."); + attachNoDescription(context, system_database, "remote_data_paths", "Contains a mapping from a filename on local filesystem to a blob name inside object storage."); attach(context, system_database, "certificates", "Contains information about available certificates and their sources."); - attach(context, system_database, "named_collections", "Contains a list of all named collections which were created via SQL query or parsed from configuration file."); + attachNoDescription(context, system_database, "named_collections", "Contains a list of all named collections which were created via SQL query or parsed from configuration file."); attach(context, system_database, "asynchronous_loader", "Contains information and status for recent asynchronous jobs (e.g. for tables loading). The table contains a row for every job."); attach(context, system_database, "user_processes", "This system table can be used to get overview of memory usage and ProfileEvents of users."); - attach(context, system_database, "jemalloc_bins", "Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas. These statistics might not be absolutely accurate because of thread local caching in jemalloc."); - attach(context, system_database, "s3queue", "Contains in-memory state of S3Queue metadata and currently processed rows per file."); + attachNoDescription(context, system_database, "jemalloc_bins", "Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas. These statistics might not be absolutely accurate because of thread local caching in jemalloc."); + attachNoDescription(context, system_database, "s3queue", "Contains in-memory state of S3Queue metadata and currently processed rows per file."); attach(context, system_database, "dashboards", "Contains queries used by /dashboard page accessible though HTTP interface. This table can be useful for monitoring and troubleshooting. 
The table contains a row for every chart in a dashboard."); attach(context, system_database, "view_refreshes", "Lists all Refreshable Materialized Views of current server."); if (has_zookeeper) { - attach(context, system_database, "zookeeper", "Exposes data from the [Zoo]Keeper cluster defined in the config. Allow to get the list of children for a particular node or read the value written inside it."); + attachNoDescription(context, system_database, "zookeeper", "Exposes data from the [Zoo]Keeper cluster defined in the config. Allow to get the list of children for a particular node or read the value written inside it."); attach(context, system_database, "zookeeper_connection", "Shows the information about current connections to [Zoo]Keeper (including auxiliary [ZooKeepers)"); } @@ -230,7 +232,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b void attachSystemTablesAsync(ContextPtr context, IDatabase & system_database, AsynchronousMetrics & async_metrics) { - attach(context, system_database, "asynchronous_metrics", "Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.", async_metrics); + attachNoDescription(context, system_database, "asynchronous_metrics", "Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.", async_metrics); } } diff --git a/src/Storages/System/attachSystemTablesImpl.h b/src/Storages/System/attachSystemTablesImpl.h index 9f2c4e8016d..d9ab164f2b3 100644 --- a/src/Storages/System/attachSystemTablesImpl.h +++ b/src/Storages/System/attachSystemTablesImpl.h @@ -10,8 +10,8 @@ namespace DB template using StringLiteral = const char(&)[Length]; -template -void attach(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral comment, StorageArgs && ... args) +template +void attachImpl(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral comment, StorageArgs && ... args) { static_assert(CommentSize > 15, "The comment for a system table is too short or empty"); assert(system_database.getDatabaseName() == DatabaseCatalog::SYSTEM_DATABASE); @@ -21,7 +21,10 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl { /// Attach to Ordinary database. 
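
The attachImpl()/attach()/attachNoDescription() split introduced in attachSystemTablesImpl.h (this hunk and the following one) works like this: both wrappers forward to attachImpl(), but only attach() additionally passes StorageT::getColumnsDescription() to the storage constructor, so tables whose storages are not constructed from a static columns description are switched to attachNoDescription() in attachSystemTablesServer() above. A minimal sketch of the compile-time dispatch, using hypothetical storage types and *Sketch names rather than the real ClickHouse classes:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <utility>

// Hypothetical storages: one publishes a static columns description, one does not.
struct StorageWithDescription
{
    static std::string getColumnsDescription() { return "a UInt64, b String"; }
    explicit StorageWithDescription(const std::string & desc) { std::cout << "attached with description: " << desc << '\n'; }
};

struct StorageWithoutDescription
{
    StorageWithoutDescription() { std::cout << "attached without description\n"; }
};

// Sketch of attachImpl's dispatch: when with_description is true, the storage
// constructor additionally receives StorageT::getColumnsDescription().
template <typename StorageT, bool with_description, typename... Args>
std::shared_ptr<StorageT> attachImplSketch(Args &&... args)
{
    if constexpr (with_description)
        return std::make_shared<StorageT>(StorageT::getColumnsDescription(), std::forward<Args>(args)...);
    else
        return std::make_shared<StorageT>(std::forward<Args>(args)...);
}

template <typename StorageT, typename... Args>
auto attachSketch(Args &&... args) { return attachImplSketch<StorageT, true>(std::forward<Args>(args)...); }

template <typename StorageT, typename... Args>
auto attachNoDescriptionSketch(Args &&... args) { return attachImplSketch<StorageT, false>(std::forward<Args>(args)...); }

int main()
{
    attachSketch<StorageWithDescription>();
    attachNoDescriptionSketch<StorageWithoutDescription>();
}
```
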
table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name); - system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...)); + if constexpr (with_description) + system_database.attachTable(context, table_name, std::make_shared(table_id, StorageT::getColumnsDescription(), std::forward(args)...)); + else + system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...)); } else { @@ -31,7 +34,10 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name, UUIDHelpers::generateV4()); DatabaseCatalog::instance().addUUIDMapping(table_id.uuid); String path = "store/" + DatabaseCatalog::getPathForUUID(table_id.uuid); - system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...), path); + if constexpr (with_description) + system_database.attachTable(context, table_name, std::make_shared(table_id, StorageT::getColumnsDescription(), std::forward(args)...), path); + else + system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...), path); } /// Set the comment @@ -42,4 +48,17 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl table->setInMemoryMetadata(metadata); } + +template +void attach(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral comment, StorageArgs && ... args) +{ + attachImpl(context, system_database, table_name, comment, std::forward(args)...); +} + +template +void attachNoDescription(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral comment, StorageArgs && ... args) +{ + attachImpl(context, system_database, table_name, comment, std::forward(args)...); +} + } diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp index c29ccb590ed..b93fe7b8034 100644 --- a/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp @@ -11,10 +11,11 @@ std::pair, Block> getQueriedColumnsMaskAndHeader(const Block NameSet names_set(column_names.begin(), column_names.end()); for (size_t i = 0; i < columns_mask.size(); ++i) { - if (names_set.contains(sample_block.getByPosition(i).name)) + const auto & column_with_type_and_name = sample_block.getByPosition(i); + if (names_set.contains(column_with_type_and_name.name)) { columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); + header.insert(column_with_type_and_name); } } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 33ff6e7104f..c3ac27903c9 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -50,194 +50,9 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -namespace -{ - -/// Verifying that the function depends only on the specified columns -bool isValidFunction(const ASTPtr & expression, const std::function & is_constant) -{ - const auto * function = expression->as(); - if (function && functionIsInOrGlobalInOperator(function->name)) - { - // Second argument of IN can be a scalar subquery - return isValidFunction(function->arguments->children[0], is_constant); - } - else - return is_constant(expression); -} - -/// Extract all subfunctions of the main conjunction, but depending only on the specified columns -bool extractFunctions(const ASTPtr & 
expression, const std::function & is_constant, ASTs & result) -{ - const auto * function = expression->as(); - - if (function) - { - if (function->name == "and" || function->name == "indexHint") - { - bool ret = true; - for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, is_constant, result); - return ret; - } - else if (function->name == "or") - { - bool ret = false; - ASTs or_args; - for (const auto & child : function->arguments->children) - ret |= extractFunctions(child, is_constant, or_args); - - if (!or_args.empty()) - { - /// In case of there are less number of arguments for which - /// is_constant() == true, we need to add always-true - /// implicitly to avoid breaking AND invariant. - /// - /// Consider the following: - /// - /// ((value = 10) OR (_table = 'v2')) AND ((_table = 'v1') OR (value = 20)) - /// - /// Without implicit always-true: - /// - /// (_table = 'v2') AND (_table = 'v1') - /// - /// With: - /// - /// (_table = 'v2' OR 1) AND (_table = 'v1' OR 1) -> (_table = 'v2') OR (_table = 'v1') - /// - if (or_args.size() != function->arguments->children.size()) - or_args.push_back(std::make_shared(Field(1))); - result.push_back(makeASTForLogicalOr(std::move(or_args))); - } - return ret; - } - } - - if (isValidFunction(expression, is_constant)) - { - result.push_back(expression->clone()); - return true; - } - else - return false; -} - -/// Construct a conjunction from given functions -ASTPtr buildWhereExpression(ASTs && functions) -{ - if (functions.empty()) - return nullptr; - if (functions.size() == 1) - return functions[0]; - return makeASTForLogicalAnd(std::move(functions)); -} - -} - namespace VirtualColumnUtils { -void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value, const String & func) -{ - auto & select = ast->as(); - if (!select.with()) - select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); - - if (func.empty()) - { - auto literal = std::make_shared(value); - literal->alias = column_name; - literal->prefer_alias_to_column_name = true; - select.with()->children.push_back(literal); - } - else - { - auto literal = std::make_shared(value); - literal->prefer_alias_to_column_name = true; - - auto function = makeASTFunction(func, literal); - function->alias = column_name; - function->prefer_alias_to_column_name = true; - select.with()->children.push_back(function); - } -} - -bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block block, ASTPtr & expression_ast) -{ - if (block.rows() == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot prepare filter with empty block"); - - /// Take the first row of the input block to build a constant block - auto columns = block.getColumns(); - Columns const_columns(columns.size()); - for (size_t i = 0; i < columns.size(); ++i) - { - if (isColumnConst(*columns[i])) - const_columns[i] = columns[i]->cloneResized(1); - else - const_columns[i] = ColumnConst::create(columns[i]->cloneResized(1), 1); - } - - block.setColumns(const_columns); - - bool unmodified = true; - const auto & select = query->as(); - if (!select.where() && !select.prewhere()) - return unmodified; - - // Provide input columns as constant columns to check if an expression is - // constant and depends on the columns from provided block (the last is - // required to allow skipping some conditions for handling OR). 
- std::function is_constant = [&block, &context](const ASTPtr & expr) - { - auto actions = std::make_shared(block.getColumnsWithTypeAndName()); - PreparedSetsPtr prepared_sets = std::make_shared(); - const NamesAndTypesList source_columns; - const NamesAndTypesList aggregation_keys; - const ColumnNumbersList grouping_set_keys; - - ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, source_columns, std::move(actions), prepared_sets, true, true, true, - { aggregation_keys, grouping_set_keys, GroupByKind::NONE }); - - ActionsVisitor(visitor_data).visit(expr); - actions = visitor_data.getActions(); - auto expr_column_name = expr->getColumnName(); - - const auto * expr_const_node = actions->tryFindInOutputs(expr_column_name); - if (!expr_const_node) - return false; - auto filter_actions = ActionsDAG::buildFilterActionsDAG({expr_const_node}); - const auto & nodes = filter_actions->getNodes(); - bool has_dependent_columns = std::any_of(nodes.begin(), nodes.end(), [&](const auto & node) - { - return block.has(node.result_name); - }); - if (!has_dependent_columns) - return false; - - auto expression_actions = std::make_shared(actions); - auto block_with_constants = block; - expression_actions->execute(block_with_constants); - return block_with_constants.has(expr_column_name) && isColumnConst(*block_with_constants.getByName(expr_column_name).column); - }; - - /// Create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. - ASTs functions; - if (select.where()) - unmodified &= extractFunctions(select.where(), is_constant, functions); - if (select.prewhere()) - unmodified &= extractFunctions(select.prewhere(), is_constant, functions); - - expression_ast = buildWhereExpression(std::move(functions)); - return unmodified; -} - static void makeSets(const ExpressionActionsPtr & actions, const ContextPtr & context) { for (const auto & node : actions->getNodes()) @@ -266,7 +81,7 @@ void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) auto actions = std::make_shared(dag); makeSets(actions, context); Block block_with_filter = block; - actions->execute(block_with_filter); + actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); /// Filter the block. String filter_column_name = dag->getOutputs().at(0)->result_name; @@ -294,72 +109,28 @@ void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) } } -void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast) +NameSet getVirtualNamesForFileLikeStorage() { - if (block.rows() == 0) - return; - - if (!expression_ast) - prepareFilterBlockWithQuery(query, context, block, expression_ast); - - if (!expression_ast) - return; - - /// Let's analyze and calculate the prepared expression. - auto syntax_result = TreeRewriter(context).analyze(expression_ast, block.getNamesAndTypesList()); - ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); - ExpressionActionsPtr actions = analyzer.getActions(false /* add alises */, true /* project result */, CompileExpressions::yes); - - makeSets(actions, context); - - Block block_with_filter = block; - actions->execute(block_with_filter); - - /// Filter the block. 
- String filter_column_name = expression_ast->getColumnName(); - ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullIfNeeded(); - if (filter_column->getDataType() != TypeIndex::UInt8) - return; - - ConstantFilterDescription constant_filter(*filter_column); - - if (constant_filter.always_true) - { - return; - } - - if (constant_filter.always_false) - { - block = block.cloneEmpty(); - return; - } - - FilterDescription filter(*filter_column); - - for (size_t i = 0; i < block.columns(); ++i) - { - ColumnPtr & column = block.safeGetByPosition(i).column; - column = column->filter(*filter.data, -1); - } + return {"_path", "_file", "_size"}; } -NamesAndTypesList getPathFileAndSizeVirtualsForStorage(NamesAndTypesList storage_columns) +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) { - auto default_virtuals = NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}, - {"_size", makeNullable(std::make_shared())}}; + VirtualColumnsDescription desc; - default_virtuals.sort(); - storage_columns.sort(); + auto add_virtual = [&](const auto & name, const auto & type) + { + if (storage_columns.has(name)) + return; - NamesAndTypesList result_virtuals; - std::set_difference( - default_virtuals.begin(), default_virtuals.end(), storage_columns.begin(), storage_columns.end(), - std::back_inserter(result_virtuals), - [](const NameAndTypePair & lhs, const NameAndTypePair & rhs){ return lhs.name < rhs.name; }); + desc.addEphemeral(name, type, ""); + }; - return result_virtuals; + add_virtual("_path", std::make_shared(std::make_shared())); + add_virtual("_file", std::make_shared(std::make_shared())); + add_virtual("_size", makeNullable(std::make_shared())); + + return desc; } static void addPathAndFileToVirtualColumns(Block & block, const String & path, size_t idx) @@ -467,6 +238,23 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo return true; } +bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) +{ + for (const auto * child : node->children) + { + if (!isDeterministicInScopeOfQuery(child)) + return false; + } + + if (node->type != ActionsDAG::ActionType::FUNCTION) + return true; + + if (!node->function_base->isDeterministicInScopeOfQuery()) + return false; + + return true; +} + static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( const ActionsDAG::Node * node, const Block * allowed_inputs, @@ -542,6 +330,10 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( } } } + else if (!isDeterministicInScopeOfQuery(node)) + { + return nullptr; + } } if (allowed_inputs && !canEvaluateSubtree(node, *allowed_inputs)) diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 7a9b2605339..83494872cac 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -17,23 +18,6 @@ class NamesAndTypesList; namespace VirtualColumnUtils { -/// Adds to the select query section `WITH value AS column_name`, and uses func -/// to wrap the value (if any) -/// -/// For example: -/// - `WITH 9000 as _port`. -/// - `WITH toUInt16(9000) as _port`. -void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value, const String & func = ""); - -/// Prepare `expression_ast` to filter block. 
Returns true if `expression_ast` is not trimmed, that is, -/// `block` provides all needed columns for `expression_ast`, else return false. -bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block block, ASTPtr & expression_ast); - -/// Leave in the block only the rows that fit under the WHERE clause and the PREWHERE clause of the query. -/// Only elements of the outer conjunction are considered, depending only on the columns present in the block. -/// If `expression_ast` is passed, use it to filter block. -void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast = {}); - /// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate. /// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs). void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); @@ -41,6 +25,9 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, /// Just filters block. Block should contain all the required columns. void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); +/// Recursively checks if all functions used in DAG are deterministic in scope of query. +bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); + /// Extract a part of predicate that can be evaluated using only columns from input_names. ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); @@ -56,7 +43,8 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) return res; } -NamesAndTypesList getPathFileAndSizeVirtualsForStorage(NamesAndTypesList storage_columns); +NameSet getVirtualNamesForFileLikeStorage(); +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns); ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); diff --git a/src/Storages/VirtualColumnsDescription.cpp b/src/Storages/VirtualColumnsDescription.cpp new file mode 100644 index 00000000000..64097224ed9 --- /dev/null +++ b/src/Storages/VirtualColumnsDescription.cpp @@ -0,0 +1,94 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DUPLICATE_COLUMN; + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + +VirtualColumnDescription::VirtualColumnDescription( + String name_, DataTypePtr type_, ASTPtr codec_, String comment_, VirtualsKind kind_) + : ColumnDescription(std::move(name_), std::move(type_), std::move(codec_), std::move(comment_)) + , kind(kind_) +{ +} + +void VirtualColumnsDescription::add(VirtualColumnDescription desc) +{ + if (container.get<1>().contains(desc.name)) + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Virtual column {} already exists", desc.name); + + container.get<0>().push_back(std::move(desc)); +} + +void VirtualColumnsDescription::addEphemeral(String name, DataTypePtr type, String comment) +{ + add({std::move(name), std::move(type), nullptr, std::move(comment), VirtualsKind::Ephemeral}); +} + +void VirtualColumnsDescription::addPersistent(String name, DataTypePtr type, ASTPtr codec, String comment) +{ + add({std::move(name), std::move(type), std::move(codec), std::move(comment), VirtualsKind::Persistent}); +} + +std::optional VirtualColumnsDescription::tryGet(const String & name, VirtualsKind kind) const +{ + auto it = container.get<1>().find(name); + if (it != container.get<1>().end() && (static_cast(it->kind) & static_cast(kind))) + return 
NameAndTypePair{it->name, it->type}; + return {}; +} + +NameAndTypePair VirtualColumnsDescription::get(const String & name, VirtualsKind kind) const +{ + auto column = tryGet(name, kind); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no virtual column {}", name); + return *column; +} + +const VirtualColumnDescription * VirtualColumnsDescription::tryGetDescription(const String & name, VirtualsKind kind) const +{ + auto it = container.get<1>().find(name); + if (it != container.get<1>().end() && (static_cast(it->kind) & static_cast(kind))) + return &(*it); + return nullptr; +} + +const VirtualColumnDescription & VirtualColumnsDescription::getDescription(const String & name, VirtualsKind kind) const +{ + const auto * column = tryGetDescription(name, kind); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no virtual column {}", name); + return *column; +} + +Block VirtualColumnsDescription::getSampleBlock() const +{ + Block result; + for (const auto & desc : container) + result.insert({desc.type->createColumn(), desc.type, desc.name}); + return result; +} + +NamesAndTypesList VirtualColumnsDescription::getNamesAndTypesList() const +{ + NamesAndTypesList result; + for (const auto & desc : container) + result.emplace_back(desc.name, desc.type); + return result; +} + +NamesAndTypesList VirtualColumnsDescription::getNamesAndTypesList(VirtualsKind kind) const +{ + NamesAndTypesList result; + for (const auto & column : container) + if (static_cast(column.kind) & static_cast(kind)) + result.emplace_back(column.name, column.type); + return result; +} + +} diff --git a/src/Storages/VirtualColumnsDescription.h b/src/Storages/VirtualColumnsDescription.h new file mode 100644 index 00000000000..e11ba706718 --- /dev/null +++ b/src/Storages/VirtualColumnsDescription.h @@ -0,0 +1,71 @@ +#pragma once +#include +#include + +namespace DB +{ + +struct VirtualColumnDescription : public ColumnDescription +{ + using Self = VirtualColumnDescription; + VirtualsKind kind; + + VirtualColumnDescription() = default; + VirtualColumnDescription(String name_, DataTypePtr type_, ASTPtr codec_, String comment_, VirtualsKind kind_); + + bool isEphemeral() const { return kind == VirtualsKind::Ephemeral; } + bool isPersistent() const { return kind == VirtualsKind::Persistent; } + + /// This method is needed for boost::multi_index because field + /// of base class cannot be referenced in boost::multi_index::member. 
+ const String & getName() const { return name; } +}; + +class VirtualColumnsDescription +{ +public: + using Container = boost::multi_index_container< + VirtualColumnDescription, + boost::multi_index::indexed_by< + boost::multi_index::sequenced<>, + boost::multi_index::ordered_unique>>>; + + using const_iterator = Container::const_iterator; + + const_iterator begin() const { return container.begin(); } + const_iterator end() const { return container.end(); } + + VirtualColumnsDescription() = default; + + void add(VirtualColumnDescription desc); + void addEphemeral(String name, DataTypePtr type, String comment); + void addPersistent(String name, DataTypePtr type, ASTPtr codec, String comment); + + size_t size() const { return container.size(); } + bool empty() const { return container.empty(); } + bool has(const String & name) const { return container.get<1>().contains(name); } + + NameAndTypePair get(const String & name, VirtualsKind kind) const; + std::optional tryGet(const String & name, VirtualsKind kind) const; + + NameAndTypePair get(const String & name) const { return get(name, VirtualsKind::All); } + std::optional tryGet(const String & name) const { return tryGet(name, VirtualsKind::All); } + + const VirtualColumnDescription * tryGetDescription(const String & name, VirtualsKind kind) const; + const VirtualColumnDescription & getDescription(const String & name, VirtualsKind kind) const; + + const VirtualColumnDescription * tryGetDescription(const String & name) const { return tryGetDescription(name, VirtualsKind::All); } + const VirtualColumnDescription & getDescription(const String & name) const { return getDescription(name, VirtualsKind::All); } + + Block getSampleBlock() const; + NamesAndTypesList getNamesAndTypesList() const; + NamesAndTypesList getNamesAndTypesList(VirtualsKind kind) const; + +private: + Container container; +}; + +using VirtualsDescriptionPtr = std::shared_ptr; +using MultiVersionVirtualsDescriptionPtr = MultiVersion; + +} diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 0764685cb07..b1984a947c8 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1,9 +1,7 @@ #include #include -#include #include -#include #include #include #include @@ -15,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -30,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -51,16 +47,13 @@ #include #include #include -#include #include #include #include - #include #include -#include #include @@ -285,13 +278,13 @@ namespace { switch (kind) { - case IntervalKind::Nanosecond: - case IntervalKind::Microsecond: - case IntervalKind::Millisecond: + case IntervalKind::Kind::Nanosecond: + case IntervalKind::Kind::Microsecond: + case IntervalKind::Kind::Millisecond: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by windows yet"); #define CASE_WINDOW_KIND(KIND) \ - case IntervalKind::KIND: { \ - return AddTime::execute(time_sec, num_units, time_zone); \ + case IntervalKind::Kind::KIND: { \ + return AddTime::execute(time_sec, num_units, time_zone); \ } CASE_WINDOW_KIND(Second) CASE_WINDOW_KIND(Minute) @@ -875,20 +868,20 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) { switch (slide_kind) { - case IntervalKind::Nanosecond: - case IntervalKind::Microsecond: - case IntervalKind::Millisecond: + case IntervalKind::Kind::Nanosecond: + case IntervalKind::Kind::Microsecond: + case 
IntervalKind::Kind::Millisecond: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by windows yet"); #define CASE_WINDOW_KIND(KIND) \ - case IntervalKind::KIND: \ + case IntervalKind::Kind::KIND: \ { \ if (is_tumble) \ - return ToStartOfTransform::execute(time_sec, window_num_units, *time_zone); \ + return ToStartOfTransform::execute(time_sec, window_num_units, *time_zone); \ else \ {\ - UInt32 w_start = ToStartOfTransform::execute(time_sec, hop_num_units, *time_zone); \ - UInt32 w_end = AddTime::execute(w_start, hop_num_units, *time_zone);\ - return AddTime::execute(w_end, -window_num_units, *time_zone);\ + UInt32 w_start = ToStartOfTransform::execute(time_sec, hop_num_units, *time_zone); \ + UInt32 w_end = AddTime::execute(w_start, hop_num_units, *time_zone);\ + return AddTime::execute(w_end, -window_num_units, *time_zone);\ }\ } CASE_WINDOW_KIND(Second) @@ -908,16 +901,16 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) { switch (slide_kind) { - case IntervalKind::Nanosecond: - case IntervalKind::Microsecond: - case IntervalKind::Millisecond: + case IntervalKind::Kind::Nanosecond: + case IntervalKind::Kind::Microsecond: + case IntervalKind::Kind::Millisecond: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by window view yet"); #define CASE_WINDOW_KIND(KIND) \ - case IntervalKind::KIND: \ + case IntervalKind::Kind::KIND: \ { \ - UInt32 w_start = ToStartOfTransform::execute(time_sec, slide_num_units, *time_zone); \ - return AddTime::execute(w_start, slide_num_units, *time_zone); \ + UInt32 w_start = ToStartOfTransform::execute(time_sec, slide_num_units, *time_zone); \ + return AddTime::execute(w_start, slide_num_units, *time_zone); \ } CASE_WINDOW_KIND(Second) CASE_WINDOW_KIND(Minute) @@ -1041,7 +1034,7 @@ void StorageWindowView::threadFuncFireProc() max_fired_watermark = next_fire_signal; auto slide_interval = addTime(0, slide_kind, slide_num_units, *time_zone); /// Convert DayNum into seconds when the slide interval is larger than Day - if (slide_kind > IntervalKind::Day) + if (slide_kind > IntervalKind::Kind::Day) slide_interval *= 86400; next_fire_signal += slide_interval; } @@ -1157,7 +1150,7 @@ StorageWindowView::StorageWindowView( ContextPtr context_, const ASTCreateQuery & query, const ColumnsDescription & columns_, - bool attach_) + LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , log(getLogger(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) @@ -1203,7 +1196,7 @@ StorageWindowView::StorageWindowView( next_fire_signal = getWindowUpperBound(now()); std::exchange(has_inner_table, true); - if (!attach_) + if (mode < LoadingStrictnessLevel::ATTACH) { auto inner_create_query = getInnerTableCreateQuery(inner_query, inner_table_id); auto create_context = Context::createCopy(context_); @@ -1360,7 +1353,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) if (query.is_watermark_ascending) { is_watermark_bounded = true; - watermark_kind = IntervalKind::Second; + watermark_kind = IntervalKind::Kind::Second; watermark_num_units = 1; } else if (query.is_watermark_bounded) @@ -1672,12 +1665,12 @@ void registerStorageWindowView(StorageFactory & factory) { factory.registerStorage("WindowView", [](const StorageFactory::Arguments & args) { - if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_window_view) + if (args.mode <= LoadingStrictnessLevel::CREATE && 
!args.getLocalContext()->getSettingsRef().allow_experimental_window_view) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental WINDOW VIEW feature " "is not enabled (the setting 'allow_experimental_window_view')"); - return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.attach); + return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.mode); }); } diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 969fda8f78e..0b7cd54e3a7 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -111,7 +111,7 @@ public: ContextPtr context_, const ASTCreateQuery & query, const ColumnsDescription & columns_, - bool attach_); + LoadingStrictnessLevel mode); String getName() const override { return "WindowView"; } diff --git a/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp b/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp index 9e2b2a83b98..4607d68f02d 100644 --- a/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp +++ b/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp @@ -26,7 +26,7 @@ try auto config = processor.loadConfig().configuration; String root_path = argv[2]; - zkutil::ZooKeeper zk(*config, zkutil::getZooKeeperConfigName(*config), nullptr); + auto zk = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(*config, zkutil::getZooKeeperConfigName(*config), nullptr); String temp_path = root_path + "/temp"; String blocks_path = root_path + "/block_numbers"; @@ -34,7 +34,7 @@ try Stopwatch total_timer; Stopwatch timer; - EphemeralLocksInAllPartitions locks(blocks_path, "test_lock-", temp_path, zk); + EphemeralLocksInAllPartitions locks(blocks_path, "test_lock-", temp_path, *zk); std::cerr << "Locked, elapsed: " << timer.elapsedSeconds() << std::endl; for (const auto & lock : locks.getLocks()) diff --git a/src/Storages/examples/get_current_inserts_in_replicated.cpp b/src/Storages/examples/get_current_inserts_in_replicated.cpp index d77b0f5177d..6a4c6efbd23 100644 --- a/src/Storages/examples/get_current_inserts_in_replicated.cpp +++ b/src/Storages/examples/get_current_inserts_in_replicated.cpp @@ -29,7 +29,7 @@ try auto config = processor.loadConfig().configuration; String zookeeper_path = argv[2]; - auto zookeeper = std::make_shared(*config, zkutil::getZooKeeperConfigName(*config), nullptr); + auto zookeeper = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(*config, zkutil::getZooKeeperConfigName(*config), nullptr); std::unordered_map> current_inserts; diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp index 6be4213ec6b..406b7f379f9 100644 --- a/src/Storages/prepareReadingFromFormat.cpp +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -4,7 +4,7 @@ namespace DB { -ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals) +ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns) { ReadFromFormatInfo info; /// Collect requested virtual columns and remove them from requested columns. 
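
The two prepareReadingFromFormat.cpp hunks here (the signature above and the loop that follows) drop the explicit `virtuals` parameter: the function now takes the virtual columns from storage_snapshot->virtual_columns, i.e. from the VirtualColumnsDescription machinery added earlier in this patch. A minimal, self-contained sketch of the partitioning step it performs, with hypothetical names (splitRequestedColumns, VirtualSet) standing in for the real ClickHouse types:

```cpp
#include <iostream>
#include <set>
#include <string>
#include <vector>

using VirtualSet = std::set<std::string>;

struct SplitColumns
{
    std::vector<std::string> requested_virtual_columns;
    std::vector<std::string> requested_real_columns;
};

// Walk the requested columns and divert every name that the storage snapshot
// declares as a virtual column (for file-like storages: _path, _file, _size).
SplitColumns splitRequestedColumns(const std::vector<std::string> & requested, const VirtualSet & virtuals)
{
    SplitColumns result;
    for (const auto & name : requested)
    {
        if (virtuals.contains(name))
            result.requested_virtual_columns.push_back(name);
        else
            result.requested_real_columns.push_back(name);
    }
    return result;
}

int main()
{
    /// getVirtualNamesForFileLikeStorage() in this patch returns exactly this set.
    VirtualSet virtuals = {"_path", "_file", "_size"};
    auto split = splitRequestedColumns({"id", "_file", "value", "_size"}, virtuals);
    std::cout << split.requested_virtual_columns.size() << " virtual, "
              << split.requested_real_columns.size() << " real columns\n"; /// prints "2 virtual, 2 real columns"
}
```
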
@@ -12,11 +12,11 @@ ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, c for (const auto & column_name : requested_columns) { bool is_virtual = false; - for (const auto & virtual_column : virtuals) + for (const auto & virtual_column : *storage_snapshot->virtual_columns) { if (column_name == virtual_column.name) { - info.requested_virtual_columns.push_back(virtual_column); + info.requested_virtual_columns.emplace_back(virtual_column.name, virtual_column.type); is_virtual = true; break; } diff --git a/src/Storages/prepareReadingFromFormat.h b/src/Storages/prepareReadingFromFormat.h index c5f3959a550..e4d62c29ec6 100644 --- a/src/Storages/prepareReadingFromFormat.h +++ b/src/Storages/prepareReadingFromFormat.h @@ -22,5 +22,5 @@ namespace DB }; /// Get all needed information for reading from data in some input format. - ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals); + ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns); } diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 352c44554bd..d75f3616f21 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -32,7 +32,7 @@ DB::StoragePtr createStorage(DB::DiskPtr & disk) StoragePtr table = std::make_shared( "Log", disk, "table/", StorageID("test", "test"), ColumnsDescription{names_and_types}, - ConstraintsDescription{}, String{}, false, getContext().context); + ConstraintsDescription{}, String{}, LoadingStrictnessLevel::CREATE, getContext().context); table->startup(); diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 4526a38a1c3..afc458ea612 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -145,7 +145,7 @@ bool isCompatible(ASTPtr & node) return false; if (!function->arguments) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: function->arguments is not set"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "function->arguments is not set"); String name = function->name; diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index 7e81d6d21b7..9f56d781bc9 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -29,14 +28,14 @@ public: String getName() const override = 0; String getSignature() const override = 0; - static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context) + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context) { if (args.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty list of arguments for {}Cluster table function", Base::name); ASTPtr cluster_name_arg = args.front(); args.erase(args.begin()); - Base::addColumnsStructureToArguments(args, desired_structure, context); + Base::updateStructureAndFormatArgumentsIfNeeded(args, structure_, format_, context); args.insert(args.begin(), cluster_name_arg); } diff --git a/src/TableFunctions/ITableFunctionDataLake.h 
b/src/TableFunctions/ITableFunctionDataLake.h index 961e5683fe2..91165ba6705 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -34,7 +34,7 @@ protected: columns = parseColumnsListFromString(TableFunction::configuration.structure, context); StoragePtr storage = Storage::create( - TableFunction::configuration, context, false, StorageID(TableFunction::getDatabaseName(), table_name), + TableFunction::configuration, context, LoadingStrictnessLevel::CREATE, StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt); storage->startup(); diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index b88af855309..1a58be4f75b 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -7,6 +7,7 @@ #include #include +#include #include @@ -27,14 +28,19 @@ void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, const Conte filename = checkAndGetLiteralArgument(arg, "source"); } -String ITableFunctionFileLike::getFormatFromFirstArgument() +std::optional ITableFunctionFileLike::tryGetFormatFromFirstArgument() { - return FormatFactory::instance().getFormatFromFileName(filename, true); + return FormatFactory::instance().tryGetFormatFromFileName(filename); } bool ITableFunctionFileLike::supportsReadingSubsetOfColumns(const ContextPtr & context) { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format, context); + return format != "auto" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format, context); +} + +NameSet ITableFunctionFileLike::getVirtualsToCheckBeforeUsingStructureHint() const +{ + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -63,7 +69,10 @@ void ITableFunctionFileLike::parseArgumentsImpl(ASTs & args, const ContextPtr & format = checkAndGetLiteralArgument(args[1], "format"); if (format == "auto") - format = getFormatFromFirstArgument(); + { + if (auto format_from_first_argument = tryGetFormatFromFirstArgument()) + format = *format_from_first_argument; + } if (args.size() > 2) { @@ -79,34 +88,37 @@ void ITableFunctionFileLike::parseArgumentsImpl(ASTs & args, const ContextPtr & compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); } -void ITableFunctionFileLike::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr &) +void ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) { if (args.empty() || args.size() > getMaxNumberOfArguments()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size()); + auto format_literal = std::make_shared(format); auto structure_literal = std::make_shared(structure); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + /// f(filename) if (args.size() == 1) { - /// Add format=auto before structure argument. 
- args.push_back(std::make_shared("auto")); + args.push_back(format_literal); args.push_back(structure_literal); } /// f(filename, format) else if (args.size() == 2) { + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args.back() = format_literal; args.push_back(structure_literal); } - /// f(filename, format, 'auto') - else if (args.size() == 3) + /// f(filename, format, structure) or f(filename, format, structure, compression) + else if (args.size() >= 3) { - args.back() = structure_literal; - } - /// f(filename, format, 'auto', compression) - else if (args.size() == 4) - { - args[args.size() - 2] = structure_literal; + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; } } diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 4a316cedb9b..ba1b7d2bb3f 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -1,6 +1,7 @@ #pragma once #include +#include "Core/Names.h" #include "Parsers/IAST_fwd.h" namespace DB @@ -29,9 +30,11 @@ public: bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; + NameSet getVirtualsToCheckBeforeUsingStructureHint() const override; + static size_t getMaxNumberOfArguments() { return 4; } - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr &); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr &); protected: @@ -39,7 +42,7 @@ protected: virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); virtual void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context); - virtual String getFormatFromFirstArgument(); + virtual std::optional tryGetFormatFromFirstArgument(); String filename; String format = "auto"; diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index ca6d40a05a3..a5c16b3a5aa 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -153,17 +153,16 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr contex columns_info_uri.addQueryParameter("external_table_functions_use_nulls", toString(use_nulls)); Poco::Net::HTTPBasicCredentials credentials{}; - ReadWriteBufferFromHTTP buf( - columns_info_uri, - Poco::Net::HTTPRequest::HTTP_POST, - {}, - ConnectionTimeouts::getHTTPTimeouts( - context->getSettingsRef(), - context->getServerSettings().keep_alive_timeout), - credentials); + auto buf = BuilderRWBufferFromHTTP(columns_info_uri) + .withConnectionGroup(HTTPConnectionGroupType::STORAGE) + .withMethod(Poco::Net::HTTPRequest::HTTP_POST) + .withTimeouts(ConnectionTimeouts::getHTTPTimeouts( + context->getSettingsRef(), + context->getServerSettings().keep_alive_timeout)) + .create(credentials); std::string columns_info; - readStringBinary(columns_info, buf); + readStringBinary(columns_info, *buf); NamesAndTypesList columns = NamesAndTypesList::parse(columns_info); return ColumnsDescription{columns}; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index b098cac5144..275cd2a9cbb 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -16,6 +16,7 @@ #include #include #include 
+#include #include #include "registerTableFunctions.h" #include @@ -58,7 +59,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); } else { @@ -80,7 +81,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) { @@ -155,7 +156,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); } } @@ -174,15 +175,24 @@ void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, parseArgumentsImpl(args, context); } -void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +void TableFunctionAzureBlobStorage::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) { - if (tryGetNamedCollectionWithOverrides(args, context)) + if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); + /// In case of named collection, just add key-value pairs "format='...', structure='...'" + /// at the end of arguments to override existed format and structure with "auto" values. 
+ if (collection->getOrDefault("format", "auto") == "auto") + { + ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format)}; + auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); + args.push_back(format_equal_func); + } + if (collection->getOrDefault("structure", "auto") == "auto") + { + ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; + auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); + args.push_back(structure_equal_func); + } } else { @@ -191,65 +201,126 @@ void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, "Storage Azure requires 3 to 7 arguments: " "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + auto format_literal = std::make_shared(format); auto structure_literal = std::make_shared(structure); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; - + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; + /// (connection_string, container_name, blobpath) if (args.size() == 3) { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); + /// Add compression = "auto" before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + /// (connection_string, container_name, blobpath, structure) or + /// (connection_string, container_name, blobpath, format) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. else if (args.size() == 4) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + /// (..., format) -> (..., format, compression, structure) if (is_format_arg(fourth_arg)) { + if (fourth_arg == "auto") + args[3] = format_literal; /// Add compression=auto before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + /// (..., structure) -> (..., format, compression, structure) else { - args.back() = structure_literal; + auto structure_arg = args.back(); + args[3] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); } } + /// (connection_string, container_name, blobpath, format, compression) or + /// (storage_account_url, container_name, blobpath, account_name, account_key) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. else if (args.size() == 5) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) + /// (..., format, compression) -> (..., format, compression, structure) + if (is_format_arg(fourth_arg)) { - /// Add format=auto & compression=auto before structure argument. 
- args.push_back(std::make_shared("auto")); - args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args[3] = format_literal; + args.push_back(structure_literal); } - args.push_back(structure_literal); - } - else if (args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) + /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) + else { + args.push_back(format_literal); /// Add compression=auto before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + } + /// (connection_string, container_name, blobpath, format, compression, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, format) + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + auto sixth_arg = checkAndGetLiteralArgument(args[5], "format/structure"); + + /// (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[5], "structure") == "auto") + args[5] = structure_literal; + } + /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) + else if (is_format_arg(sixth_arg)) + { + if (sixth_arg == "auto") + args[5] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) else { - args.back() = structure_literal; + auto structure_arg = args.back(); + args[5] = format_literal; + /// Add compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + if (sixth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); } } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression) else if (args.size() == 7) { + /// (..., format, compression) -> (..., format, compression, structure) + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; args.push_back(structure_literal); } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) else if (args.size() == 8) { - args.back() = structure_literal; + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; + if (checkAndGetLiteralArgument(args[7], "structure") == "auto") + args[7] = structure_literal; } } } @@ -263,7 +334,9 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex auto settings = StorageAzureBlob::createSettings(context); auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false); + if (configuration.format == "auto") + return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; + return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); } return parseColumnsListFromString(configuration.structure, context); @@ -276,8 +349,7 @@ bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const Context std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBeforeUsingStructureHint() const { - auto virtual_column_names = StorageAzureBlob::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h index 1a221f60c55..9622881b417 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.h @@ -55,7 +55,7 @@ public: virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context); protected: diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index 1c3b302a186..04dddca7672 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -21,9 +21,8 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( { StoragePtr storage; ColumnsDescription columns; - bool structure_argument_was_provided = configuration.structure != "auto"; - if (structure_argument_was_provided) + if (configuration.structure != "auto") { columns = parseColumnsListFromString(configuration.structure, context); } 
@@ -59,8 +58,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, - context, - structure_argument_was_provided); + context); } storage->startup(); diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 209446dc9dd..2c3802e8667 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -71,12 +71,12 @@ std::vector TableFunctionExecutable::skipAnalysisForArguments(const Quer const auto & table_function_node_arguments = table_function_node.getArguments().getNodes(); size_t table_function_node_arguments_size = table_function_node_arguments.size(); - if (table_function_node_arguments_size <= 3) + if (table_function_node_arguments_size <= 2) return {}; std::vector result_indexes; - result_indexes.reserve(table_function_node_arguments_size - 3); - for (size_t i = 3; i < table_function_node_arguments_size; ++i) + result_indexes.reserve(table_function_node_arguments_size - 2); + for (size_t i = 2; i < table_function_node_arguments_size; ++i) result_indexes.push_back(i); return result_indexes; diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index f993a9820cb..400fc81e6d4 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -21,6 +22,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int UNEXPECTED_AST_STRUCTURE; } namespace @@ -103,11 +105,25 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt if (function->arguments->children.size() > 2) { - const auto & query_arg = function->arguments->children[2]; + const auto & subquery_arg = function->arguments->children[2]; + const auto * subquery = subquery_arg->as(); + + if (!subquery) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Table function '{}' requires a subquery argument, got '{}'", + getName(), queryToString(subquery_arg)); + + if (subquery->children.empty()) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, + "A subquery AST element must have a child"); + + const auto & query_arg = subquery->children[0]; + if (!query_arg->as()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Table function '{}' requires a EXPLAIN SELECT query argument, got EXPLAIN '{}'", + "Table function '{}' requires a EXPLAIN's SELECT query argument, got '{}'", getName(), queryToString(query_arg)); + explain_query->setExplainedQuery(query_arg); } else if (kind != ASTExplainQuery::ExplainKind::CurrentTransaction) diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 8a9dde374ec..28bf72e07fb 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -54,12 +54,12 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr throw Exception(ErrorCodes::BAD_ARGUMENTS, "The first argument of table function '{}' mush be path or file descriptor", getName()); } -String TableFunctionFile::getFormatFromFirstArgument() +std::optional TableFunctionFile::tryGetFormatFromFirstArgument() { if (fd >= 0) - return FormatFactory::instance().getFormatFromFileDescriptor(fd); + return FormatFactory::instance().tryGetFormatFromFileDescriptor(fd); else - return FormatFactory::instance().getFormatFromFileName(filename, 
true); + return FormatFactory::instance().tryGetFormatFromFileName(filename); } StoragePtr TableFunctionFile::getStorage(const String & source, @@ -104,19 +104,14 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context archive_info = StorageFile::getArchiveInfo(path_to_archive, filename, context->getUserFilesPath(), context, total_bytes_to_read); + if (format == "auto") + return StorageFile::getTableStructureAndFormatFromFile(paths, compression_method, std::nullopt, context, archive_info).first; return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, archive_info); } - return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionFile::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageFile::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - void registerTableFunctionFile(TableFunctionFactory & factory) { factory.registerFunction(); diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index fd4fedc21a9..aaf5ba8873a 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -22,13 +22,11 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - protected: int fd = -1; String path_to_archive; void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context) override; - String getFormatFromFirstArgument() override; + std::optional tryGetFormatFromFirstArgument() override; private: StoragePtr getStorage( diff --git a/src/TableFunctions/TableFunctionFileCluster.cpp b/src/TableFunctions/TableFunctionFileCluster.cpp index 843909e2a58..3e53349b022 100644 --- a/src/TableFunctions/TableFunctionFileCluster.cpp +++ b/src/TableFunctions/TableFunctionFileCluster.cpp @@ -43,8 +43,7 @@ StoragePtr TableFunctionFileCluster::getStorage( compression_method, StorageID(getDatabaseName(), table_name), columns, - ConstraintsDescription{}, - structure != "auto"); + ConstraintsDescription{}); } return storage; diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index 4b6d0f70c0a..ad2a142a140 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -33,7 +33,9 @@ namespace ErrorCodes namespace { -/* format(format_name, data) - ... +/* format(format_name, structure, data) - parses data according to the specified format and structure. + * format(format_name, data) - infers the schema from the data and parses it according to the specified format. + * format(data) - detects the format, infers the schema and parses data according to inferred format and structure. 
*/ class TableFunctionFormat : public ITableFunction { @@ -49,11 +51,11 @@ private: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - Block parseData(ColumnsDescription columns, ContextPtr context) const; + Block parseData(const ColumnsDescription & columns, const String & format_name, const ContextPtr & context) const; - String format; - String data; + String format = "auto"; String structure = "auto"; + String data; }; void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -65,14 +67,15 @@ void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args = args_func.at(0)->children; - if (args.size() != 2 && args.size() != 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 2 or 3 arguments: format, [structure], data", getName()); + if (args.empty() || args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires from 1 to 3 arguments: [format, [structure]], data", getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - format = checkAndGetLiteralArgument(args[0], "format"); data = checkAndGetLiteralArgument(args.back(), "data"); + if (args.size() > 1) + format = checkAndGetLiteralArgument(args[0], "format"); if (args.size() == 3) structure = checkAndGetLiteralArgument(args[1], "structure"); } @@ -82,19 +85,21 @@ ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr conte if (structure == "auto") { SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, false, context); + if (format == "auto") + return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, context).first; + return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, context); } return parseColumnsListFromString(structure, context); } -Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr context) const +Block TableFunctionFormat::parseData(const ColumnsDescription & columns, const String & format_name, const ContextPtr & context) const { Block block; for (const auto & name_and_type : columns.getAllPhysical()) block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); auto read_buf = std::make_unique(data); - auto input_format = context->getInputFormat(format, *read_buf, block, context->getSettingsRef().max_block_size); + auto input_format = context->getInputFormat(format_name, *read_buf, block, context->getSettingsRef().max_block_size); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); if (columns.hasDefaults()) @@ -120,10 +125,24 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont return concatenateBlocks(blocks); } -StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const +StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { - auto columns = getActualTableStructure(context, is_insert_query); - Block res_block = parseData(columns, context); + ColumnsDescription columns; + 
String format_name = format; + if (structure == "auto") + { + SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); + if (format_name == "auto") + std::tie(columns, format_name) = detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, context); + else + columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, context); + } + else + { + columns = parseColumnsListFromString(structure, context); + } + + Block res_block = parseData(columns, format_name, context); auto res = std::make_shared(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); return res; diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp index 8d48a7ba30e..45829245551 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -33,18 +33,14 @@ ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context if (structure == "auto") { context->checkAccess(getSourceAccessType()); + if (format == "auto") + return StorageHDFS::getTableStructureAndFormatFromData(filename, compression_method, context).first; return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); } return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionHDFS::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageHDFS::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - void registerTableFunctionHDFS(TableFunctionFactory & factory) { factory.registerFunction(); diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index 3a719496b26..f1c0b8a7eae 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -36,8 +36,6 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - private: StoragePtr getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 6fb7ed0fce5..57ce6d2b9ff 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -45,8 +45,7 @@ StoragePtr TableFunctionHDFSCluster::getStorage( format, columns, ConstraintsDescription{}, - compression_method, - structure != "auto"); + compression_method); } return storage; } diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index e7b5a34958f..ff1459ca34b 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -88,26 +88,38 @@ void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr if (args_func.size() != 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Table function 'merge' requires exactly 2 arguments - name " - "of source database and regexp for table names."); + "Table function 'merge' requires from 1 to 2 parameters: " + "merge(['db_name',] 'tables_regexp')"); ASTs & args = args_func.at(0)->children; - if (args.size() != 2) + if (args.size() == 1) + { + database_is_regexp = false; + source_database_name_or_regexp = context->getCurrentDatabase(); + + args[0] = evaluateConstantExpressionAsLiteral(args[0], 
context); + source_table_regexp = checkAndGetLiteralArgument(args[0], "table_name_regexp"); + } + else if (args.size() == 2) + { + auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(args[0], context); + + database_is_regexp = is_regexp; + + if (!is_regexp) + args[0] = database_ast; + source_database_name_or_regexp = checkAndGetLiteralArgument(database_ast, "database_name"); + + args[1] = evaluateConstantExpressionAsLiteral(args[1], context); + source_table_regexp = checkAndGetLiteralArgument(args[1], "table_name_regexp"); + } + else + { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Table function 'merge' requires exactly 2 arguments - name " - "of source database and regexp for table names."); - - auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(args[0], context); - - database_is_regexp = is_regexp; - - if (!is_regexp) - args[0] = database_ast; - source_database_name_or_regexp = checkAndGetLiteralArgument(database_ast, "database_name"); - - args[1] = evaluateConstantExpressionAsLiteral(args[1], context); - source_table_regexp = checkAndGetLiteralArgument(args[1], "table_name_regexp"); + "Table function 'merge' requires from 1 to 2 parameters: " + "merge(['db_name',] 'tables_regexp')"); + } } diff --git a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp new file mode 100644 index 00000000000..435ed4bdf0d --- /dev/null +++ b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp @@ -0,0 +1,204 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +class TableFunctionMergeTreeIndex : public ITableFunction +{ +public: + static constexpr auto name = "mergeTreeIndex"; + std::string getName() const override { return name; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; + +private: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "MergeTreeIndex"; } + + StorageID source_table_id{StorageID::createEmpty()}; + bool with_marks = false; +}; + +void TableFunctionMergeTreeIndex::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + ASTs & args_func = ast_function->children; + if (args_func.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function ({}) must have arguments.", quoteString(getName())); + + ASTs & args = args_func.at(0)->children; + if (args.size() < 2 || args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' must have 2 or 3 arguments, got: {}", getName(), args.size()); + + args[0] = evaluateConstantExpressionForDatabaseName(args[0], context); + args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); + + auto database = checkAndGetLiteralArgument(args[0], "database"); + auto table = checkAndGetLiteralArgument(args[1], "table"); + + ASTs rest_args(args.begin() + 2, args.end()); + if (!rest_args.empty()) + { + auto params = getParamsMapFromAST(rest_args, context); + auto param = params.extract("with_marks"); + + if 
(!param.empty()) + { + auto & value = param.mapped(); + if (value.getType() != Field::Types::Bool && value.getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Table function '{}' expected bool flag for 'with_marks' argument", getName()); + + if (value.getType() == Field::Types::Bool) + with_marks = value.get(); + else + with_marks = value.get(); + } + + if (!params.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unexpected arguments '{}' for table function '{}'", + fmt::join(params | boost::adaptors::map_keys, ","), getName()); + } + } + + source_table_id = StorageID{database, table}; +} + +static NameSet getAllPossibleStreamNames( + const NameAndTypePair & column, + const MergeTreeDataPartsVector & data_parts) +{ + NameSet all_streams; + + /// Add the stream with the name of column + /// because it may be abcent in serialization streams (e.g. for Tuple type) + /// but in compact parts we write only marks for whole columns, not subsubcolumns. + auto main_stream_name = escapeForFileName(column.name); + all_streams.insert(Nested::concatenateName(main_stream_name, "mark")); + + auto callback = [&](const auto & substream_path) + { + auto stream_name = ISerialization::getFileNameForStream(column, substream_path); + all_streams.insert(Nested::concatenateName(stream_name, "mark")); + }; + + auto serialization = IDataType::getSerialization(column); + serialization->enumerateStreams(callback); + + if (!column.type->supportsSparseSerialization()) + return all_streams; + + /// If there is at least one part with sparse serialization + /// add columns with marks of its substreams to the table. + for (const auto & part : data_parts) + { + serialization = part->tryGetSerialization(column.name); + if (serialization && serialization->getKind() == ISerialization::Kind::SPARSE) + { + serialization->enumerateStreams(callback); + break; + } + } + + return all_streams; +} + +ColumnsDescription TableFunctionMergeTreeIndex::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const +{ + auto source_table = DatabaseCatalog::instance().getTable(source_table_id, context); + auto metadata_snapshot = source_table->getInMemoryMetadataPtr(); + + ColumnsDescription columns; + for (const auto & column : StorageMergeTreeIndex::virtuals_sample_block) + columns.add({column.name, column.type}); + + for (const auto & column : metadata_snapshot->getPrimaryKey().sample_block) + columns.add({column.name, column.type}); + + if (with_marks) + { + auto element_type = std::make_shared(std::make_shared()); + auto mark_type = std::make_shared( + DataTypes{element_type, element_type}, + Names{"offset_in_compressed_file", "offset_in_decompressed_block"}); + + const auto * merge_tree = dynamic_cast(source_table.get()); + if (!merge_tree) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function mergeTreeIndex expected MergeTree table, got: {}", source_table->getName()); + + auto data_parts = merge_tree->getDataPartsVectorForInternalUsage(); + auto columns_list = Nested::convertToSubcolumns(metadata_snapshot->getColumns().getAllPhysical()); + + for (const auto & column : columns_list) + { + auto all_streams = getAllPossibleStreamNames(column, data_parts); + for (const auto & stream_name : all_streams) + { + /// There may be shared substreams of columns (e.g. 
for Nested type) + if (!columns.has(stream_name)) + columns.add({stream_name, mark_type}); + } + } + } + + return columns; +} + +StoragePtr TableFunctionMergeTreeIndex::executeImpl( + const ASTPtr & /*ast_function*/, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool is_insert_query) const +{ + auto source_table = DatabaseCatalog::instance().getTable(source_table_id, context); + auto columns = getActualTableStructure(context, is_insert_query); + + StorageID storage_id(getDatabaseName(), table_name); + auto res = std::make_shared(std::move(storage_id), std::move(source_table), std::move(columns), with_marks); + + res->startup(); + return res; +} + +void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description = "Represents the contents of index and marks files of MergeTree tables. It can be used for introspection", + .examples = {{"mergeTreeIndex", "SELECT * FROM mergeTreeIndex(currentDatabase(), mt_table, with_marks = true)", ""}}, + .categories = {"Other"}, + }, + .allow_readonly = true, + }); +} + +} diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 1f42ce4ba30..d7774bb6478 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -317,7 +317,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, Con String{}, String{}, DistributedSettings{}, - false, + LoadingStrictnessLevel::CREATE, cluster) : std::make_shared( StorageID(getDatabaseName(), table_name), @@ -332,7 +332,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, Con String{}, String{}, DistributedSettings{}, - false, + LoadingStrictnessLevel::CREATE, cluster); res->startup(); diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index a9c5a5c99f0..a8c100ebd44 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" #include @@ -61,12 +62,11 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (configuration.format == "auto") { String file_path = named_collection->getOrDefault("filename", Poco::URI(named_collection->get("url")).getPath()); - configuration.format = FormatFactory::instance().getFormatFromFileName(file_path, true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(file_path).value_or("auto"); } } else { - size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers_from_ast, context); if (count == 0 || count > 7) @@ -101,7 +101,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context no_sign_request = true; args_to_idx = {{"format", 2}}; } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) args_to_idx = {{"format", 1}, {"structure", 2}}; else args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; @@ -120,14 +120,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context no_sign_request = true; args_to_idx = {{"format", 2}, {"structure", 3}}; } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + else if (second_arg == "auto" || 
FormatFactory::instance().exists(second_arg)) { args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; } else { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; } @@ -154,7 +154,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; } @@ -171,7 +171,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else if (count == 6) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; } @@ -216,7 +216,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context configuration.auth_settings.no_sign_request = no_sign_request; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(url).getPath(), true); + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(url).getPath()).value_or("auto"); } configuration.keys = {configuration.url.key}; @@ -238,15 +238,24 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con parseArgumentsImpl(args, context); } -void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) { - if (tryGetNamedCollectionWithOverrides(args, context)) + if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); + /// In case of named collection, just add key-value pairs "format='...', structure='...'" + /// at the end of arguments to override existed format and structure with "auto" values. 
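The named-collection branch here (and the matching hunks for azureBlobStorage and url) appends format='...'/structure='...' overrides only when the collection still carries the default "auto"; values the user set explicitly are left alone. A tiny Python sketch of that rule, with a plain dict standing in for the named collection (keys and values are illustrative assumptions):

```python
# "Override only what is still 'auto'": append detected format/structure for a
# named-collection call without clobbering values the user pinned explicitly.
def overrides_for_collection(collection, detected_format, detected_structure):
    overrides = {}
    if collection.get("format", "auto") == "auto":
        overrides["format"] = detected_format
    if collection.get("structure", "auto") == "auto":
        overrides["structure"] = detected_structure
    return overrides

# The user pinned the format but left the structure to schema inference:
print(overrides_for_collection({"format": "CSV"}, "CSV", "a UInt64, b String"))
# -> {'structure': 'a UInt64, b String'}
```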
+ if (collection->getOrDefault("format", "auto") == "auto") + { + ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format)}; + auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); + args.push_back(format_equal_func); + } + if (collection->getOrDefault("structure", "auto") == "auto") + { + ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; + auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); + args.push_back(structure_equal_func); + } } else { @@ -256,23 +265,25 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & if (count == 0 || count > getMaxNumberOfArguments()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), count); + auto format_literal = std::make_shared(format); auto structure_literal = std::make_shared(structure); - /// s3(s3_url) + /// s3(s3_url) -> s3(s3_url, format, structure) if (count == 1) { - /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); args.push_back(structure_literal); } - /// s3(s3_url, format) or s3(s3_url, NOSIGN) + /// s3(s3_url, format) -> s3(s3_url, format, structure) or + /// s3(s3_url, NOSIGN) -> s3(s3_url, NOSIGN, format, structure) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. else if (count == 2) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// If there is NOSIGN, add format=auto before structure. if (boost::iequals(second_arg, "NOSIGN")) - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); + else if (second_arg == "auto") + args.back() = format_literal; args.push_back(structure_literal); } /// s3(source, format, structure) or @@ -282,18 +293,25 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & else if (count == 3) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// s3(source, NOSIGN, format) -> s3(source, NOSIGN, format, structure) if (boost::iequals(second_arg, "NOSIGN")) { + if (checkAndGetLiteralArgument(args[2], "format") == "auto") + args.back() = format_literal; args.push_back(structure_literal); } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + /// s3(source, format, structure) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { - args[count - 1] = structure_literal; + if (second_arg == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; } + /// s3(source, access_key_id, access_key_id) -> s3(source, access_key_id, access_key_id, format, structure) else { - /// Add format=auto before structure argument. 
- args.push_back(std::make_shared("auto")); + args.push_back(format_literal); args.push_back(structure_literal); } } @@ -304,16 +322,27 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & else if (count == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// s3(source, NOSIGN, format, structure) if (boost::iequals(second_arg, "NOSIGN")) { - args[count - 1] = structure_literal; + if (checkAndGetLiteralArgument(args[2], "format") == "auto") + args[2] = format_literal; + if (checkAndGetLiteralArgument(args[3], "structure") == "auto") + args[3] = structure_literal; } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + /// s3(source, format, structure, compression_method) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { - args[count - 2] = structure_literal; + if (second_arg == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; } + /// s3(source, access_key_id, access_key_id, format) -> s3(source, access_key_id, access_key_id, format, structure) else { + if (checkAndGetLiteralArgument(args[3], "format") == "auto") + args[3] = format_literal; args.push_back(structure_literal); } } @@ -323,19 +352,30 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & else if (count == 5) { auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// s3(source, NOSIGN, format, structure, compression_method) if (boost::iequals(sedond_arg, "NOSIGN")) { - args[count - 2] = structure_literal; + if (checkAndGetLiteralArgument(args[2], "format") == "auto") + args[2] = format_literal; + if (checkAndGetLiteralArgument(args[3], "structure") == "auto") + args[3] = structure_literal; } + /// s3(source, access_key_id, access_key_id, format, structure) else { - args[count - 1] = structure_literal; + if (checkAndGetLiteralArgument(args[3], "format") == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[4], "structure") == "auto") + args[4] = structure_literal; } } /// s3(source, access_key_id, secret_access_key, format, structure, compression) else if (count == 6) { - args[count - 2] = structure_literal; + if (checkAndGetLiteralArgument(args[3], "format") == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[4], "structure") == "auto") + args[4] = structure_literal; } } } @@ -346,6 +386,9 @@ ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, { context->checkAccess(getSourceAccessType()); configuration.update(context); + if (configuration.format == "auto") + return StorageS3::getTableStructureAndFormatFromData(configuration, std::nullopt, context).first; + return StorageS3::getTableStructureFromData(configuration, std::nullopt, context); } @@ -359,8 +402,7 @@ bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) std::unordered_set TableFunctionS3::getVirtualsToCheckBeforeUsingStructureHint() const { - auto virtual_column_names = StorageS3::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool /*is_insert_query*/) const diff --git a/src/TableFunctions/TableFunctionS3.h 
b/src/TableFunctions/TableFunctionS3.h index fa73c1d313e..00ca36c6653 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -57,7 +57,7 @@ public: virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context); protected: diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index ce96f7f580b..e727c4e4c89 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -21,9 +21,8 @@ StoragePtr TableFunctionS3Cluster::executeImpl( { StoragePtr storage; ColumnsDescription columns; - bool structure_argument_was_provided = configuration.structure != "auto"; - if (structure_argument_was_provided) + if (configuration.structure != "auto") { columns = parseColumnsListFromString(configuration.structure, context); } @@ -53,8 +52,7 @@ StoragePtr TableFunctionS3Cluster::executeImpl( StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, - context, - structure_argument_was_provided); + context); } storage->startup(); diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index aa535991d65..2bdc0b449e0 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -55,7 +55,7 @@ void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & contex format = configuration.format; if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(Poco::URI(filename).getPath(), true); + format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(filename).getPath()).value_or("auto"); StorageURL::evalArgsAndCollectHeaders(args, configuration.headers, context); } @@ -78,15 +78,24 @@ void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & contex } } -void TableFunctionURL::addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context) +void TableFunctionURL::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context) { - if (tryGetNamedCollectionWithOverrides(args, context)) + if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(desired_structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); + /// In case of named collection, just add key-value pairs "format='...', structure='...'" + /// at the end of arguments to override existed format and structure with "auto" values. 
+ if (collection->getOrDefault("format", "auto") == "auto") + { + ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format_)}; + auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); + args.push_back(format_equal_func); + } + if (collection->getOrDefault("structure", "auto") == "auto") + { + ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); + args.push_back(structure_equal_func); + } } else { @@ -101,7 +110,7 @@ void TableFunctionURL::addColumnsStructureToArguments(ASTs & args, const String args.pop_back(); } - ITableFunctionFileLike::addColumnsStructureToArguments(args, desired_structure, context); + ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(args, structure_, format_, context); if (headers_ast) args.push_back(headers_ast); @@ -131,6 +140,14 @@ ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context, if (structure == "auto") { context->checkAccess(getSourceAccessType()); + if (format == "auto") + return StorageURL::getTableStructureAndFormatFromData( + filename, + chooseCompressionMethod(Poco::URI(filename).getPath(), compression_method), + configuration.headers, + std::nullopt, + context).first; + return StorageURL::getTableStructureFromData(format, filename, chooseCompressionMethod(Poco::URI(filename).getPath(), compression_method), @@ -142,15 +159,9 @@ ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context, return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionURL::getVirtualsToCheckBeforeUsingStructureHint() const +std::optional TableFunctionURL::tryGetFormatFromFirstArgument() { - auto virtual_column_names = StorageURL::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - -String TableFunctionURL::getFormatFromFirstArgument() -{ - return FormatFactory::instance().getFormatFromFileName(Poco::URI(filename).getPath(), true); + return FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(filename).getPath()); } void registerTableFunctionURL(TableFunctionFactory & factory) diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index bf417f950c0..a1efddb84c6 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -34,9 +34,7 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context); - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context); protected: void parseArguments(const ASTPtr & ast, ContextPtr context) override; @@ -53,8 +51,7 @@ private: const char * getStorageTypeName() const override { return "URL"; } - String getFormatFromFirstArgument() override; - + std::optional tryGetFormatFromFirstArgument() override; }; } diff --git a/src/TableFunctions/TableFunctionURLCluster.cpp b/src/TableFunctions/TableFunctionURLCluster.cpp index a2949278155..5fd3c3342a5 100644 --- a/src/TableFunctions/TableFunctionURLCluster.cpp +++ b/src/TableFunctions/TableFunctionURLCluster.cpp @@ -40,8 +40,7 @@ StoragePtr 
TableFunctionURLCluster::getStorage( StorageID(getDatabaseName(), table_name), getActualTableStructure(context, /* is_insert_query */ true), ConstraintsDescription{}, - configuration, - structure != "auto"); + configuration); } return storage; } diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 8c18c298f45..2b84bd347aa 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -23,6 +23,7 @@ void registerTableFunctions() registerTableFunctionGenerate(factory); registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); + registerTableFunctionMergeTreeIndex(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index fae763e7dc8..6984eac619e 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -20,6 +20,7 @@ void registerTableFunctionInput(TableFunctionFactory & factory); void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); +void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 18086b6a5c1..31527dc3476 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -1,12 +1,4 @@ test_build_sets_from_multiple_threads/test.py::test_set test_concurrent_backups_s3/test.py::test_concurrent_backups -test_distributed_backward_compatability/test.py::test_distributed_in_tuple test_distributed_type_object/test.py::test_distributed_type_object -test_executable_table_function/test.py::test_executable_function_input_python -test_mask_sensitive_info/test.py::test_encryption_functions test_merge_table_over_distributed/test.py::test_global_in -test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed -test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster -test_select_access_rights/test_main.py::test_alias_columns -test_settings_profile/test.py::test_show_profiles -test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 29331d674c8..bcf0eee0d0e 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -4,25 +4,14 @@ 01062_pm_all_join_with_block_continuation 01083_expressions_in_engine_arguments 01155_rename_move_materialized_view -01244_optimize_distributed_group_by_sharding_key 01584_distributed_buffer_cannot_find_column 01624_soft_constraints -01656_test_query_log_factories_info 01747_join_view_filter_dictionary -01761_cast_to_enum_nullable 01925_join_materialized_columns -01952_optimize_distributed_group_by_sharding_key -02174_cte_scalar_cache_mv 02354_annoy -02428_parameterized_view -02493_inconsistent_hex_and_binary_number -02725_agg_projection_resprect_PK -02763_row_policy_storage_merge_alias -02818_parameterized_view_with_cte_multiple_usage # Check after constants refactoring 02901_parallel_replicas_rollup # Flaky. 
Please don't delete them without fixing them: 01287_max_execution_speed 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET 02404_memory_bound_merging -02479_race_condition_between_insert_and_droppin_mv diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index bcaac49f190..9bc44025826 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -14,4 +14,4 @@ warn_unused_ignores = False warn_return_any = True no_implicit_reexport = True strict_equality = True -strict_concatenate = True +extra_checks = True diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 0a69d8aab49..1ecf805cadc 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -67,14 +67,14 @@ def main(): build_name = get_build_name_for_check(check_name) urls = read_build_urls(build_name, reports_path) if not urls: - raise Exception("No build URLs found") + raise ValueError("No build URLs found") for url in urls: if url.endswith("/clickhouse"): build_url = url break else: - raise Exception("Cannot find the clickhouse binary among build results") + raise ValueError("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) @@ -114,6 +114,7 @@ def main(): "report.html": workspace_path / "report.html", "core.zst": workspace_path / "core.zst", "dmesg.log": workspace_path / "dmesg.log", + "fatal.log": workspace_path / "fatal.log", } compressed_server_log_path = workspace_path / "server.log.zst" diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index 26a05ab0af4..6c3d71708e9 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -63,8 +63,12 @@ def get_scales(runner_type: str) -> Tuple[int, int]: # 10. I am trying 7 now. 
# 7 still looks a bit slow, so I try 6 # Let's have it the same as the other ASG + # + # All type of style-checkers should be added very quickly to not block the workflows # UPDATE THE COMMENT ON CHANGES scale_up = 3 + if "style" in runner_type: + scale_up = 1 return scale_down, scale_up @@ -138,7 +142,7 @@ def set_capacity( logging.info( "The ASG %s capacity will be increased to %s, current capacity=%s, " - "effective capacity=%sm maximum capacity=%s, running jobs=%s, queue size=%s", + "effective capacity=%s, maximum capacity=%s, running jobs=%s, queue size=%s", asg["AutoScalingGroupName"], desired_capacity, effective_capacity, diff --git a/tests/ci/autoscale_runners_lambda/test_autoscale.py b/tests/ci/autoscale_runners_lambda/test_autoscale.py index 21a407276f9..75f178ac394 100644 --- a/tests/ci/autoscale_runners_lambda/test_autoscale.py +++ b/tests/ci/autoscale_runners_lambda/test_autoscale.py @@ -80,7 +80,7 @@ class TestSetCapacity(unittest.TestCase): ), TestCase("increase-1", 1, 13, 20, [Queue("queued", 23, "increase-1")], 17), TestCase( - "style-checker", 1, 13, 20, [Queue("queued", 33, "style-checker")], 20 + "style-checker", 1, 13, 20, [Queue("queued", 19, "style-checker")], 19 ), TestCase("increase-2", 1, 13, 20, [Queue("queued", 18, "increase-2")], 15), TestCase("increase-3", 1, 13, 20, [Queue("queued", 183, "increase-3")], 20), diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index 7fda81f11b2..7aaf18e7765 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -1,127 +1,157 @@ #!/usr/bin/env python3 -import argparse import csv import logging +import subprocess +import sys from pathlib import Path -from typing import List, Optional, Tuple +from typing import List, Sequence, Tuple -# isort: off -from github import Github - -# isort: on - -from commit_status_helper import get_commit, post_commit_status -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import ERROR, SUCCESS, TestResult, TestResults -from s3_helper import S3Helper -from upload_result_helper import upload_results - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser() - parser.add_argument("files", nargs="+", type=Path, help="Path to status files") - return parser.parse_args() +from ci_config import JobNames +from ci_utils import normalize_string +from env_helper import TEMP_PATH +from functional_test_check import NO_CHANGES_MSG +from report import ( + ERROR, + FAIL, + FAILURE, + OK, + SKIPPED, + SUCCESS, + JobReport, + TestResult, + TestResults, +) +from stopwatch import Stopwatch def post_commit_status_from_file(file_path: Path) -> List[str]: with open(file_path, "r", encoding="utf-8") as f: res = list(csv.reader(f, delimiter="\t")) if len(res) < 1: - raise Exception(f'Can\'t read from "{file_path}"') + raise IndexError(f'Can\'t read from "{file_path}"') if len(res[0]) != 3: - raise Exception(f'Can\'t read from "{file_path}"') + raise IndexError(f'Can\'t read from "{file_path}"') return res[0] -# Returns (is_ok, test_results, error_message) -def process_result(file_path: Path) -> Tuple[bool, TestResults, Optional[str]]: - test_results = [] # type: TestResults - state, report_url, description = post_commit_status_from_file(file_path) - prefix = file_path.parent.name - if description.strip() in [ - "Invalid check_status.tsv", - "Not found test_results.tsv", - "Empty test_results.tsv", - ]: - status = ( - f'Check failed (Report)' - if report_url != "null" - else "Check failed" - ) - 
return False, [TestResult(f"{prefix}: {description}", status)], "Check failed" - - is_ok = state == SUCCESS - if is_ok and report_url == "null": - return is_ok, test_results, None - - status = ( - f'OK: Bug reproduced (Report)' - if is_ok - else f'Bug is not reproduced (Report)' - ) - test_results.append(TestResult(f"{prefix}: {description}", status)) - return is_ok, test_results, None +def get_failed_test_cases(file_path: Path) -> List[TestResult]: + job_report = JobReport.load(from_file=file_path) + test_results = [] # type: List[TestResult] + for tr in job_report.test_results: + if tr.status == FAIL: + if tr.name == NO_CHANGES_MSG: + tr.status = SKIPPED + else: + tr.name = "[with NOT_OK] " + tr.name + tr.status = OK + elif tr.status == OK: + tr.name = "[with NOT_OK] " + tr.name + tr.status = FAIL + else: + # do not invert error status + pass + test_results.append(tr) + return test_results def process_all_results( - file_paths: List[Path], -) -> Tuple[bool, TestResults, Optional[str]]: - any_ok = False - all_results = [] - error = None - for status_path in file_paths: - is_ok, test_results, error = process_result(status_path) - any_ok = any_ok or is_ok - if test_results is not None: - all_results.extend(test_results) + file_paths: Sequence[Path], +) -> Tuple[str, str, TestResults]: + all_results = [] # type: TestResults + has_fail = False + has_error = False + has_ok = False + for job_report_path in file_paths: + test_results = get_failed_test_cases(job_report_path) + for tr in test_results: + if tr.status == FAIL: + has_fail = True + elif tr.status == ERROR: + has_error = True + elif tr.status == OK: + has_ok = True + all_results.extend(test_results) + if has_error: + status = ERROR + description = "Some error(s) occured in tests" + elif has_ok: + status = SUCCESS + description = "New test(s) reproduced a bug" + elif has_fail: + status = FAILURE + description = "New test(s) failed to reproduce a bug" + else: + status = ERROR + description = "Invalid job results" - return any_ok and error is None, all_results, error + return status, description, all_results def main(): logging.basicConfig(level=logging.INFO) - args = parse_args() - status_files = args.files # type: List[Path] + # args = parse_args() + stopwatch = Stopwatch() + jobs_to_validate = [JobNames.STATELESS_TEST_RELEASE, JobNames.INTEGRATION_TEST] + functional_job_report_file = Path(TEMP_PATH) / "functional_test_job_report.json" + integration_job_report_file = Path(TEMP_PATH) / "integration_test_job_report.json" + jobs_report_files = { + JobNames.STATELESS_TEST_RELEASE: functional_job_report_file, + JobNames.INTEGRATION_TEST: integration_job_report_file, + } + jobs_scripts = { + JobNames.STATELESS_TEST_RELEASE: "functional_test_check.py", + JobNames.INTEGRATION_TEST: "integration_test_check.py", + } - check_name_with_group = "Bugfix validate check" - - is_ok, test_results, error = process_all_results(status_files) - - description = "" - if error: - description = error - elif not is_ok: - description = "Changed tests don't reproduce the bug" - - pr_info = PRInfo() - if not test_results: - description = "No results to upload" - report_url = "" - logging.info("No results to upload") - else: - report_url = upload_results( - S3Helper(), - pr_info.number, - pr_info.sha, - test_results, - status_files, - check_name_with_group, + for test_job in jobs_to_validate: + report_file = jobs_report_files[test_job] + test_script = jobs_scripts[test_job] + if report_file.exists(): + report_file.unlink() + extra_timeout_option = "" + if test_job == 
JobNames.STATELESS_TEST_RELEASE: + extra_timeout_option = str(3600) + # "bugfix" must be present in checkname, as integration test runner checks this + check_name = f"Validate bugfix: {test_job}" + command = f"python3 {test_script} '{check_name}' {extra_timeout_option} --validate-bugfix --report-to-file {report_file}" + print(f"Going to validate job [{test_job}], command [{command}]") + _ = subprocess.run( + command, + stdout=sys.stdout, + stderr=sys.stderr, + text=True, + check=False, + shell=True, ) + assert ( + report_file.is_file() + ), f"No job report [{report_file}] found after job execution" - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - post_commit_status( - commit, - SUCCESS if is_ok else ERROR, - report_url, - description, - check_name_with_group, - pr_info, - dump_to_file=True, + status, description, test_results = process_all_results( + list(jobs_report_files.values()) ) + additional_files = [] + for job_id, report_file in jobs_report_files.items(): + jr = JobReport.load(from_file=report_file) + additional_files.append(report_file) + for file in set(jr.additional_files): + file_ = Path(file) + file_name = file_.name + file_name = file_name.replace(".", "__" + normalize_string(job_id) + ".", 1) + file_ = file_.rename(file_.parent / file_name) + additional_files.append(file_) + + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_files, + ).dump() + if __name__ == "__main__": main() diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 0d24cb80021..66734f58b23 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -8,7 +8,10 @@ import time from pathlib import Path from typing import Any, Callable, List, Union -import requests # type: ignore +# isort: off +import requests + +# isort: on import get_robot_token as grt # we need an updated ROBOT_TOKEN from ci_config import CI_CONFIG @@ -30,9 +33,10 @@ def get_with_retries( "Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url ) exc = Exception("A placeholder to satisfy typing and avoid nesting") + timeout = kwargs.pop("timeout", 30) for i in range(retries): try: - response = requests.get(url, **kwargs) + response = requests.get(url, timeout=timeout, **kwargs) response.raise_for_status() return response except Exception as e: @@ -74,10 +78,11 @@ def get_gh_api( token_is_set = "Authorization" in kwargs.get("headers", {}) exc = Exception("A placeholder to satisfy typing and avoid nesting") try_cnt = 0 + timeout = kwargs.pop("timeout", 30) while try_cnt < retries: try_cnt += 1 try: - response = requests.get(url, **kwargs) + response = requests.get(url, timeout=timeout, **kwargs) response.raise_for_status() return response except requests.HTTPError as e: @@ -85,7 +90,8 @@ def get_gh_api( ratelimit_exceeded = ( e.response.status_code == 403 and b"rate limit exceeded" - in e.response._content # pylint:disable=protected-access + # pylint:disable-next=protected-access + in (e.response._content or b"") ) try_auth = e.response.status_code == 404 if (ratelimit_exceeded or try_auth) and not token_is_set: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 8f8f2b28935..48640f15ac0 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -1,34 +1,31 @@ #!/usr/bin/env python3 -import json import logging import os 
import sys from pathlib import Path from typing import List +from ci_config import CI_CONFIG, Build from env_helper import ( GITHUB_JOB_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL, - TEMP_PATH, REPORT_PATH, + TEMP_PATH, ) +from pr_info import PRInfo from report import ( - BuildResult, ERROR, PENDING, SUCCESS, + BuildResult, JobReport, create_build_html_report, get_worst_status, ) - -from pr_info import PRInfo -from ci_config import CI_CONFIG from stopwatch import Stopwatch - # Old way to read the neads_data NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "") # Now it's set here. Two-steps migration for backward compatibility @@ -48,60 +45,43 @@ def main(): ) build_check_name = sys.argv[1] - needs_data: List[str] = [] - required_builds = 0 - - if NEEDS_DATA: - needs_data = json.loads(NEEDS_DATA) - # drop non build jobs if any - needs_data = [d for d in needs_data if "Build" in d] - elif os.path.exists(NEEDS_DATA_PATH): - with open(NEEDS_DATA_PATH, "rb") as file_handler: - needs_data = list(json.load(file_handler).keys()) - else: - assert False, "NEEDS_DATA env var required" - - required_builds = len(needs_data) - - if needs_data: - logging.info("The next builds are required: %s", ", ".join(needs_data)) pr_info = PRInfo() - builds_for_check = CI_CONFIG.get_builds_for_report(build_check_name) - required_builds = required_builds or len(builds_for_check) + builds_for_check = CI_CONFIG.get_builds_for_report( + build_check_name, + release=pr_info.is_release(), + backport=pr_info.head_ref.startswith("backport/"), + ) + required_builds = len(builds_for_check) + missing_builds = 0 # Collect reports from json artifacts - build_results = [] + build_results = [] # type: List[BuildResult] for build_name in builds_for_check: build_result = BuildResult.load_any( build_name, pr_info.number, pr_info.head_ref ) if not build_result: - logging.warning("Build results for %s are missing", build_name) - continue - assert ( - pr_info.head_ref == build_result.head_ref or pr_info.number > 0 - ), "BUG. if not a PR, report must be created on the same branch" - build_results.append(build_result) - - # The code to collect missing reports for failed jobs - missing_job_names = [ - name - for name in needs_data - if not any(1 for br in build_results if br.job_name.startswith(name)) - ] - missing_builds = len(missing_job_names) - for job_name in reversed(missing_job_names): - build_result = BuildResult.missing_result("missing") - build_result.job_name = job_name - build_result.status = PENDING - logging.info( - "There is missing report for %s, created a dummy result %s", - job_name, - build_result, - ) - build_results.insert(0, build_result) + if build_name == Build.FUZZERS: + logging.info("Build [%s] is missing - skip", Build.FUZZERS) + continue + logging.warning("Build results for %s is missing", build_name) + build_result = BuildResult.missing_result("missing") + build_result.job_name = build_name + build_result.status = PENDING + logging.info( + "There is missing report for %s, created a dummy result %s", + build_name, + build_result, + ) + missing_builds += 1 + build_results.insert(0, build_result) + else: + assert ( + pr_info.head_ref == build_result.head_ref or pr_info.number > 0 + ), "BUG. 
if not a PR, report must be created on the same branch" + build_results.append(build_result) # Calculate artifact groups like packages and binaries total_groups = sum(len(br.grouped_urls) for br in build_results) diff --git a/tests/ci/cache_utils.py b/tests/ci/cache_utils.py index 062207fadd1..a0692f4eff2 100644 --- a/tests/ci/cache_utils.py +++ b/tests/ci/cache_utils.py @@ -123,6 +123,13 @@ class Cache: local_s3_cache = Path(url[7:]) if local_s3_cache.is_file(): shutil.copy2(local_s3_cache, compressed_cache) + else: + logging.warning( + "The local cache file %s does not exist, creating empty directory", + local_s3_cache, + ) + self.directory.mkdir(parents=True, exist_ok=True) + return else: download_build_with_progress(url, compressed_cache) except DownloadException as e: @@ -155,7 +162,7 @@ class Cache: logging.info("Remote cache %s already exist, won't reupload", s3_path) return - logging.info("Compressing cargo cache") + logging.info("Compressing cache") archive_path = self.temp_path / self.archive_name compress_fast(self.directory, archive_path) logging.info("Uploading %s to S3 path %s", archive_path, s3_path) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 656198c6985..625936ec5c8 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -1,16 +1,15 @@ #!/usr/bin/env python3 import json -import re import time from base64 import b64decode from collections import namedtuple from queue import Queue from threading import Thread -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional -import requests # type: ignore -from lambda_shared.pr import CATEGORY_TO_LABEL, check_pr_description +import requests +from lambda_shared.pr import check_pr_description from lambda_shared.token import get_cached_access_token NEED_RERUN_OR_CANCELL_WORKFLOWS = { @@ -48,16 +47,18 @@ class Worker(Thread): def _exec_get_with_retry(url: str, token: str) -> dict: headers = {"Authorization": f"token {token}"} + e = Exception() for i in range(MAX_RETRY): try: - response = requests.get(url, headers=headers) + response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() return response.json() # type: ignore except Exception as ex: print("Got exception executing request", ex) + e = ex time.sleep(i + 1) - raise Exception("Cannot execute GET request with retries") + raise requests.HTTPError("Cannot execute GET request with retries") from e WorkflowDescription = namedtuple( @@ -215,16 +216,18 @@ def get_workflow_description(workflow_url: str, token: str) -> WorkflowDescripti def _exec_post_with_retry(url: str, token: str, json: Optional[Any] = None) -> Any: headers = {"Authorization": f"token {token}"} + e = Exception() for i in range(MAX_RETRY): try: - response = requests.post(url, headers=headers, json=json) + response = requests.post(url, headers=headers, json=json, timeout=30) response.raise_for_status() return response.json() except Exception as ex: print("Got exception executing request", ex) + e = ex time.sleep(i + 1) - raise Exception("Cannot execute POST request with retry") + raise requests.HTTPError("Cannot execute POST request with retry") from e def exec_workflow_url(urls_to_post, token): diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 2c40b2a4099..d92504e30bd 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -456,11 +456,13 @@ class Backport: tomorrow = date.today() + 
timedelta(days=1) logging.info("Receive PRs suppose to be backported") - query_args = dict( - query=f"type:pr repo:{self._fetch_from} -label:{self.backport_created_label}", - label=",".join(self.labels_to_backport + [self.must_create_backport_label]), - merged=[since_date, tomorrow], - ) + query_args = { + "query": f"type:pr repo:{self._fetch_from} -label:{self.backport_created_label}", + "label": ",".join( + self.labels_to_backport + [self.must_create_backport_label] + ), + "merged": [since_date, tomorrow], + } logging.info("Query to find the backport PRs:\n %s", query_args) self.prs_for_backport = self.gh.get_pulls_from_search(**query_args) logging.info( diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 47e20b3ec09..29906e6571f 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1,26 +1,28 @@ import argparse import concurrent.futures -from copy import deepcopy -from dataclasses import asdict, dataclass -from enum import Enum import json import logging import os +import random import re import subprocess import sys import time +from copy import deepcopy +from dataclasses import asdict, dataclass +from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Union +from typing import Any, Dict, List, Optional, Sequence, Set, Union import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import CI_CONFIG, Build, Labels, JobNames +from ci_config import CI_CONFIG, Build, CIStages, Labels, JobNames from ci_utils import GHActions, is_hex, normalize_string from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, + InsertException, get_instance_id, get_instance_type, prepare_tests_results_for_clickhouse, @@ -53,6 +55,8 @@ from report import ERROR, SUCCESS, BuildResult, JobReport from s3_helper import S3Helper from version_helper import get_version_from_repo +# pylint: disable=too-many-lines + @dataclass class PendingState: @@ -138,7 +142,7 @@ class CiCache: self.s3 = s3 self.job_digests = job_digests self.cache_s3_paths = { - job_type: f"{self._S3_CACHE_PREFIX}/{job_type.value}-{self.job_digests[self._get_reference_job_name(job_type)]}/" + job_type: f"{self._S3_CACHE_PREFIX}/{job_type.value}-{self._get_digest_for_job_type(self.job_digests, job_type)}/" for job_type in self.JobType } self.s3_record_prefixes = { @@ -153,14 +157,23 @@ class CiCache: if not self._LOCAL_CACHE_PATH.exists(): self._LOCAL_CACHE_PATH.mkdir(parents=True, exist_ok=True) - def _get_reference_job_name(self, job_type: JobType) -> str: - res = Build.PACKAGE_RELEASE + def _get_digest_for_job_type( + self, job_digests: Dict[str, str], job_type: JobType + ) -> str: if job_type == self.JobType.DOCS: - res = JobNames.DOCS_CHECK + res = job_digests[JobNames.DOCS_CHECK] elif job_type == self.JobType.SRCS: - res = Build.PACKAGE_RELEASE + # any build type job has the same digest - pick up Build.PACKAGE_RELEASE or Build.PACKAGE_ASAN as a failover + # Build.PACKAGE_RELEASE may not exist in the list if we have reduced CI pipeline + if Build.PACKAGE_RELEASE in job_digests: + res = job_digests[Build.PACKAGE_RELEASE] + elif Build.PACKAGE_ASAN in job_digests: + # failover, if failover does not work - fix it! + res = job_digests[Build.PACKAGE_ASAN] + else: + assert False, "BUG, no build job in digest' list" else: - assert False + assert False, "BUG, New JobType? 
- please update func" return res def _get_record_file_name( @@ -395,7 +408,7 @@ class CiCache: status.dump_to_file(record_file) elif record_type == self.RecordType.PENDING: assert isinstance(status, PendingState) - with open(record_file, "w") as json_file: + with open(record_file, "w", encoding="utf-8") as json_file: json.dump(asdict(status), json_file) else: assert False @@ -643,13 +656,20 @@ class CiCache: if not jobs_with_params: return {} poll_interval_sec = 300 - TIMEOUT = 3600 + # TIMEOUT * MAX_ROUNDS_TO_WAIT must be less than 6h (GH job timeout) with room for the rest of the RunConfig work + TIMEOUT = 3000 # 50 min + MAX_ROUNDS_TO_WAIT = 6 + MAX_JOB_NUM_TO_WAIT = 3 await_finished: Dict[str, List[int]] = {} round_cnt = 0 - while len(jobs_with_params) > 4 and round_cnt < 5: + while ( + len(jobs_with_params) > MAX_JOB_NUM_TO_WAIT + and round_cnt < MAX_ROUNDS_TO_WAIT + ): round_cnt += 1 GHActions.print_in_group( - f"Wait pending jobs, round [{round_cnt}]:", list(jobs_with_params) + f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", + list(jobs_with_params), ) # this is initial approach to wait pending jobs: # start waiting for the next TIMEOUT seconds if there are more than X(=4) jobs to wait @@ -945,10 +965,18 @@ def _mark_success_action( # FIXME: find generic design for propagating and handling job status (e.g. stop using statuses in GH api) # now job ca be build job w/o status data, any other job that exit with 0 with or w/o status data if CI_CONFIG.is_build_job(job): - # there is no status for build jobs - # create dummy success to mark it as done + # there is no CommitStatus for build jobs + # create dummy status relying on JobReport # FIXME: consider creating commit status for build jobs too, to treat everything the same way - CommitStatusData(SUCCESS, "dummy description", "dummy_url").dump_status() + job_report = JobReport.load() if JobReport.exist() else None + if job_report and job_report.status == SUCCESS: + CommitStatusData( + SUCCESS, + "dummy description", + "dummy_url", + pr_num=pr_info.number, + sha=pr_info.sha, + ).dump_status() job_status = None if CommitStatusData.exist(): @@ -989,7 +1017,7 @@ def _mark_success_action( def _print_results(result: Any, outfile: Optional[str], pretty: bool = False) -> None: if outfile: - with open(outfile, "w") as f: + with open(outfile, "w", encoding="utf-8") as f: if isinstance(result, str): print(result, file=f) elif isinstance(result, dict): @@ -1094,7 +1122,7 @@ def _configure_jobs( digests: Dict[str, str] = {} print("::group::Job Digests") - for job in CI_CONFIG.job_generator(): + for job in CI_CONFIG.job_generator(pr_info.head_ref): digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job)) digests[job] = digest print(f" job [{job.rjust(50)}] has digest [{digest}]") @@ -1102,24 +1130,31 @@ def _configure_jobs( ## b. 
check what we need to run ci_cache = None - if not ci_cache_disabled: + if not ci_cache_disabled and CI: ci_cache = CiCache(s3, digests).update() ci_cache.print_status() jobs_to_wait: Dict[str, Dict[str, Any]] = {} + randomization_buckets = {} # type: Dict[str, Set[str]] - for job in digests: - digest = digests[job] + for job, digest in digests.items(): job_config = CI_CONFIG.get_job_config(job) num_batches: int = job_config.num_batches batches_to_do: List[int] = [] add_to_skip = False + if job_config.pr_only and pr_info.is_release_branch(): + continue + if job_config.release_only and not pr_info.is_release_branch(): + continue + + # fill job randomization buckets (for jobs with a configured @random_bucket property) + if job_config.random_bucket: + if not job_config.random_bucket in randomization_buckets: + randomization_buckets[job_config.random_bucket] = set() + randomization_buckets[job_config.random_bucket].add(job) + for batch in range(num_batches): # type: ignore - if job_config.pr_only and pr_info.is_release_branch(): - continue - if job_config.release_only and not pr_info.is_release_branch(): - continue if job_config.run_by_label: # this job controlled by label, add to todo if its label is set in pr if job_config.run_by_label in pr_info.labels: @@ -1159,13 +1194,31 @@ if batches_to_do: jobs_to_do.append(job) + jobs_params[job] = { + "batches": batches_to_do, + "num_batches": num_batches, + } elif add_to_skip: # treat job as being skipped only if it's controlled by digest jobs_to_skip.append(job) - jobs_params[job] = { - "batches": batches_to_do, - "num_batches": num_batches, - } + + if not pr_info.is_release_branch(): + # randomization bucket filtering (pick one random job from each bucket, for jobs with configured random_bucket property) + for _, jobs in randomization_buckets.items(): + jobs_to_remove_randomization = set() + bucket_ = list(jobs) + random.shuffle(bucket_) + while len(bucket_) > 1: + random_job = bucket_.pop() + if random_job in jobs_to_do: + jobs_to_remove_randomization.add(random_job) + if jobs_to_remove_randomization: + print( + f"Following jobs will be removed due to randomization bucket: [{jobs_to_remove_randomization}]" + ) + jobs_to_do = [ + job for job in jobs_to_do if job not in jobs_to_remove_randomization + ] ## c. 
check CI controlling labels and commit messages if pr_info.labels: @@ -1226,6 +1279,33 @@ jobs_to_do = list( set(job for job in jobs_to_do_requested if job not in jobs_to_skip) ) + # if requested job does not have params in jobs_params (it happens for "run_by_label" job) + # we need to add params - otherwise it won't run as "batches" list will be empty + for job in jobs_to_do: + if job not in jobs_params: + num_batches = CI_CONFIG.get_job_config(job).num_batches + jobs_params[job] = { + "batches": list(range(num_batches)), + "num_batches": num_batches, + } + + requested_batches = set() + for token in commit_tokens: + if token.startswith("batch_"): + try: + batches = [ + int(batch) for batch in token.removeprefix("batch_").split("_") + ] + except Exception: + print(f"ERROR: failed to parse commit tag [{token}]") + requested_batches.update(batches) + if requested_batches: + print( + f"NOTE: Only specific job batches were requested [{list(requested_batches)}]" + ) + for job, params in jobs_params.items(): + if params["num_batches"] > 1: + params["batches"] = list(requested_batches) return { "digests": digests, @@ -1240,6 +1320,29 @@ } +def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: + """ + populates GH Actions' workflow with real jobs + "Builds_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] + "Tests_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] + ... + """ + result = {} # type: Dict[str, Any] + stages_to_do = [] + for job in jobs_data["jobs_to_do"]: + stage_type = CI_CONFIG.get_job_ci_stage(job) + if stage_type == CIStages.NA: + continue + if stage_type not in result: + result[stage_type] = [] + stages_to_do.append(stage_type) + result[stage_type].append( + {"job_name": job, "runner_type": CI_CONFIG.get_runner_type(job)} + ) + result["stages_to_do"] = stages_to_do + return result + + def _create_gh_status( commit: Any, job: str, batch: int, num_batches: int, job_status: CommitStatusData ) -> None: @@ -1284,10 +1387,13 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: if CI_CONFIG.is_build_job(job): # no GH status for build jobs continue - num_batches = CI_CONFIG.get_job_config(job).num_batches - for batch in range(num_batches): + job_config = CI_CONFIG.get_job_config(job) + if not job_config: + # there might be a new job that does not exist on this branch - skip it + continue + for batch in range(job_config.num_batches): future = executor.submit( - _concurrent_create_status, job, batch, num_batches + _concurrent_create_status, job, batch, job_config.num_batches ) futures.append(future) done, _ = concurrent.futures.wait(futures) @@ -1304,7 +1410,11 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: def _fetch_commit_tokens(message: str) -> List[str]: pattern = r"#[\w-]+" matches = [match[1:] for match in re.findall(pattern, message)] - res = [match for match in matches if match in Labels or match.startswith("job_")] + res = [ + match + for match in matches + if match in Labels or match.startswith("job_") or match.startswith("batch_") + ] return res @@ -1468,7 +1578,10 @@ def _upload_build_profile_data( profile_data_file.stat().st_size, query, ) - ch_helper.insert_file(url, auth, query, profile_data_file) + try: + ch_helper.insert_file(url, auth, query, profile_data_file) + except InsertException: + logging.error("Failed to insert profile data for the build, continue") query = f"""INSERT INTO binary_sizes ( @@ -1494,7 +1607,10 @@ 
binary_sizes_file.stat().st_size, query, ) - ch_helper.insert_file(url, auth, query, binary_sizes_file) + try: + ch_helper.insert_file(url, auth, query, binary_sizes_file) + except InsertException: + logging.error("Failed to insert binary_size_file for the build, continue") def _run_test(job_name: str, run_command: str) -> int: @@ -1561,11 +1677,11 @@ def main() -> int: indata: Optional[Dict[str, Any]] = None if args.infile: - indata = ( - json.loads(args.infile) - if not os.path.isfile(args.infile) - else json.load(open(args.infile)) - ) + if os.path.isfile(args.infile): + with open(args.infile, encoding="utf-8") as jfd: + indata = json.load(jfd) + else: + indata = json.loads(args.infile) assert indata and isinstance(indata, dict), "Invalid --infile json" result: Dict[str, Any] = {} @@ -1639,13 +1755,7 @@ def main() -> int: if not args.skip_jobs: ci_cache = CiCache(s3, jobs_data["digests"]) - if ( - pr_info.is_release_branch() - or pr_info.event.get("pull_request", {}) - .get("user", {}) - .get("login", "not_maxknv") - == "maxknv" - ): + if pr_info.is_master(): # wait for pending jobs to be finished, await_jobs is a long blocking call # wait pending jobs (for now only on release/master branches) ready_jobs_batches_dict = ci_cache.await_jobs( @@ -1688,6 +1798,8 @@ def main() -> int: result["build"] = build_digest result["docs"] = docs_digest result["ci_flags"] = ci_flags + if not args.skip_jobs: + result["stages_data"] = _generate_ci_stage_config(jobs_data) result["jobs_data"] = jobs_data result["docker_data"] = docker_data ### CONFIGURE action: end @@ -1726,24 +1838,29 @@ def main() -> int: print(build_result.as_json()) print("::endgroup::") else: - # this is a test job - check if GH commit status is present - - # rerun helper check - # FIXME: remove rerun_helper check and rely on ci cache only + # this is a test job - check if GH commit status or cache record is present commit = get_commit( Github(get_best_robot_token(), per_page=100), pr_info.sha ) - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - status = rerun_helper.get_finished_status() - assert status - previous_status = status.state - print("::group::Commit Status") - print(status) - print("::endgroup::") + + # rerun helper check + # FIXME: remove rerun_helper check and rely on ci cache only + if check_name not in ( + # we might want to rerun reports' jobs - disable rerun check for them + JobNames.BUILD_CHECK, + JobNames.BUILD_CHECK_SPECIAL, + ): + rerun_helper = RerunHelper(commit, check_name_with_group) + if rerun_helper.is_already_finished_by_status(): + status = rerun_helper.get_finished_status() + assert status + previous_status = status.state + print("::group::Commit Status") + print(status) + print("::endgroup::") # ci cache check - elif not indata["ci_flags"][Labels.NO_CI_CACHE]: + if not previous_status and not indata["ci_flags"][Labels.NO_CI_CACHE]: ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() job_config = CI_CONFIG.get_job_config(check_name) if ci_cache.is_successful( @@ -1835,7 +1952,7 @@ def main() -> int: pr_info.sha, job_report.test_results, job_report.additional_files, - job_report.check_name or args.job_name, + job_report.check_name or _get_ext_check_name(args.job_name), additional_urls=additional_urls or None, ) commit = get_commit( @@ -1846,7 +1963,7 @@ def main() -> int: job_report.status, check_url, format_description(job_report.description), - job_report.check_name or args.job_name, + job_report.check_name or 
_get_ext_check_name(args.job_name), pr_info, dump_to_file=True, ) @@ -1864,7 +1981,7 @@ def main() -> int: job_report.duration, job_report.start_time, check_url or "", - job_report.check_name or args.job_name, + job_report.check_name or _get_ext_check_name(args.job_name), ) ch_helper.insert_events_into( db="default", table="checks", events=prepared_events diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index db5a83d5b96..7c213da27ec 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +from copy import deepcopy import logging from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from dataclasses import dataclass, field @@ -10,6 +11,32 @@ from ci_utils import WithIter from integration_test_images import IMAGES +class WorkFlows(metaclass=WithIter): + PULL_REQUEST = "PULL_REQUEST" + MASTER = "MASTER" + BACKPORT = "BACKPORT" + RELEASE = "RELEASE" + SYNC = "SYNC" + + +class CIStages(metaclass=WithIter): + NA = "UNKNOWN" + BUILDS_1 = "Builds_1" + BUILDS_2 = "Builds_2" + TESTS_1 = "Tests_1" + TESTS_2 = "Tests_2" + + +class Runners(metaclass=WithIter): + BUILDER = "builder" + STYLE_CHECKER = "style-checker" + STYLE_CHECKER_ARM = "style-checker-aarch64" + FUNC_TESTER = "func-tester" + FUNC_TESTER_ARM = "func-tester-aarch64" + STRESS_TESTER = "stress-tester" + FUZZER_UNIT_TESTER = "fuzzer-unit-tester" + + class Labels(metaclass=WithIter): """ Label names or commit tokens in normalized form @@ -21,6 +48,7 @@ class Labels(metaclass=WithIter): CI_SET_REDUCED = "ci_set_reduced" CI_SET_ARM = "ci_set_arm" CI_SET_INTEGRATION = "ci_set_integration" + CI_SET_ANALYZER = "ci_set_analyzer" libFuzzer = "libFuzzer" @@ -65,9 +93,13 @@ class JobNames(metaclass=WithIter): STATELESS_TEST_TSAN = "Stateless tests (tsan)" STATELESS_TEST_MSAN = "Stateless tests (msan)" STATELESS_TEST_UBSAN = "Stateless tests (ubsan)" - STATELESS_TEST_ANALYZER_RELEASE = "Stateless tests (release, analyzer)" - STATELESS_TEST_DB_REPL_RELEASE = "Stateless tests (release, DatabaseReplicated)" - STATELESS_TEST_S3_RELEASE = "Stateless tests (release, s3 storage)" + STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE = ( + "Stateless tests (release, analyzer, s3, DatabaseReplicated)" + ) + # merged into STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE: + # STATELESS_TEST_ANALYZER_RELEASE = "Stateless tests (release, analyzer)" + # STATELESS_TEST_DB_REPL_RELEASE = "Stateless tests (release, DatabaseReplicated)" + # STATELESS_TEST_S3_RELEASE = "Stateless tests (release, s3 storage)" STATELESS_TEST_S3_DEBUG = "Stateless tests (debug, s3 storage)" STATELESS_TEST_S3_TSAN = "Stateless tests (tsan, s3 storage)" STATELESS_TEST_FLAKY_ASAN = "Stateless tests flaky check (asan)" @@ -141,7 +173,7 @@ class JobNames(metaclass=WithIter): BUILD_CHECK_SPECIAL = "ClickHouse special build check" DOCS_CHECK = "Docs check" - BUGFIX_VALIDATE = "tests bugfix validate check" + BUGFIX_VALIDATE = "Bugfix validation" # dynamically update JobName with Build jobs @@ -198,6 +230,47 @@ class JobConfig: pr_only: bool = False # job is for release/master branches only release_only: bool = False + # to randomly pick and run one job among jobs in the same @random_bucket. Applied in PR branches only. 
+ random_bucket: str = "" + + +builds_job_config = JobConfig( + required_on_release_branch=True, + digest=DigestConfig( + include_paths=[ + "./src", + "./contrib/*-cmake", + "./contrib/consistent-hashing", + "./contrib/murmurhash", + "./contrib/libfarmhash", + "./contrib/pdqsort", + "./contrib/cityhash102", + "./contrib/sparse-checkout", + "./contrib/libmetrohash", + "./contrib/update-submodules.sh", + "./contrib/CMakeLists.txt", + "./CMakeLists.txt", + "./PreLoad.cmake", + "./cmake", + "./base", + "./programs", + "./packages", + "./docker/packager/packager", + "./rust", + # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact + # when there are changes in performance test scripts. + # Due to the current design of the perf test we need to rebuild CH when the performance test changes, + # otherwise the changes will not be visible in the PerformanceTest job in CI + "./tests/performance", + ], + exclude_files=[".md"], + docker=["clickhouse/binary-builder"], + git_submodules=True, + ), + run_command="build_check.py $BUILD_NAME", +) +fuzzer_build_job_config = deepcopy(builds_job_config) +fuzzer_build_job_config.run_by_label = Labels.libFuzzer @dataclass @@ -215,48 +288,14 @@ class BuildConfig: sparse_checkout: bool = False comment: str = "" static_binary_name: str = "" - job_config: JobConfig = field( - default_factory=lambda: JobConfig( - required_on_release_branch=True, - digest=DigestConfig( - include_paths=[ - "./src", - "./contrib/*-cmake", - "./contrib/consistent-hashing", - "./contrib/murmurhash", - "./contrib/libfarmhash", - "./contrib/pdqsort", - "./contrib/cityhash102", - "./contrib/sparse-checkout", - "./contrib/libmetrohash", - "./contrib/update-submodules.sh", - "./contrib/CMakeLists.txt", - "./CMakeLists.txt", - "./PreLoad.cmake", - "./cmake", - "./base", - "./programs", - "./packages", - "./docker/packager/packager", - "./rust", - # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact - # when there are changes in performance test scripts. 
- # Due to the current design of the perf test we need to rebuild CH when the performance test changes, - # otherwise the changes will not be visible in the PerformanceTest job in CI - "./tests/performance", - ], - exclude_files=[".md"], - docker=["clickhouse/binary-builder"], - git_submodules=True, - ), - run_command="build_check.py $BUILD_NAME", - ) - ) + job_config: JobConfig = field(default_factory=lambda: deepcopy(builds_job_config)) def export_env(self, export: bool = False) -> str: def process(field_name: str, field: Union[bool, str]) -> str: if isinstance(field, bool): field = str(field).lower() + elif not isinstance(field, str): + field = "" if export: return f"export BUILD_{field_name.upper()}={repr(field)}" return f"BUILD_{field_name.upper()}={field}" @@ -269,6 +308,7 @@ class BuildReportConfig: builds: List[str] job_config: JobConfig = field( default_factory=lambda: JobConfig( + run_command='build_report_check.py "$CHECK_NAME"', digest=DigestConfig( include_paths=[ "./tests/ci/build_report_check.py", @@ -282,7 +322,6 @@ class BuildReportConfig: @dataclass class TestConfig: required_build: str - force_tests: bool = False job_config: JobConfig = field(default_factory=JobConfig) @@ -302,6 +341,7 @@ install_check_digest = DigestConfig( ) stateless_check_digest = DigestConfig( include_paths=[ + "./tests/ci/functional_test_check.py", "./tests/queries/0_stateless/", "./tests/clickhouse-test", "./tests/config", @@ -312,6 +352,7 @@ stateless_check_digest = DigestConfig( ) stateful_check_digest = DigestConfig( include_paths=[ + "./tests/ci/functional_test_check.py", "./tests/queries/1_stateful/", "./tests/clickhouse-test", "./tests/config", @@ -391,6 +432,20 @@ bugfix_validate_check = DigestConfig( ], ) # common test params +docker_server_job_config = JobConfig( + required_on_release_branch=True, + run_command='docker_server.py --check-name "$CHECK_NAME" --release-type head --allow-build-reuse', + digest=DigestConfig( + include_paths=[ + "tests/ci/docker_server.py", + "./docker/server", + ] + ), +) +compatibility_test_common_params = { + "digest": compatibility_check_digest, + "run_command": "compatibility_check.py", +} statless_test_common_params = { "digest": stateless_check_digest, "run_command": 'functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT', @@ -426,20 +481,36 @@ perf_test_common_params = { "digest": perf_check_digest, "run_command": "performance_comparison_check.py", } -sqllancer_test_common_params = { - "digest": sqllancer_check_digest, - "run_command": "sqlancer_check.py", - "run_always": True, +sqllancer_test_common_params = JobConfig( + digest=sqllancer_check_digest, + run_command="sqlancer_check.py", + release_only=True, + run_always=True, +) +sqllogic_test_params = JobConfig( + digest=sqllogic_check_digest, + run_command="sqllogic_test.py", + timeout=10800, + release_only=True, +) +sql_test_params = JobConfig( + digest=sqltest_check_digest, + run_command="sqltest.py", + timeout=10800, + release_only=True, +) +clickbench_test_params = { + "digest": DigestConfig( + include_paths=[ + "tests/ci/clickbench.py", + ], + docker=["clickhouse/clickbench"], + ), + "run_command": 'clickbench.py "$CHECK_NAME"', } -sqllogic_test_params = { - "digest": sqllogic_check_digest, - "run_command": "sqllogic_test.py", - "timeout": 10800, -} -sql_test_params = { - "digest": sqltest_check_digest, - "run_command": "sqltest.py", - "timeout": 10800, +install_test_params = { + "digest": install_check_digest, + "run_command": 'install_check.py "$CHECK_NAME"', } @@ -462,6 +533,45 @@ class 
CIConfig: return config return None + def get_job_ci_stage(self, job_name: str) -> str: + if job_name in [ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + JobNames.JEPSEN_KEEPER, + JobNames.BUILD_CHECK, + JobNames.BUILD_CHECK_SPECIAL, + ]: + # FIXME: we can't currently handle Jepsen in the Stage as its job has a concurrency directive + # BUILD_CHECK and BUILD_CHECK_SPECIAL do not run in a stage because we need them even if the Builds stage failed + return CIStages.NA + stage_type = None + if self.is_build_job(job_name): + stage_type = CIStages.BUILDS_1 + if job_name in CI_CONFIG.get_builds_for_report( + JobNames.BUILD_CHECK_SPECIAL + ): + # special builds go to Build_2 stage to not delay Builds_1/Test_1 + stage_type = CIStages.BUILDS_2 + elif self.is_docs_job(job_name): + stage_type = CIStages.TESTS_1 + elif job_name == JobNames.BUILD_CHECK_SPECIAL: + stage_type = CIStages.TESTS_2 + elif self.is_test_job(job_name): + stage_type = CIStages.TESTS_1 + if job_name in CI_CONFIG.test_configs: + required_build = CI_CONFIG.test_configs[job_name].required_build + assert required_build + if required_build in CI_CONFIG.get_builds_for_report( + JobNames.BUILD_CHECK + ): + stage_type = CIStages.TESTS_1 + else: + stage_type = CIStages.TESTS_2 + else: + stage_type = CIStages.TESTS_1 + assert stage_type, f"BUG [{job_name}]" + return stage_type + def get_job_config(self, check_name: str) -> JobConfig: res = None for config in ( @@ -473,11 +583,67 @@ class CIConfig: if check_name in config: # type: ignore res = config[check_name].job_config # type: ignore break - assert ( - res is not None - ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]" return res # type: ignore + def get_runner_type(self, check_name: str) -> str: + result = None + if self.is_build_job(check_name) or check_name == JobNames.FAST_TEST: + result = Runners.BUILDER + elif any( + words in check_name.lower() + for words in [ + "install packages", + "compatibility check", + "docker", + "build check", + "jepsen", + "style check", + ] + ): + result = Runners.STYLE_CHECKER + elif check_name == JobNames.DOCS_CHECK: + # docs job is demanding + result = Runners.FUNC_TESTER_ARM + elif any( + words in check_name.lower() + for words in [ + "stateless", + "stateful", + "clickbench", + "sqllogic test", + "libfuzzer", + "bugfix validation", + ] + ): + result = Runners.FUNC_TESTER + elif any( + words in check_name.lower() + for words in ["stress", "upgrade", "integration", "performance comparison"] + ): + result = Runners.STRESS_TESTER + elif any( + words in check_name.lower() + for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] + ): + result = Runners.FUZZER_UNIT_TESTER + + assert result, f"BUG, no runner for [{check_name}]" + + if ( + "aarch" in check_name.lower() or "arm64" in check_name.lower() + ) and "aarch" not in result: + if result == Runners.STRESS_TESTER: + # FIXME: no arm stress tester group atm + result = Runners.FUNC_TESTER_ARM + elif result == Runners.BUILDER: + # crosscompile - no arm required + pass + else: + # switch to aarch64 runner + result += "-aarch64" + + return result + @staticmethod def normalize_string(input_string: str) -> str: lowercase_string = input_string.lower() @@ -538,20 +704,38 @@ class CIConfig: ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]" return res # type: ignore - def job_generator(self) -> Iterable[str]: + def job_generator(self, branch: str) -> Iterable[str]: """ traverses all check names in CI pipeline """ + assert branch for config in ( 
self.other_jobs_configs, self.build_config, self.builds_report_config, self.test_configs, ): - for check_name in config: # type: ignore - yield check_name + yield from config # type: ignore + + def get_builds_for_report( + self, report_name: str, release: bool = False, backport: bool = False + ) -> List[str]: + # hack to modify build list for release and bp wf + assert not (release and backport), "Invalid input" + if backport and report_name == JobNames.BUILD_CHECK: + return [ + Build.PACKAGE_RELEASE, + Build.PACKAGE_AARCH64, + Build.PACKAGE_ASAN, + Build.PACKAGE_TSAN, + Build.PACKAGE_DEBUG, + ] + if (release or backport) and report_name == JobNames.BUILD_CHECK_SPECIAL: + return [ + Build.BINARY_DARWIN, + Build.BINARY_DARWIN_AARCH64, + ] - def get_builds_for_report(self, report_name: str) -> List[str]: return self.builds_report_config[report_name].builds @classmethod @@ -560,11 +744,7 @@ class CIConfig: @classmethod def is_test_job(cls, job: str) -> bool: - return ( - not cls.is_build_job(job) - and not cls.is_build_job(job) - and job != JobNames.STYLE_CHECK - ) + return not cls.is_build_job(job) and job != JobNames.STYLE_CHECK @classmethod def is_docs_job(cls, job: str) -> bool: @@ -639,16 +819,18 @@ CI_CONFIG = CIConfig( Labels.CI_SET_INTEGRATION: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, - Build.PACKAGE_ASAN, Build.PACKAGE_RELEASE, - Build.PACKAGE_TSAN, - Build.PACKAGE_AARCH64, - JobNames.INTEGRATION_TEST_ASAN, - JobNames.INTEGRATION_TEST_ARM, JobNames.INTEGRATION_TEST, + ] + ), + Labels.CI_SET_ANALYZER: LabelConfig( + run_jobs=[ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + Build.PACKAGE_RELEASE, + Build.PACKAGE_ASAN, + JobNames.STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE, JobNames.INTEGRATION_TEST_ASAN_ANALYZER, - JobNames.INTEGRATION_TEST_TSAN, - JobNames.INTEGRATION_TEST_FLAKY, ] ), Labels.CI_SET_REDUCED: LabelConfig( @@ -656,17 +838,15 @@ CI_CONFIG = CIConfig( job for job in JobNames if not any( - [ - nogo in job - for nogo in ( - "asan", - "tsan", - "msan", - "ubsan", - # skip build report jobs as not all builds will be done - "build check", - ) - ] + nogo in job + for nogo in ( + "asan", + "tsan", + "msan", + "ubsan", + # skip build report jobs as not all builds will be done + "build check", + ) ) ] ), @@ -803,7 +983,7 @@ CI_CONFIG = CIConfig( name=Build.FUZZERS, compiler="clang-17", package_type="fuzzers", - job_config=JobConfig(run_by_label=Labels.libFuzzer), + job_config=fuzzer_build_job_config, ), }, builds_report_config={ @@ -816,9 +996,6 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, Build.PACKAGE_MSAN, Build.PACKAGE_DEBUG, - Build.PACKAGE_RELEASE_COVERAGE, - Build.BINARY_RELEASE, - Build.FUZZERS, ] ), JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig( @@ -834,33 +1011,15 @@ CI_CONFIG = CIConfig( Build.BINARY_S390X, Build.BINARY_AMD64_COMPAT, Build.BINARY_AMD64_MUSL, + Build.PACKAGE_RELEASE_COVERAGE, + Build.BINARY_RELEASE, + Build.FUZZERS, ] ), }, other_jobs_configs={ - JobNames.DOCKER_SERVER: TestConfig( - "", - job_config=JobConfig( - required_on_release_branch=True, - digest=DigestConfig( - include_paths=[ - "tests/ci/docker_server.py", - "./docker/server", - ] - ), - ), - ), - JobNames.DOCKER_KEEPER: TestConfig( - "", - job_config=JobConfig( - digest=DigestConfig( - include_paths=[ - "tests/ci/docker_server.py", - "./docker/keeper", - ] - ), - ), - ), + JobNames.DOCKER_SERVER: TestConfig("", job_config=docker_server_job_config), + JobNames.DOCKER_KEEPER: TestConfig("", job_config=docker_server_job_config), JobNames.DOCS_CHECK: TestConfig( "", job_config=JobConfig( 
@@ -868,6 +1027,7 @@ CI_CONFIG = CIConfig( include_paths=["**/*.md", "./docs", "tests/ci/docs_check.py"], docker=["clickhouse/docs-builder"], ), + run_command="docs_check.py", ), ), JobNames.FAST_TEST: TestConfig( @@ -890,15 +1050,17 @@ CI_CONFIG = CIConfig( JobNames.BUGFIX_VALIDATE: TestConfig( "", # we run this check by label - no digest required - job_config=JobConfig(run_by_label="pr-bugfix"), + job_config=JobConfig( + run_by_label="pr-bugfix", run_command="bugfix_validate_check.py" + ), ), }, test_configs={ JobNames.INSTALL_TEST_AMD: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(digest=install_check_digest) + Build.PACKAGE_RELEASE, job_config=JobConfig(**install_test_params) # type: ignore ), JobNames.INSTALL_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(digest=install_check_digest) + Build.PACKAGE_AARCH64, job_config=JobConfig(**install_test_params) # type: ignore ), JobNames.STATEFUL_TEST_ASAN: TestConfig( Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore @@ -932,16 +1094,16 @@ CI_CONFIG = CIConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), # End stateful tests for parallel replicas JobNames.STATELESS_TEST_ASAN: TestConfig( @@ -974,17 +1136,10 @@ CI_CONFIG = CIConfig( JobNames.STATELESS_TEST_AARCH64: TestConfig( Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore ), - JobNames.STATELESS_TEST_ANALYZER_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore - ), - JobNames.STATELESS_TEST_DB_REPL_RELEASE: TestConfig( + JobNames.STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore ), - JobNames.STATELESS_TEST_S3_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=2, **statless_test_common_params), # type: ignore - ), JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore @@ -993,36 +1148,36 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), - JobNames.STRESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**stress_test_common_params) # type: ignore + 
JobNames.STRESS_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_TSAN: TestConfig( Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), + JobNames.STRESS_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + ), JobNames.STRESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore ), JobNames.INTEGRATION_TEST_ASAN: TestConfig( Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **integration_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, **integration_test_common_params, release_only=True), # type: ignore ), JobNames.INTEGRATION_TEST_ASAN_ANALYZER: TestConfig( Build.PACKAGE_ASAN, @@ -1037,12 +1192,9 @@ CI_CONFIG = CIConfig( # add [run_by_label="test arm"] to not run in regular pr workflow by default job_config=JobConfig(num_batches=6, **integration_test_common_params, run_by_label="test arm"), # type: ignore ), - # FIXME: currently no wf has this job. 
Try to enable - # "Integration tests (msan)": TestConfig(Build.PACKAGE_MSAN, job_config=JobConfig(num_batches=6, **integration_test_common_params) # type: ignore - # ), JobNames.INTEGRATION_TEST: TestConfig( Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **integration_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, **integration_test_common_params, release_only=True), # type: ignore ), JobNames.INTEGRATION_TEST_FLAKY: TestConfig( Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **integration_test_common_params) # type: ignore @@ -1050,13 +1202,13 @@ CI_CONFIG = CIConfig( JobNames.COMPATIBILITY_TEST: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig( - required_on_release_branch=True, digest=compatibility_check_digest + required_on_release_branch=True, **compatibility_test_common_params # type: ignore ), ), JobNames.COMPATIBILITY_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, job_config=JobConfig( - required_on_release_branch=True, digest=compatibility_check_digest + required_on_release_branch=True, **compatibility_test_common_params # type: ignore ), ), JobNames.UNIT_TEST: TestConfig( @@ -1115,20 +1267,29 @@ CI_CONFIG = CIConfig( job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore ), JobNames.SQLANCER: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**sqllancer_test_common_params) # type: ignore + Build.PACKAGE_RELEASE, job_config=sqllancer_test_common_params ), JobNames.SQLANCER_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**sqllancer_test_common_params) # type: ignore + Build.PACKAGE_DEBUG, job_config=sqllancer_test_common_params ), JobNames.SQL_LOGIC_TEST: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**sqllogic_test_params) # type: ignore + Build.PACKAGE_RELEASE, job_config=sqllogic_test_params ), - JobNames.SQLTEST: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**sql_test_params) # type: ignore + JobNames.SQLTEST: TestConfig(Build.PACKAGE_RELEASE, job_config=sql_test_params), + JobNames.CLCIKBENCH_TEST: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**clickbench_test_params) # type: ignore ), - JobNames.CLCIKBENCH_TEST: TestConfig(Build.PACKAGE_RELEASE), - JobNames.CLCIKBENCH_TEST_ARM: TestConfig(Build.PACKAGE_AARCH64), - JobNames.LIBFUZZER_TEST: TestConfig(Build.FUZZERS, job_config=JobConfig(run_by_label=Labels.libFuzzer)), # type: ignore + JobNames.CLCIKBENCH_TEST_ARM: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(**clickbench_test_params) # type: ignore + ), + JobNames.LIBFUZZER_TEST: TestConfig( + Build.FUZZERS, + job_config=JobConfig( + run_by_label=Labels.libFuzzer, + timeout=10800, + run_command='libfuzzer_test_check.py "$CHECK_NAME" 10800', + ), + ), # type: ignore }, ) CI_CONFIG.validate() @@ -1137,6 +1298,7 @@ CI_CONFIG.validate() # checks required by Mergeable Check REQUIRED_CHECKS = [ "PR Check", + "A Sync", # Cloud sync JobNames.BUILD_CHECK, JobNames.BUILD_CHECK_SPECIAL, JobNames.DOCS_CHECK, @@ -1150,7 +1312,7 @@ REQUIRED_CHECKS = [ JobNames.UNIT_TEST_TSAN, JobNames.UNIT_TEST_UBSAN, JobNames.INTEGRATION_TEST_ASAN_ANALYZER, - JobNames.STATELESS_TEST_ANALYZER_RELEASE, + JobNames.STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE, ] @@ -1173,10 +1335,10 @@ CHECK_DESCRIPTIONS = [ lambda x: x.startswith("AST fuzzer"), ), CheckDescription( - "Bugfix validate check", + JobNames.BUGFIX_VALIDATE, "Checks that either a new test (functional or integration) or there " "some changed tests that fail with the 
binary built on master branch", - lambda x: x == "Bugfix validate check", + lambda x: x == JobNames.BUGFIX_VALIDATE, ), CheckDescription( "CI running", diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index 5cb1e45dd14..47161215a97 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -8,23 +8,14 @@ Lambda function to: import argparse import sys -from datetime import datetime -from typing import Dict, List +from typing import Dict -import requests # type: ignore import boto3 # type: ignore -from botocore.exceptions import ClientError # type: ignore - -from lambda_shared import ( - RUNNER_TYPE_LABELS, - RunnerDescription, - RunnerDescriptions, - list_runners, -) +from lambda_shared import RUNNER_TYPE_LABELS, RunnerDescriptions, list_runners from lambda_shared.token import ( + get_access_token_by_key_app, get_cached_access_token, get_key_and_app_from_aws, - get_access_token_by_key_app, ) UNIVERSAL_LABEL = "universal" @@ -162,7 +153,7 @@ if __name__ == "__main__": if args.private_key: private_key = args.private_key elif args.private_key_path: - with open(args.private_key_path, "r") as key_file: + with open(args.private_key_path, "r", encoding="utf-8") as key_file: private_key = key_file.read() else: print("Attempt to get key and id from AWS secret manager") diff --git a/tests/ci/clean_lost_instances_lambda/app.py b/tests/ci/clean_lost_instances_lambda/app.py index 65f6ff78d4a..4accc14f7ae 100644 --- a/tests/ci/clean_lost_instances_lambda/app.py +++ b/tests/ci/clean_lost_instances_lambda/app.py @@ -8,14 +8,13 @@ Lambda function to: import argparse import sys -from datetime import datetime from dataclasses import dataclass +from datetime import datetime from typing import Dict, List -import requests # type: ignore import boto3 # type: ignore +import requests from botocore.exceptions import ClientError # type: ignore - from lambda_shared import ( RUNNER_TYPE_LABELS, RunnerDescription, @@ -23,9 +22,9 @@ from lambda_shared import ( list_runners, ) from lambda_shared.token import ( + get_access_token_by_key_app, get_cached_access_token, get_key_and_app_from_aws, - get_access_token_by_key_app, ) UNIVERSAL_LABEL = "universal" @@ -140,6 +139,7 @@ def delete_runner(access_token: str, runner: RunnerDescription) -> bool: response = requests.delete( f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers, + timeout=30, ) response.raise_for_status() print(f"Response code deleting {runner.name} is {response.status_code}") @@ -325,7 +325,7 @@ if __name__ == "__main__": if args.private_key: private_key = args.private_key elif args.private_key_path: - with open(args.private_key_path, "r") as key_file: + with open(args.private_key_path, "r", encoding="utf-8") as key_file: private_key = key_file.read() else: print("Attempt to get key and id from AWS secret manager") diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index f8707cbcff7..a1988abb1f5 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -10,14 +10,15 @@ from pathlib import Path from typing import List, Tuple from build_download_helper import download_all_deb_packages -from clickhouse_helper import CiLogsCredentials -from commit_status_helper import override_status -from docker_images_helper import DockerImage, get_docker_image, pull_image -from env_helper import REPORT_PATH, TEMP_PATH -from pr_info import FORCE_TESTS_LABEL, PRInfo -from report import ERROR, SUCCESS, JobReport, StatusType, 
TestResults +from clickhouse_helper import ( + CiLogsCredentials, +) +from docker_images_helper import get_docker_image, pull_image, DockerImage +from env_helper import TEMP_PATH, REPORT_PATH +from pr_info import PRInfo from stopwatch import Stopwatch from tee_popen import TeePopen +from report import ERROR, SUCCESS, JobReport, StatusType, TestResults def get_image_name() -> str: @@ -41,6 +42,7 @@ def get_run_command( f"{ci_logs_args}" f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {env_str} {image}" ) @@ -164,7 +166,6 @@ def main(): state, description, test_results, additional_logs = process_results( result_path, server_log_path ) - state = override_status(state, check_name) JobReport( description=description, @@ -176,10 +177,7 @@ def main(): ).dump() if state != SUCCESS: - if FORCE_TESTS_LABEL in pr_info.labels: - print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") - else: - sys.exit(1) + sys.exit(1) if __name__ == "__main__": diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index f338a1e14c3..637c4519d3d 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -1,13 +1,12 @@ #!/usr/bin/env python3 -from pathlib import Path -from typing import Dict, List, Optional import fileinput import json import logging import time +from pathlib import Path +from typing import Dict, List, Optional -import requests # type: ignore - +import requests from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo from report import TestResults @@ -72,11 +71,11 @@ class ClickHouseHelper: if args: url = args[0] url = kwargs.get("url", url) - kwargs["timeout"] = kwargs.get("timeout", 100) + timeout = kwargs.pop("timeout", 100) for i in range(5): try: - response = requests.post(*args, **kwargs) + response = requests.post(*args, timeout=timeout, **kwargs) except Exception as e: error = f"Received exception while sending data to {url} on {i} attempt: {e}" logging.warning(error) @@ -148,7 +147,9 @@ class ClickHouseHelper: for i in range(5): response = None try: - response = requests.get(self.url, params=params, headers=self.auth) + response = requests.get( + self.url, params=params, headers=self.auth, timeout=100 + ) response.raise_for_status() return response.text except Exception as ex: @@ -215,24 +216,24 @@ def prepare_tests_results_for_clickhouse( head_ref = pr_info.head_ref head_repo = pr_info.head_name - common_properties = dict( - pull_request_number=pr_info.number, - commit_sha=pr_info.sha, - commit_url=pr_info.commit_html_url, - check_name=check_name, - check_status=check_status, - check_duration_ms=int(float(check_duration) * 1000), - check_start_time=check_start_time, - report_url=report_url, - pull_request_url=pull_request_url, - base_ref=base_ref, - base_repo=base_repo, - head_ref=head_ref, - head_repo=head_repo, - task_url=pr_info.task_url, - instance_type=get_instance_type(), - instance_id=get_instance_id(), - ) + common_properties = { + "pull_request_number": pr_info.number, + "commit_sha": pr_info.sha, + "commit_url": pr_info.commit_html_url, + "check_name": check_name, + "check_status": check_status, + "check_duration_ms": int(float(check_duration) * 1000), + "check_start_time": check_start_time, + "report_url": report_url, + "pull_request_url": pull_request_url, + "base_ref": base_ref, + "base_repo": base_repo, + "head_ref": head_ref, + "head_repo": head_repo, + 
"task_url": pr_info.task_url, + "instance_type": get_instance_type(), + "instance_id": get_instance_id(), + } # Always publish a total record for all checks. For checks with individual # tests, also publish a record per test. diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 8a34d375d1e..1c2d8b2ade8 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -18,9 +18,7 @@ from github.GithubObject import NotSet from github.IssueComment import IssueComment from github.Repository import Repository -# isort: on - -from ci_config import CHECK_DESCRIPTIONS, CI_CONFIG, REQUIRED_CHECKS, CheckDescription +from ci_config import REQUIRED_CHECKS, CHECK_DESCRIPTIONS, CheckDescription from env_helper import GITHUB_JOB_URL, GITHUB_REPOSITORY, TEMP_PATH from pr_info import SKIP_MERGEABLE_CHECK_LABEL, PRInfo from report import ( @@ -67,21 +65,6 @@ class RerunHelper: return None -def override_status( - status: StatusType, check_name: str, invert: bool = False -) -> StatusType: - test_config = CI_CONFIG.test_configs.get(check_name) - if test_config and test_config.force_tests: - return SUCCESS - - if invert: - if status == SUCCESS: - return ERROR - return SUCCESS - - return status - - def get_commit(gh: Github, commit_sha: str, retry_count: int = RETRY) -> Commit: for i in range(retry_count): try: @@ -320,7 +303,7 @@ def post_commit_status_to_file( file_path: Path, description: str, state: str, report_url: str ) -> None: if file_path.exists(): - raise Exception(f'File "{file_path}" already exists!') + raise FileExistsError(f'File "{file_path}" already exists!') with open(file_path, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerow([state, report_url, description]) @@ -346,7 +329,7 @@ class CommitStatusData: @classmethod def load_from_file(cls, file_path: Union[Path, str]): # type: ignore res = {} - with open(file_path, "r") as json_file: + with open(file_path, "r", encoding="utf-8") as json_file: res = json.load(json_file) return CommitStatusData(**cls._filter_dict(res)) @@ -364,7 +347,7 @@ class CommitStatusData: def dump_to_file(self, file_path: Union[Path, str]) -> None: file_path = Path(file_path) or STATUS_FILE_PATH - with open(file_path, "w") as json_file: + with open(file_path, "w", encoding="utf-8") as json_file: json.dump(asdict(self), json_file) def is_ok(self): @@ -442,13 +425,14 @@ def set_mergeable_check( ) -> None: commit.create_status( context=MERGEABLE_NAME, - description=description, + description=format_description(description), state=state, target_url=GITHUB_JOB_URL(), ) def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None: + "check if the check_name in REQUIRED_CHECKS and then trigger update" not_run = ( pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"}) or check_name not in REQUIRED_CHECKS @@ -462,7 +446,11 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> logging.info("Update Mergeable Check by %s", check_name) statuses = get_commit_filtered_statuses(commit) + trigger_mergeable_check(commit, statuses) + +def trigger_mergeable_check(commit: Commit, statuses: CommitStatuses) -> None: + """calculate and update MERGEABLE_NAME""" required_checks = [ status for status in statuses if status.context in REQUIRED_CHECKS ] diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index a2e6c94cf48..5e980660749 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -2,12 
+2,17 @@ import argparse import logging +import os import subprocess import sys -from distutils.version import StrictVersion from pathlib import Path from typing import List, Tuple +# isort: off +from pip._vendor.packaging.version import Version + +# isort: on + from build_download_helper import download_builds_filter from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH @@ -21,7 +26,7 @@ DOWNLOAD_RETRIES_COUNT = 5 def process_os_check(log_path: Path) -> TestResult: name = log_path.name - with open(log_path, "r") as log: + with open(log_path, "r", encoding="utf-8") as log: line = log.read().split("\n")[0].strip() if line != "OK": return TestResult(name, "FAIL") @@ -30,7 +35,7 @@ def process_os_check(log_path: Path) -> TestResult: def process_glibc_check(log_path: Path, max_glibc_version: str) -> TestResults: test_results = [] # type: TestResults - with open(log_path, "r") as log: + with open(log_path, "r", encoding="utf-8") as log: for line in log: if line.strip(): columns = line.strip().split(" ") @@ -38,7 +43,7 @@ def process_glibc_check(log_path: Path, max_glibc_version: str) -> TestResults: _, version = symbol_with_glibc.split("@GLIBC_") if version == "PRIVATE": test_results.append(TestResult(symbol_with_glibc, "FAIL")) - elif StrictVersion(version) > max_glibc_version: + elif Version(version) > Version(max_glibc_version): test_results.append(TestResult(symbol_with_glibc, "FAIL")) if not test_results: test_results.append(TestResult("glibc check", "OK")) @@ -118,11 +123,7 @@ def get_run_commands_distributions( def parse_args(): parser = argparse.ArgumentParser("Check compatibility with old distributions") - parser.add_argument("--check-name", required=True) - parser.add_argument("--check-glibc", action="store_true") - parser.add_argument( - "--check-distributions", action="store_true" - ) # currently hardcoded to x86, don't enable for ARM + parser.add_argument("--check-name", required=False) return parser.parse_args() @@ -130,6 +131,13 @@ def main(): logging.basicConfig(level=logging.INFO) args = parse_args() + check_name = args.check_name or os.getenv("CHECK_NAME") + assert check_name + check_glibc = True + # currently hardcoded to x86, don't enable for ARM + check_distributions = ( + "aarch64" not in check_name.lower() and "arm64" not in check_name.lower() + ) stopwatch = Stopwatch() @@ -146,7 +154,7 @@ def main(): "clickhouse-common-static_" in url or "clickhouse-server_" in url ) - download_builds_filter(args.check_name, reports_path, packages_path, url_filter) + download_builds_filter(check_name, reports_path, packages_path, url_filter) for package in packages_path.iterdir(): if package.suffix == ".deb": @@ -162,11 +170,11 @@ def main(): run_commands = [] - if args.check_glibc: + if check_glibc: check_glibc_commands = get_run_commands_glibc(packages_path, result_path) run_commands.extend(check_glibc_commands) - if args.check_distributions: + if check_distributions: centos_image = pull_image(get_docker_image(IMAGE_CENTOS)) ubuntu_image = pull_image(get_docker_image(IMAGE_UBUNTU)) check_distributions_commands = get_run_commands_distributions( @@ -191,18 +199,18 @@ def main(): # See https://sourceware.org/glibc/wiki/Glibc%20Timeline max_glibc_version = "" - if "amd64" in args.check_name: + if "amd64" in check_name: max_glibc_version = "2.4" - elif "aarch64" in args.check_name: + elif "aarch64" in check_name: max_glibc_version = "2.18" # because of build with newer sysroot? 
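# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: the glibc check above replaces the
# removed distutils.version.StrictVersion with packaging's Version and compares
# two Version objects. The patch imports it from pip._vendor.packaging.version;
# with the standalone "packaging" package installed the same comparison looks
# like this (symbol_is_too_new is a hypothetical helper name):
from packaging.version import Version


def symbol_is_too_new(symbol_with_glibc: str, max_glibc_version: str) -> bool:
    # A symbol line looks like "memcpy@GLIBC_2.14"; "PRIVATE" versions always fail.
    _, version = symbol_with_glibc.split("@GLIBC_")
    if version == "PRIVATE":
        return True
    return Version(version) > Version(max_glibc_version)


assert symbol_is_too_new("memcpy@GLIBC_2.14", "2.4")
assert not symbol_is_too_new("memcpy@GLIBC_2.2.5", "2.4")
# ---------------------------------------------------------------------------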
else: - raise Exception("Can't determine max glibc version") + raise RuntimeError("Can't determine max glibc version") state, description, test_results, additional_logs = process_result( result_path, server_log_path, - args.check_glibc, - args.check_distributions, + check_glibc, + check_distributions, max_glibc_version, ) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index af0416d83dc..b04a3975545 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -25,7 +25,6 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from upload_result_helper import upload_results -NAME = "Push to Dockerhub" TEMP_PATH = Path(RUNNER_TEMP) / "docker_images_check" TEMP_PATH.mkdir(parents=True, exist_ok=True) @@ -177,6 +176,9 @@ def main(): stopwatch = Stopwatch() args = parse_args() + + NAME = f"Push to Dockerhub {args.suffix}" + if args.push: logging.info("login to docker hub") docker_login() @@ -193,18 +195,21 @@ def main(): ok_cnt = 0 status = SUCCESS # type: StatusType - image_tags = ( - json.loads(args.image_tags) - if not os.path.isfile(args.image_tags) - else json.load(open(args.image_tags)) - ) - missing_images = ( - image_tags - if args.missing_images == "all" - else json.loads(args.missing_images) - if not os.path.isfile(args.missing_images) - else json.load(open(args.missing_images)) - ) + + if os.path.isfile(args.image_tags): + with open(args.image_tags, "r", encoding="utf-8") as jfd: + image_tags = json.load(jfd) + else: + image_tags = json.loads(args.image_tags) + + if args.missing_images == "all": + missing_images = image_tags + elif os.path.isfile(args.missing_images): + with open(args.missing_images, "r", encoding="utf-8") as jfd: + missing_images = json.load(jfd) + else: + missing_images = json.loads(args.missing_images) + images_build_list = get_images_oredered_list() for image in images_build_list: diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index fc00969d5d6..3c122545735 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -135,18 +135,20 @@ def main(): archs = args.suffixes assert len(archs) > 1, "arch suffix input param is invalid" - image_tags = ( - json.loads(args.image_tags) - if not os.path.isfile(args.image_tags) - else json.load(open(args.image_tags)) - ) - missing_images = ( - list(image_tags) - if args.missing_images == "all" - else json.loads(args.missing_images) - if not os.path.isfile(args.missing_images) - else json.load(open(args.missing_images)) - ) + if os.path.isfile(args.image_tags): + with open(args.image_tags, "r", encoding="utf-8") as jfd: + image_tags = json.load(jfd) + else: + image_tags = json.loads(args.image_tags) + + if args.missing_images == "all": + missing_images = image_tags + elif os.path.isfile(args.missing_images): + with open(args.missing_images, "r", encoding="utf-8") as jfd: + missing_images = json.load(jfd) + else: + missing_images = json.loads(args.missing_images) + test_results = [] status = SUCCESS # type: StatusType diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 7f53034fd0f..fda0dd16d0b 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -51,7 +51,11 @@ def parse_args() -> argparse.Namespace: description="A program to build clickhouse-server image, both alpine and " "ubuntu versions", ) - + parser.add_argument( + "--check-name", + required=False, + default="", + ) parser.add_argument( "--version", type=version_arg, @@ -71,13 +75,13 @@ def 
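# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: docker_images_check.py and
# docker_manifests_merge.py above both accept --image-tags / --missing-images
# either as an inline JSON string or as a path to a JSON file, now read with an
# explicit encoding and a context-managed handle. A small helper capturing that
# shared logic (the name load_json_arg is hypothetical):
import json
import os


def load_json_arg(value: str):
    if os.path.isfile(value):
        with open(value, "r", encoding="utf-8") as fd:
            return json.load(fd)
    return json.loads(value)


# image_tags = load_json_arg(args.image_tags)
# missing_images = (
#     image_tags if args.missing_images == "all" else load_json_arg(args.missing_images)
# )
# ---------------------------------------------------------------------------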
parse_args() -> argparse.Namespace: parser.add_argument( "--image-path", type=str, - default="docker/server", + default="", help="a path to docker context directory", ) parser.add_argument( "--image-repo", type=str, - default="clickhouse/clickhouse-server", + default="", help="image name on docker hub", ) parser.add_argument( @@ -92,14 +96,7 @@ def parse_args() -> argparse.Namespace: default=argparse.SUPPRESS, help="don't push reports to S3 and github", ) - parser.add_argument("--push", default=True, help=argparse.SUPPRESS) - parser.add_argument( - "--no-push-images", - action="store_false", - dest="push", - default=argparse.SUPPRESS, - help="don't push images to docker hub", - ) + parser.add_argument("--push", action="store_true", help=argparse.SUPPRESS) parser.add_argument("--os", default=["ubuntu", "alpine"], help=argparse.SUPPRESS) parser.add_argument( "--no-ubuntu", @@ -337,13 +334,37 @@ def main(): makedirs(TEMP_PATH, exist_ok=True) args = parse_args() - image = DockerImageData(args.image_path, args.image_repo, False) + + pr_info = PRInfo() + + if args.check_name: + assert not args.image_path and not args.image_repo + if "server image" in args.check_name: + image_path = "docker/server" + image_repo = "clickhouse/clickhouse-server" + elif "keeper image" in args.check_name: + image_path = "docker/keeper" + image_repo = "clickhouse/clickhouse-keeper" + else: + assert False, "Invalid --check-name" + else: + assert args.image_path and args.image_repo + image_path = args.image_path + image_repo = args.image_repo + + push = args.push + del args.image_path + del args.image_repo + del args.push + + if pr_info.is_master(): + push = True + + image = DockerImageData(image_path, image_repo, False) args.release_type = auto_release_type(args.version, args.release_type) tags = gen_tags(args.version, args.release_type) - pr_info = None - repo_urls = dict() - direct_urls: Dict[str, List[str]] = dict() - pr_info = PRInfo() + repo_urls = {} + direct_urls: Dict[str, List[str]] = {} release_or_pr, _ = get_release_or_pr(pr_info, args.version) for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): @@ -355,13 +376,13 @@ def main(): repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" if args.allow_build_reuse: # read s3 urls from pre-downloaded build reports - if "clickhouse-server" in args.image_repo: + if "clickhouse-server" in image_repo: PACKAGES = [ "clickhouse-client", "clickhouse-server", "clickhouse-common-static", ] - elif "clickhouse-keeper" in args.image_repo: + elif "clickhouse-keeper" in image_repo: PACKAGES = ["clickhouse-keeper"] else: assert False, "BUG" @@ -375,7 +396,7 @@ def main(): if any(package in url for package in PACKAGES) and "-dbg" not in url ] - if args.push: + if push: docker_login() logging.info("Following tags will be created: %s", ", ".join(tags)) @@ -385,7 +406,7 @@ def main(): for tag in tags: test_results.extend( build_and_push_image( - image, args.push, repo_urls, os, tag, args.version, direct_urls + image, push, repo_urls, os, tag, args.version, direct_urls ) ) if test_results[-1].status != "OK": diff --git a/tests/ci/download_release_packages.py b/tests/ci/download_release_packages.py index 26223de2f8a..550301e8fa2 100755 --- a/tests/ci/download_release_packages.py +++ b/tests/ci/download_release_packages.py @@ -3,7 +3,7 @@ import os import logging -import requests # type: ignore +import requests from requests.adapters import HTTPAdapter # type: ignore from urllib3.util.retry import Retry # type: ignore diff --git a/tests/ci/fast_test_check.py 
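# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: docker_server.py main() above now
# derives the docker context path and the image repository from --check-name
# instead of requiring --image-path/--image-repo on the command line. The
# mapping, extracted into a hypothetical helper:
from typing import Tuple


def image_from_check_name(check_name: str) -> Tuple[str, str]:
    if "server image" in check_name:
        return "docker/server", "clickhouse/clickhouse-server"
    if "keeper image" in check_name:
        return "docker/keeper", "clickhouse/clickhouse-keeper"
    raise ValueError(f"Invalid --check-name {check_name!r}")


print(image_from_check_name("Docker keeper image"))  # ('docker/keeper', 'clickhouse/clickhouse-keeper')
# ---------------------------------------------------------------------------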
b/tests/ci/fast_test_check.py index e483e9d4ac2..a624ee06e85 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -10,7 +10,7 @@ from typing import Tuple from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPO_COPY, S3_BUILDS_BUCKET, TEMP_PATH -from pr_info import FORCE_TESTS_LABEL, PRInfo +from pr_info import PRInfo from report import ( ERROR, FAILURE, @@ -37,9 +37,10 @@ def get_fasttest_cmd( ) -> str: return ( f"docker run --cap-add=SYS_PTRACE --user={os.geteuid()}:{os.getegid()} " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls "--network=host " # required to get access to IAM credentials f"-e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output " - f"-e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE " + f"-e FASTTEST_SOURCE=/ClickHouse " f"-e FASTTEST_CMAKE_FLAGS='-DCOMPILER_CACHE=sccache' " f"-e PULL_REQUEST_NUMBER={pr_number} -e COMMIT_SHA={commit_sha} " f"-e COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 " @@ -190,13 +191,7 @@ def main(): # Refuse other checks to run if fast test failed if state != SUCCESS: - if state == ERROR: - print("The status is 'error', report failure disregard the labels") - sys.exit(1) - elif FORCE_TESTS_LABEL in pr_info.labels: - print(f"'{FORCE_TESTS_LABEL}' enabled, reporting success") - else: - sys.exit(1) + sys.exit(1) if __name__ == "__main__": diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index e5268947304..eebc846f4b1 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -11,7 +11,7 @@ from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, - update_mergeable_check, + trigger_mergeable_check, ) from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -24,14 +24,11 @@ def main(): pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - # Update the Mergeable Check at the final step - update_mergeable_check(commit, pr_info, CI_STATUS_NAME) + # Unconditionally update the Mergeable Check at the final step + statuses = get_commit_filtered_statuses(commit) + trigger_mergeable_check(commit, statuses) - statuses = [ - status - for status in get_commit_filtered_statuses(commit) - if status.context == CI_STATUS_NAME - ] + statuses = [s for s in statuses if s.context == CI_STATUS_NAME] if not statuses: return # Take the latest status diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index e230aa5a679..4416b7863a6 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import argparse -import atexit import csv import logging import os @@ -11,34 +10,15 @@ import sys from pathlib import Path from typing import List, Tuple -# isort: off -from github import Github - -# isort: on - from build_download_helper import download_all_deb_packages -from clickhouse_helper import ( - CiLogsCredentials, - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - get_commit, - override_status, - post_commit_status, - post_commit_status_to_file, - update_mergeable_check, -) +from clickhouse_helper import CiLogsCredentials from docker_images_helper import DockerImage, get_docker_image, pull_image from download_release_packages import download_last_release from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH -from get_robot_token import get_best_robot_token -from 
pr_info import FORCE_TESTS_LABEL, PRInfo -from report import ERROR, SUCCESS, StatusType, TestResults, read_test_results -from s3_helper import S3Helper +from pr_info import PRInfo +from report import ERROR, SUCCESS, JobReport, StatusType, TestResults, read_test_results from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results NO_CHANGES_MSG = "Nothing to run" @@ -73,8 +53,7 @@ def get_image_name(check_name: str) -> str: return "clickhouse/stateless-test" if "stateful" in check_name.lower(): return "clickhouse/stateful-test" - else: - raise Exception(f"Cannot deduce image name based on check name {check_name}") + raise ValueError(f"Cannot deduce image name based on check name {check_name}") def get_run_command( @@ -126,11 +105,12 @@ def get_run_command( f"{volume_with_broken_test}" f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" ) -def get_tests_to_run(pr_info: PRInfo) -> List[str]: +def _get_statless_tests_to_run(pr_info: PRInfo) -> List[str]: result = set() if pr_info.changed_files is None: @@ -213,10 +193,10 @@ def parse_args(): help="Check that added tests failed on latest stable", ) parser.add_argument( - "--post-commit-status", - default="commit_status", - choices=["commit_status", "file"], - help="Where to public post commit status", + "--report-to-file", + type=str, + default="", + help="Path to write script report to (for --validate-bugfix)", ) return parser.parse_args() @@ -232,7 +212,6 @@ def main(): reports_path.mkdir(parents=True, exist_ok=True) repo_path = Path(REPO_COPY) - post_commit_path = temp_path / "functional_commit_status.tsv" args = parse_args() check_name = args.check_name or os.getenv("CHECK_NAME") @@ -249,62 +228,20 @@ def main(): flaky_check = "flaky" in check_name.lower() run_changed_tests = flaky_check or validate_bugfix_check - - # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used - pr_info = PRInfo( - need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check - ) - - # FIXME: move to job report and remove - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, commit, pr_info, check_name) - - if validate_bugfix_check and "pr-bugfix" not in pr_info.labels: - if args.post_commit_status == "file": - post_commit_status_to_file( - post_commit_path, - f"Skipped (no pr-bugfix in {pr_info.labels})", - SUCCESS, - "null", - ) - logging.info("Skipping '%s' (no pr-bugfix in %s)", check_name, pr_info.labels) - sys.exit(0) + pr_info = PRInfo(need_changed_files=run_changed_tests) + tests_to_run = [] + assert ( + not validate_bugfix_check or args.report_to_file + ), "JobReport file path must be provided with --validate-bugfix" + if run_changed_tests: + tests_to_run = _get_statless_tests_to_run(pr_info) if "RUN_BY_HASH_NUM" in os.environ: run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) - check_name_with_group = ( - check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" - ) else: run_by_hash_num = 0 run_by_hash_total = 0 - check_name_with_group = check_name - - tests_to_run = [] - if run_changed_tests: - tests_to_run = get_tests_to_run(pr_info) - if not tests_to_run: - state = override_status(SUCCESS, check_name, 
validate_bugfix_check) - if args.post_commit_status == "commit_status": - post_commit_status( - commit, - state, - "", - NO_CHANGES_MSG, - check_name_with_group, - pr_info, - dump_to_file=True, - ) - elif args.post_commit_status == "file": - post_commit_status_to_file( - post_commit_path, - description=NO_CHANGES_MSG, - state=state, - report_url="null", - ) - sys.exit(0) image_name = get_image_name(check_name) @@ -338,91 +275,65 @@ def main(): pr_info, stopwatch.start_time_str, check_name ) - run_command = get_run_command( - check_name, - packages_path, - repo_path, - result_path, - server_log_path, - kill_timeout, - additional_envs, - ci_logs_args, - docker_image, - flaky_check, - tests_to_run, - ) - logging.info("Going to run func tests: %s", run_command) - - with TeePopen(run_command, run_log_path) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") - - try: - subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - except subprocess.CalledProcessError: - logging.warning("Failed to change files owner in %s, ignoring it", temp_path) - - ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) - s3_helper = S3Helper() - - state, description, test_results, additional_logs = process_results( - result_path, server_log_path - ) - state = override_status(state, check_name, invert=validate_bugfix_check) - - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - [run_log_path] + additional_logs, - check_name_with_group, - ) - - print(f"::notice:: {check_name} Report url: {report_url}") - if args.post_commit_status == "commit_status": - post_commit_status( - commit, - state, - report_url, - description, - check_name_with_group, - pr_info, - dump_to_file=True, + if (not validate_bugfix_check and not flaky_check) or tests_to_run: + run_command = get_run_command( + check_name, + packages_path, + repo_path, + result_path, + server_log_path, + kill_timeout, + additional_envs, + ci_logs_args, + docker_image, + flaky_check, + tests_to_run, ) - elif args.post_commit_status == "file": - post_commit_status_to_file( - post_commit_path, - description, - state, - report_url, + logging.info("Going to run func tests: %s", run_command) + + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + try: + subprocess.check_call( + f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True + ) + except subprocess.CalledProcessError: + logging.warning( + "Failed to change files owner in %s, ignoring it", temp_path + ) + + ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) + + state, description, test_results, additional_logs = process_results( + result_path, server_log_path ) else: - raise Exception( - f'Unknown post_commit_status option "{args.post_commit_status}"' + print( + "This is validate bugfix or flaky check run, but no changes test to run - skip with success" + ) + state, description, test_results, additional_logs = ( + SUCCESS, + "No tests to run", + [], + [], ) - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name_with_group, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, 
+ status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + ).dump(to_file=args.report_to_file if args.report_to_file else None) if state != SUCCESS: - if FORCE_TESTS_LABEL in pr_info.labels: - print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") - else: - sys.exit(1) + sys.exit(1) if __name__ == "__main__": diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index c2d279f7fec..bc0cb975ef5 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -import re import logging +import re from typing import List, Optional, Tuple -import requests # type: ignore +import requests CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" CLICKHOUSE_PACKAGE_URL = ( @@ -82,13 +82,14 @@ def get_previous_release(server_version: Optional[Version]) -> Optional[ReleaseI CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}, timeout=10 ) if not response.ok: - raise Exception( - "Cannot load the list of tags from github: " + response.reason + logger.error( + "Cannot load the list of tags from github: %s", response.reason ) + response.raise_for_status() releases_str = set(re.findall(VERSION_PATTERN, response.text)) if len(releases_str) == 0: - raise Exception( + raise ValueError( "Cannot find previous release for " + str(server_version) + " server version" diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 5af4d5e625b..751abf617fa 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -5,38 +5,27 @@ import csv import json import logging import os -import subprocess import sys from pathlib import Path from typing import Dict, List, Tuple from build_download_helper import download_all_deb_packages -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - get_commit, - override_status, - post_commit_status, - post_commit_status_to_file, -) from docker_images_helper import DockerImage, get_docker_image from download_release_packages import download_last_release from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH -from get_robot_token import get_best_robot_token -from github_helper import GitHub from integration_test_images import IMAGES from pr_info import PRInfo from report import ( ERROR, SUCCESS, StatusType, + JobReport, TestResult, TestResults, read_test_results, ) -from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results def get_json_params_dict( @@ -131,16 +120,19 @@ def process_results( def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("check_name") + parser.add_argument( + "--run-tests", nargs="*", help="List of tests to run", default=None + ) parser.add_argument( "--validate-bugfix", action="store_true", help="Check that added tests failed on latest stable", ) parser.add_argument( - "--post-commit-status", - default="commit_status", - choices=["commit_status", "file"], - help="Where to public post commit status", + "--report-to-file", + type=str, + default="", + help="Path to write script report to (for --validate-bugfix)", ) return parser.parse_args() @@ -154,7 +146,6 @@ def main(): reports_path = Path(REPORT_PATH) temp_path.mkdir(parents=True, exist_ok=True) - post_commit_path = temp_path / "integration_commit_status.tsv" repo_path = 
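# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: after this refactor a check script
# no longer posts commit statuses itself; it only serializes a JobReport,
# optionally to a caller-supplied path (what --report-to-file / --validate-bugfix
# rely on). Minimal usage with placeholder values, assuming it runs from tests/ci
# where report.py and stopwatch.py are importable:
from report import SUCCESS, JobReport
from stopwatch import Stopwatch

stopwatch = Stopwatch()
report_to_file = ""  # would normally come from args.report_to_file

JobReport(
    description="No tests to run",
    test_results=[],
    status=SUCCESS,
    start_time=stopwatch.start_time_str,
    duration=stopwatch.duration_seconds,
    additional_files=[],
).dump(to_file=report_to_file if report_to_file else None)  # None -> default report file
# ---------------------------------------------------------------------------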
Path(REPO_COPY) args = parse_args() @@ -167,37 +158,19 @@ def main(): if "RUN_BY_HASH_NUM" in os.environ: run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) - check_name_with_group = ( - check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" - ) else: run_by_hash_num = 0 run_by_hash_total = 0 - check_name_with_group = check_name is_flaky_check = "flaky" in check_name + assert ( + not validate_bugfix_check or args.report_to_file + ), "--report-to-file must be provided for --validate-bugfix" + # For validate_bugfix_check we need up to date information about labels, so # pr_event_from_api is used - pr_info = PRInfo( - need_changed_files=is_flaky_check or validate_bugfix_check, - pr_event_from_api=validate_bugfix_check, - ) - - if validate_bugfix_check and "pr-bugfix" not in pr_info.labels: - if args.post_commit_status == "file": - post_commit_status_to_file( - post_commit_path, - f"Skipped (no pr-bugfix in {pr_info.labels})", - SUCCESS, - "null", - ) - logging.info("Skipping '%s' (no pr-bugfix in '%s')", check_name, pr_info.labels) - sys.exit(0) - - # FIXME: switch to JobReport and remove: - gh = GitHub(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) + pr_info = PRInfo(need_changed_files=is_flaky_check or validate_bugfix_check) images = [get_docker_image(image_) for image_ in IMAGES] @@ -245,7 +218,7 @@ def main(): ), ) - ch_helper = ClickHouseHelper() + integration_infrastructure_fail = False with TeePopen(run_command, output_path_log, my_env) as process: retcode = process.wait() if retcode == 0: @@ -254,73 +227,31 @@ def main(): logging.warning( "There were issues with infrastructure. Not writing status report to restart job." ) - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [ - TestResult( - "integration_infrastructure_fail", - "ERROR", - stopwatch.duration_seconds, - ) - ], - ERROR, - stopwatch.duration_seconds, - stopwatch.start_time_str, - "", - check_name_with_group, - ) - - ch_helper.insert_events_into( - db="default", table="checks", events=prepared_events - ) + integration_infrastructure_fail = True sys.exit(1) else: logging.info("Some tests failed") - subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + # subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - state, description, test_results, additional_logs = process_results(result_path) - state = override_status(state, check_name, invert=validate_bugfix_check) - - s3_helper = S3Helper() - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - [output_path_log] + additional_logs, - check_name_with_group, - ) - - print(f"::notice:: {check_name} Report url: {report_url}") - if args.post_commit_status == "commit_status": - post_commit_status( - commit, - state, - report_url, - description, - check_name_with_group, - pr_info, - dump_to_file=True, - ) - elif args.post_commit_status == "file": - post_commit_status_to_file(post_commit_path, description, state, report_url) + if not integration_infrastructure_fail: + state, description, test_results, additional_logs = process_results(result_path) else: - raise Exception( - f'Unknown post_commit_status option "{args.post_commit_status}"' + state, description, test_results, additional_logs = ( + ERROR, + "no description", + [TestResult("infrastructure error", ERROR, stopwatch.duration_seconds)], + [], ) - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - 
state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name_with_group, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[output_path_log] + additional_logs, + ).dump(to_file=args.report_to_file if args.report_to_file else None) if state != SUCCESS: sys.exit(1) diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index fb7540abda3..011ecff635e 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -9,7 +9,7 @@ from pathlib import Path from typing import Any, List import boto3 # type: ignore -import requests # type: ignore +import requests from build_download_helper import ( download_build_with_progress, get_build_name_for_check, @@ -46,7 +46,7 @@ FAILED_TESTS_ANCHOR = "# Failed tests" def _parse_jepsen_output(path: Path) -> TestResults: test_results = [] # type: TestResults current_type = "" - with open(path, "r") as f: + with open(path, "r", encoding="utf-8") as f: for line in f: if SUCCESSFUL_TESTS_ANCHOR in line: current_type = "OK" @@ -101,7 +101,7 @@ def prepare_autoscaling_group_and_get_hostnames(count): instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) counter += 1 if counter > 30: - raise Exception("Cannot wait autoscaling group") + raise RuntimeError("Cannot wait autoscaling group") ec2_client = boto3.client("ec2", region_name="us-east-1") return get_instances_addresses(ec2_client, instances) @@ -119,12 +119,12 @@ def clear_autoscaling_group(): instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) counter += 1 if counter > 30: - raise Exception("Cannot wait autoscaling group") + raise RuntimeError("Cannot wait autoscaling group") def save_nodes_to_file(instances: List[Any], temp_path: Path) -> Path: nodes_path = temp_path / "nodes.txt" - with open(nodes_path, "w") as f: + with open(nodes_path, "w", encoding="utf-8") as f: f.write("\n".join(instances)) f.flush() return nodes_path @@ -159,7 +159,7 @@ def main(): ) args = parser.parse_args() - if args.program != "server" and args.program != "keeper": + if args.program not in ("server", "keeper"): logging.warning("Invalid argument '%s'", args.program) sys.exit(0) @@ -220,7 +220,7 @@ def main(): f"{S3_URL}/{S3_BUILDS_BUCKET}/{version}/{sha}/binary_release/clickhouse" ) print(f"Clickhouse version: [{version_full}], sha: [{sha}], url: [{build_url}]") - head = requests.head(build_url) + head = requests.head(build_url, timeout=60) assert head.status_code == 200, f"Clickhouse binary not found: {build_url}" else: build_name = get_build_name_for_check(check_name) diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index 9e6c5dde298..8b53f9dcb23 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -8,7 +8,7 @@ from collections import namedtuple from typing import Any, Dict, Iterable, List, Optional import boto3 # type: ignore -import requests # type: ignore +import requests RUNNER_TYPE_LABELS = [ "builder", @@ -20,11 +20,12 @@ RUNNER_TYPE_LABELS = [ "style-checker", "style-checker-aarch64", # private runners - "private-style-checker", "private-builder", + "private-clickpipes", "private-func-tester", "private-fuzzer-unit-tester", "private-stress-tester", 
+ "private-style-checker", ] diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index 1b4f827cc0a..4ac787229c0 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -78,6 +78,7 @@ LABELS = { "Not for changelog", ], "pr-performance": ["Performance Improvement"], + "pr-ci": ["CI Fix or Improvement (changelog entry is not required)"], } CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} @@ -149,10 +150,7 @@ def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: if not category: description_error = "Changelog category is empty" # Filter out the PR categories that are not for changelog. - elif re.match( - r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): + elif "(changelog entry is not required)" in category: pass # to not check the rest of the conditions elif category not in CATEGORY_TO_LABEL: description_error, category = f"Category '{category}' is not valid", "" diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index 6d5653f6a58..f9860f6ad2a 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -6,7 +6,7 @@ from typing import Tuple import boto3 # type: ignore import jwt -import requests # type: ignore +import requests from . import cached_value_is_valid diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 5f41afe9fb6..62dadc6fea9 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -9,19 +9,13 @@ from pathlib import Path from typing import List from build_download_helper import download_fuzzers -from clickhouse_helper import ( - CiLogsCredentials, -) -from docker_images_helper import DockerImage, pull_image, get_docker_image - -from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY +from clickhouse_helper import CiLogsCredentials +from docker_images_helper import DockerImage, get_docker_image, pull_image +from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH from pr_info import PRInfo - from stopwatch import Stopwatch - from tee_popen import TeePopen - NO_CHANGES_MSG = "Nothing to run" @@ -81,6 +75,7 @@ def get_run_command( f"--volume={fuzzers_path}:/fuzzers " f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"--volume={result_path}:/test_output " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" ) @@ -129,7 +124,8 @@ def main(): os.chmod(fuzzers_path / file, 0o777) elif file.endswith("_seed_corpus.zip"): corpus_path = fuzzers_path / (file.removesuffix("_seed_corpus.zip") + ".in") - zipfile.ZipFile(fuzzers_path / file, "r").extractall(corpus_path) + with zipfile.ZipFile(fuzzers_path / file, "r") as zfd: + zfd.extractall(corpus_path) result_path = temp_path / "result_path" result_path.mkdir(parents=True, exist_ok=True) diff --git a/tests/ci/mark_release_ready.py b/tests/ci/mark_release_ready.py index 011b3f28843..31415fef9c0 100755 --- a/tests/ci/mark_release_ready.py +++ b/tests/ci/mark_release_ready.py @@ -56,7 +56,6 @@ def main(): description, RELEASE_READY_STATUS, pr_info, - dump_to_file=True, ) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 744de7dea72..6f4b400f7a8 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ 
-2,6 +2,7 @@ import json import logging import os +import re from typing import Dict, List, Set, Union from urllib.parse import quote @@ -19,7 +20,6 @@ from env_helper import ( GITHUB_SERVER_URL, ) -FORCE_TESTS_LABEL = "force tests" SKIP_MERGEABLE_CHECK_LABEL = "skip mergeable check" NeedsDataType = Dict[str, Dict[str, Union[str, Dict[str, str]]]] @@ -44,11 +44,12 @@ RETRY_SLEEP = 0 class EventType: - UNKNOWN = 0 - PUSH = 1 - PULL_REQUEST = 2 - SCHEDULE = 3 - DISPATCH = 4 + UNKNOWN = "unknown" + PUSH = "commits" + PULL_REQUEST = "pull_request" + SCHEDULE = "schedule" + DISPATCH = "dispatch" + MERGE_QUEUE = "merge_group" def get_pr_for_commit(sha, ref): @@ -114,6 +115,12 @@ class PRInfo: # release_pr and merged_pr are used for docker images additional cache self.release_pr = 0 self.merged_pr = 0 + self.labels = set() + + repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" + self.task_url = GITHUB_RUN_URL + self.repo_full_name = GITHUB_REPOSITORY + self.event_type = EventType.UNKNOWN ref = github_event.get("ref", "refs/heads/master") if ref and ref.startswith("refs/heads/"): @@ -154,10 +161,6 @@ class PRInfo: else: self.sha = github_event["pull_request"]["head"]["sha"] - repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = GITHUB_RUN_URL - - self.repo_full_name = GITHUB_REPOSITORY self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.pr_html_url = f"{repo_prefix}/pull/{self.number}" @@ -176,7 +179,7 @@ class PRInfo: self.body = github_event["pull_request"]["body"] self.labels = { label["name"] for label in github_event["pull_request"]["labels"] - } # type: Set[str] + } self.user_login = github_event["pull_request"]["user"]["login"] # type: str self.user_orgs = set() # type: Set[str] @@ -191,6 +194,28 @@ class PRInfo: self.diff_urls.append(self.compare_pr_url(github_event["pull_request"])) + elif ( + EventType.MERGE_QUEUE in github_event + ): # pull request and other similar events + self.event_type = EventType.MERGE_QUEUE + # FIXME: need pr? 
we can parse it from ["head_ref": "refs/heads/gh-readonly-queue/test-merge-queue/pr-6751-4690229995a155e771c52e95fbd446d219c069bf"] + self.number = 0 + self.sha = github_event[EventType.MERGE_QUEUE]["head_sha"] + self.base_ref = github_event[EventType.MERGE_QUEUE]["base_ref"] + base_sha = github_event[EventType.MERGE_QUEUE]["base_sha"] # type: str + # ClickHouse/ClickHouse + self.base_name = github_event["repository"]["full_name"] + # any_branch-name - the name of working branch name + self.head_ref = github_event[EventType.MERGE_QUEUE]["head_ref"] + # UserName/ClickHouse or ClickHouse/ClickHouse + self.head_name = self.base_name + self.user_login = github_event["sender"]["login"] + self.diff_urls.append( + github_event["repository"]["compare_url"] + .replace("{base}", base_sha) + .replace("{head}", self.sha) + ) + elif "commits" in github_event: self.event_type = EventType.PUSH # `head_commit` always comes with `commits` @@ -203,10 +228,8 @@ class PRInfo: logging.error("Failed to convert %s to integer", merged_pr) self.sha = github_event["after"] pull_request = get_pr_for_commit(self.sha, github_event["ref"]) - repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = GITHUB_RUN_URL self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" - self.repo_full_name = GITHUB_REPOSITORY + if pull_request is None or pull_request["state"] == "closed": # it's merged PR to master self.number = 0 @@ -272,11 +295,7 @@ class PRInfo: "GITHUB_SHA", "0000000000000000000000000000000000000000" ) self.number = 0 - self.labels = set() - repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = GITHUB_RUN_URL self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" - self.repo_full_name = GITHUB_REPOSITORY self.pr_html_url = f"{repo_prefix}/commits/{ref}" self.base_ref = ref self.base_name = self.repo_full_name @@ -289,12 +308,20 @@ class PRInfo: def is_master(self) -> bool: return self.number == 0 and self.head_ref == "master" + def is_release(self) -> bool: + return self.number == 0 and bool( + re.match(r"^2[1-9]\.[1-9][0-9]*$", self.head_ref) + ) + def is_release_branch(self) -> bool: return self.number == 0 def is_scheduled(self): return self.event_type == EventType.SCHEDULE + def is_merge_queue(self): + return self.event_type == EventType.MERGE_QUEUE + def is_dispatched(self): return self.event_type == EventType.DISPATCH @@ -369,6 +396,7 @@ class PRInfo: (ext in DIFF_IN_DOCUMENTATION_EXT and path_in_docs) or "docker/docs" in f or "docs_check.py" in f + or "aspell-dict.txt" in f or ext == ".md" ): return False diff --git a/tests/ci/release.py b/tests/ci/release.py index 2b3331938e7..679e65560f5 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -54,7 +54,7 @@ class Repo: elif protocol == "origin": self._url = protocol else: - raise Exception(f"protocol must be in {self.VALID}") + raise ValueError(f"protocol must be in {self.VALID}") def __str__(self): return self._repo @@ -144,7 +144,7 @@ class Release: for status in statuses: if status["context"] == RELEASE_READY_STATUS: if not status["state"] == SUCCESS: - raise Exception( + raise ValueError( f"the status {RELEASE_READY_STATUS} is {status['state']}" ", not success" ) @@ -153,7 +153,7 @@ class Release: page += 1 - raise Exception( + raise KeyError( f"the status {RELEASE_READY_STATUS} " f"is not found for commit {self.release_commit}" ) @@ -188,7 +188,7 @@ class Release: raise if check_run_from_master and self._git.branch != "master": - raise Exception("the script must be launched only from master") + raise 
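# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: EventType values are now the
# literal keys of the GitHub event payload, so the new merge-queue branch in
# PRInfo simply checks for a "merge_group" key. A trimmed-down version of that
# detection (the payload below is hypothetical sample data):
MERGE_QUEUE = "merge_group"  # EventType.MERGE_QUEUE

github_event = {
    "merge_group": {
        "head_sha": "4690229995a155e771c52e95fbd446d219c069bf",
        "base_ref": "refs/heads/master",
        "base_sha": "0000000000000000000000000000000000000000",
        "head_ref": "refs/heads/gh-readonly-queue/master/pr-1-4690229995a1",
    },
    "sender": {"login": "some-user"},
}

if MERGE_QUEUE in github_event:
    sha = github_event[MERGE_QUEUE]["head_sha"]
    head_ref = github_event[MERGE_QUEUE]["head_ref"]
    user_login = github_event["sender"]["login"]
    print(f"merge queue run for {head_ref} at {sha}, triggered by {user_login}")
# ---------------------------------------------------------------------------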
RuntimeError("the script must be launched only from master") self.set_release_info() @@ -229,7 +229,7 @@ class Release: def check_no_tags_after(self): tags_after_commit = self.run(f"git tag --contains={self.release_commit}") if tags_after_commit: - raise Exception( + raise RuntimeError( f"Commit {self.release_commit} belongs to following tags:\n" f"{tags_after_commit}\nChoose another commit" ) @@ -253,7 +253,7 @@ class Release: ) output = self.run(f"git branch --contains={self.release_commit} {branch}") if branch not in output: - raise Exception( + raise RuntimeError( f"commit {self.release_commit} must belong to {branch} " f"for {self.release_type} release" ) @@ -464,9 +464,9 @@ class Release: logging.warning("Rolling back checked out %s for %s", ref, orig_ref) self.run(f"git reset --hard; git checkout -f {orig_ref}") raise - else: - if with_checkout_back and need_rollback: - self.run(rollback_cmd) + # Normal flow when we need to checkout back + if with_checkout_back and need_rollback: + self.run(rollback_cmd) @contextmanager def _create_branch(self, name: str, start_point: str = "") -> Iterator[None]: diff --git a/tests/ci/report.py b/tests/ci/report.py index 55d1f604605..a3c9b53637a 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -22,8 +22,8 @@ from typing import ( from build_download_helper import get_gh_api from ci_config import CI_CONFIG, BuildConfig -from env_helper import REPORT_PATH, TEMP_PATH from ci_utils import normalize_string +from env_helper import REPORT_PATH, TEMP_PATH logger = logging.getLogger(__name__) @@ -34,6 +34,7 @@ SUCCESS: Final = "success" OK: Final = "OK" FAIL: Final = "FAIL" +SKIPPED: Final = "SKIPPED" StatusType = Literal["error", "failure", "pending", "success"] STATUSES = [ERROR, FAILURE, PENDING, SUCCESS] # type: List[StatusType] @@ -287,14 +288,18 @@ class JobReport: # if False no GH commit status will be created by CI need_commit_status: bool = True + def __post_init__(self): + assert self.status in (SUCCESS, ERROR, FAILURE, PENDING) + @classmethod def exist(cls) -> bool: return JOB_REPORT_FILE.is_file() @classmethod - def load(cls): # type: ignore + def load(cls, from_file=None): # type: ignore res = {} - with open(JOB_REPORT_FILE, "r") as json_file: + from_file = from_file or JOB_REPORT_FILE + with open(from_file, "r", encoding="utf-8") as json_file: res = json.load(json_file) # Deserialize the nested lists of TestResult test_results_data = res.get("test_results", []) @@ -307,13 +312,14 @@ class JobReport: if JOB_REPORT_FILE.exists(): JOB_REPORT_FILE.unlink() - def dump(self): + def dump(self, to_file=None): def path_converter(obj): if isinstance(obj, Path): return str(obj) raise TypeError("Type not serializable") - with open(JOB_REPORT_FILE, "w") as json_file: + to_file = to_file or JOB_REPORT_FILE + with open(to_file, "w", encoding="utf-8") as json_file: json.dump(asdict(self), json_file, default=path_converter, indent=2) @@ -415,7 +421,7 @@ class BuildResult: def load_from_file(cls, file: Union[Path, str]): # type: ignore if not Path(file).exists(): return None - with open(file, "r") as json_file: + with open(file, "r", encoding="utf-8") as json_file: res = json.load(json_file) return BuildResult(**res) @@ -594,7 +600,6 @@ class ReportColorTheme: blue = "#00B4FF" default = (ReportColor.green, ReportColor.red, ReportColor.yellow) - bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue) ColorTheme = Tuple[str, str, str] diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 2aeac5b5740..6187656983e 100644 --- 
a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -import atexit import logging import sys from typing import Tuple @@ -17,7 +16,6 @@ from commit_status_helper import ( post_commit_status, post_labels, remove_labels, - update_mergeable_check, ) from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token @@ -26,8 +24,9 @@ from lambda_shared_package.lambda_shared.pr import ( TRUSTED_CONTRIBUTORS, check_pr_description, ) -from pr_info import FORCE_TESTS_LABEL, PRInfo -from report import FAILURE, PENDING +from pr_info import PRInfo +from report import FAILURE, PENDING, SUCCESS +from cherry_pick import Labels TRUSTED_ORG_IDS = { 54801242, # clickhouse @@ -38,6 +37,8 @@ CAN_BE_TESTED_LABEL = "can be tested" FEATURE_LABEL = "pr-feature" SUBMODULE_CHANGED_LABEL = "submodule changed" PR_CHECK = "PR Check" +# pr-bugfix autoport can lead to issues in releases, let's do ci fixes only +AUTO_BACKPORT_LABELS = ["pr-ci"] def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): @@ -66,9 +67,6 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str]: # Consider the labels and whether the user is trusted. print("Got labels", pr_info.labels) - if FORCE_TESTS_LABEL in pr_info.labels: - print(f"Label '{FORCE_TESTS_LABEL}' set, forcing remaining checks") - return True, f"Labeled '{FORCE_TESTS_LABEL}'" if OK_SKIP_LABELS.intersection(pr_info.labels): return True, "Don't try new checks for release/backports/cherry-picks" @@ -102,7 +100,6 @@ def main(): description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, commit, pr_info, PR_CHECK) description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY) pr_labels_to_add = [] @@ -126,6 +123,15 @@ def main(): elif SUBMODULE_CHANGED_LABEL in pr_info.labels: pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL) + if any(label in AUTO_BACKPORT_LABELS for label in pr_labels_to_add): + backport_labels = [Labels.MUST_BACKPORT, Labels.MUST_BACKPORT_CLOUD] + pr_labels_to_add += [ + label for label in backport_labels if label not in pr_info.labels + ] + print( + f"::notice :: Add backport labels [{backport_labels}] for a given PR category" + ) + print(f"Change labels: add {pr_labels_to_add}, remove {pr_labels_to_remove}") if pr_labels_to_add: post_labels(gh, pr_info, pr_labels_to_add) @@ -184,6 +190,15 @@ def main(): print("::notice ::Cannot run") sys.exit(1) + post_commit_status( + commit, + SUCCESS, + "", + "ok", + PR_CHECK, + pr_info, + ) + ci_report_url = create_ci_report(pr_info, []) print("::notice ::Can run") post_commit_status( diff --git a/tests/ci/runner_token_rotation_lambda/app.py b/tests/ci/runner_token_rotation_lambda/app.py index 6544eee9581..445704648a9 100644 --- a/tests/ci/runner_token_rotation_lambda/app.py +++ b/tests/ci/runner_token_rotation_lambda/app.py @@ -4,9 +4,8 @@ import argparse import sys import boto3 # type: ignore -import requests # type: ignore - -from lambda_shared.token import get_cached_access_token, get_access_token_by_key_app +import requests +from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token def get_runner_registration_token(access_token): @@ -17,6 +16,7 @@ def get_runner_registration_token(access_token): response = requests.post( "https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers, + 
timeout=30, ) response.raise_for_status() data = response.json() @@ -43,6 +43,7 @@ def main(access_token, push_to_ssm, ssm_parameter_name): def handler(event, context): + _, _ = event, context main(get_cached_access_token(), True, "github_runner_registration_token") @@ -85,7 +86,7 @@ if __name__ == "__main__": if args.private_key: private_key = args.private_key else: - with open(args.private_key_path, "r") as key_file: + with open(args.private_key_path, "r", encoding="utf-8") as key_file: private_key = key_file.read() token = get_access_token_by_key_app(private_key, args.app_id) diff --git a/tests/ci/slack_bot_ci_lambda/app.py b/tests/ci/slack_bot_ci_lambda/app.py index 45e14138335..94b71724b1c 100755 --- a/tests/ci/slack_bot_ci_lambda/app.py +++ b/tests/ci/slack_bot_ci_lambda/app.py @@ -17,12 +17,12 @@ It's deployed to slack-bot-ci-lambda in CI/CD account See also: https://aretestsgreenyet.com/ """ -import os -import json import base64 +import json +import os import random -import requests # type: ignore +import requests DRY_RUN_MARK = "" @@ -139,13 +139,11 @@ def get_play_url(query): def run_clickhouse_query(query): - url = "https://play.clickhouse.com/?user=play&query=" + requests.utils.quote(query) - res = requests.get(url) + url = "https://play.clickhouse.com/?user=play&query=" + requests.compat.quote(query) + res = requests.get(url, timeout=30) if res.status_code != 200: print("Failed to execute query: ", res.status_code, res.content) - raise Exception( - "Failed to execute query: {}: {}".format(res.status_code, res.content) - ) + res.raise_for_status() lines = res.text.strip().splitlines() return [x.split("\t") for x in lines] @@ -159,9 +157,9 @@ def split_broken_and_flaky_tests(failed_tests): flaky_tests = [] for name, report, count_prev_str, count_str in failed_tests: count_prev, count = int(count_prev_str), int(count_str) - if (2 <= count and count_prev < 2) or (count_prev == 1 and count == 1): + if (count_prev < 2 <= count) or (count_prev == count == 1): # It failed 2 times or more within extended time window, it's definitely broken. - # 2 <= count_prev means that it was not reported as broken on previous runs + # 2 <= count means that it was not reported as broken on previous runs broken_tests.append([name, report]) elif 0 < count and count_prev == 0: # It failed only once, can be a rare flaky test @@ -172,19 +170,18 @@ def split_broken_and_flaky_tests(failed_tests): def format_failed_tests_list(failed_tests, failure_type): if len(failed_tests) == 1: - res = "There is a new {} test:\n".format(failure_type) + res = f"There is a new {failure_type} test:\n" else: - res = "There are {} new {} tests:\n".format(len(failed_tests), failure_type) + res = f"There are {len(failed_tests)} new {failure_type} tests:\n" for name, report in failed_tests[:MAX_TESTS_TO_REPORT]: cidb_url = get_play_url(ALL_RECENT_FAILURES_QUERY.format(name)) - res += "- *{}* - <{}|Report> - <{}|CI DB> \n".format( - name, report, cidb_url - ) + res += f"- *{name}* - <{report}|Report> - <{cidb_url}|CI DB> \n" if MAX_TESTS_TO_REPORT < len(failed_tests): - res += "- and {} other tests... :this-is-fine-fire:".format( - len(failed_tests) - MAX_TESTS_TO_REPORT + res += ( + f"- and {len(failed_tests) - MAX_TESTS_TO_REPORT} other " + "tests... :this-is-fine-fire:" ) return res @@ -223,19 +220,16 @@ def get_too_many_failures_message_impl(failures_count): if random.random() < REPORT_NO_FAILURES_PROBABILITY: return None return "Wow, there are *no failures* at all... 
0_o" - if curr_failures < MAX_FAILURES: + return_none = ( + curr_failures < MAX_FAILURES + or curr_failures < prev_failures + or (curr_failures - prev_failures) / prev_failures < 0.2 + ) + if return_none: return None if prev_failures < MAX_FAILURES: - return ":alert: *CI is broken: there are {} failures during the last 24 hours*".format( - curr_failures - ) - if curr_failures < prev_failures: - return None - if (curr_failures - prev_failures) / prev_failures < 0.2: - return None - return "CI is broken and it's getting worse: there are {} failures during the last 24 hours".format( - curr_failures - ) + return f":alert: *CI is broken: there are {curr_failures} failures during the last 24 hours*" + return "CI is broken and it's getting worse: there are {curr_failures} failures during the last 24 hours" def get_too_many_failures_message(failures_count): @@ -254,7 +248,7 @@ def get_failed_checks_percentage_message(percentage): return None msg = ":alert: " if p > 1 else "Only " if p < 0.5 else "" - msg += "*{0:.2f}%* of all checks in master have failed yesterday".format(p) + msg += f"*{p:.2f}%* of all checks in master have failed yesterday" return msg @@ -280,14 +274,10 @@ def send_to_slack_impl(message): payload = SLACK_MESSAGE_JSON.copy() payload["text"] = message - res = requests.post(SLACK_URL, json.dumps(payload)) + res = requests.post(SLACK_URL, json.dumps(payload), timeout=30) if res.status_code != 200: print("Failed to send a message to Slack: ", res.status_code, res.content) - raise Exception( - "Failed to send a message to Slack: {}: {}".format( - res.status_code, res.content - ) - ) + res.raise_for_status() def send_to_slack(message): @@ -303,7 +293,7 @@ def query_and_alert_if_needed(query, get_message_func): if msg is None: return - msg += "\nCI DB query: <{}|link>".format(get_play_url(query)) + msg += f"\nCI DB query: <{get_play_url(query)}|link>" print("Sending message to slack:", msg) send_to_slack(msg) @@ -317,6 +307,7 @@ def check_and_alert(): def handler(event, context): + _, _ = event, context try: check_and_alert() return {"statusCode": 200, "body": "OK"} diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 59d2a3d6275..9d33c480598 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -46,14 +46,14 @@ def main(): build_name = get_build_name_for_check(check_name) urls = read_build_urls(build_name, reports_path) if not urls: - raise Exception("No build URLs found") + raise ValueError("No build URLs found") for url in urls: if url.endswith("/clickhouse"): build_url = url break else: - raise Exception("Cannot find binary clickhouse among build results") + raise ValueError("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) diff --git a/tests/ci/sqllogic_test.py b/tests/ci/sqllogic_test.py index bbd81fd76bb..6ea6fa19d91 100755 --- a/tests/ci/sqllogic_test.py +++ b/tests/ci/sqllogic_test.py @@ -9,9 +9,8 @@ from pathlib import Path from typing import Tuple from build_download_helper import download_all_deb_packages -from commit_status_helper import override_status -from docker_images_helper import DockerImage, get_docker_image, pull_image -from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH +from docker_images_helper import DockerImage, pull_image, get_docker_image +from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY from report import ( ERROR, FAIL, @@ -43,6 +42,7 @@ def get_run_command( f"--volume={repo_tests_path}:/clickhouse-tests " f"--volume={result_path}:/test_output " 
f"--volume={server_log_path}:/var/log/clickhouse-server " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {image}" ) @@ -163,7 +163,7 @@ def main(): status, description = ERROR, "Empty test_results.tsv" assert status is not None - status = override_status(status, check_name) + test_results.append( TestResult( "All tests", diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py index 2fe6aabd69c..c8c2adbbd56 100644 --- a/tests/ci/sqltest.py +++ b/tests/ci/sqltest.py @@ -53,14 +53,14 @@ def main(): print(build_name) urls = read_build_urls(build_name, reports_path) if not urls: - raise Exception("No build URLs found") + raise ValueError("No build URLs found") for url in urls: if url.endswith("/clickhouse"): build_url = url break else: - raise Exception("Cannot find the clickhouse binary among build results") + raise ValueError("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py index 275f26fd65f..321826fcf44 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 -import shutil +import logging import os +import shutil +import signal import subprocess import tempfile -import logging -import signal class SSHAgent: @@ -21,7 +21,7 @@ class SSHAgent: def start(self): if shutil.which("ssh-agent") is None: - raise Exception("ssh-agent binary is not available") + raise RuntimeError("ssh-agent binary is not available") self._env_backup["SSH_AUTH_SOCK"] = os.environ.get("SSH_AUTH_SOCK") self._env_backup["SSH_OPTIONS"] = os.environ.get("SSH_OPTIONS") @@ -54,7 +54,7 @@ class SSHAgent: def remove(self, key_pub): if key_pub not in self._keys: - raise Exception(f"Private key not found, public part: {key_pub}") + raise ValueError(f"Private key not found, public part: {key_pub}") if self._keys[key_pub] > 1: self._keys[key_pub] -= 1 @@ -107,7 +107,7 @@ class SSHAgent: if p.returncode: message = stderr.strip() + b"\n" + stdout.strip() - raise Exception(message.strip().decode()) + raise RuntimeError(message.strip().decode()) return stdout @@ -115,9 +115,9 @@ class SSHAgent: class SSHKey: def __init__(self, key_name=None, key_value=None): if key_name is None and key_value is None: - raise Exception("Either key_name or key_value must be specified") + raise ValueError("Either key_name or key_value must be specified") if key_name is not None and key_value is not None: - raise Exception("key_name or key_value must be specified") + raise ValueError("key_name or key_value must be specified") if key_name is not None: self.key = os.getenv(key_name) else: diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 7d582e683e0..7ccc058f79f 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """This script is used in docker images for stress tests and upgrade tests""" -from multiprocessing import cpu_count -from pathlib import Path -from subprocess import Popen, call, check_output, STDOUT, PIPE -from typing import List import argparse import logging import random import time +from multiprocessing import cpu_count +from pathlib import Path +from subprocess import PIPE, STDOUT, Popen, call, check_output +from typing import List def get_options(i: int, upgrade_check: bool) -> str: @@ -90,12 +90,13 @@ def run_func_test( ] pipes = [] for i, path in enumerate(output_paths): - with open(path, "w") as op: + with open(path, "w", encoding="utf-8") as op: full_command = ( f"{cmd} 
{get_options(i, upgrade_check)} {global_time_limit_option} " f"{skip_tests_option} {upgrade_check_option}" ) logging.info("Run func tests '%s'", full_command) + # pylint:disable-next=consider-using-with pipes.append(Popen(full_command, shell=True, stdout=op, stderr=op)) time.sleep(0.5) return pipes @@ -204,6 +205,7 @@ def prepare_for_hung_check(drop_databases: bool) -> bool: continue command = make_query_command(f"DETACH DATABASE {db}") # we don't wait for drop + # pylint:disable-next=consider-using-with Popen(command, shell=True) break except Exception as ex: @@ -212,7 +214,7 @@ def prepare_for_hung_check(drop_databases: bool) -> bool: ) time.sleep(i) else: - raise Exception( + raise RuntimeError( "Cannot drop databases after stress tests. Probably server consumed " "too much memory and cannot execute simple queries" ) @@ -293,7 +295,9 @@ def main(): args = parse_args() if args.drop_databases and not args.hung_check: - raise Exception("--drop-databases only used in hung check (--hung-check)") + raise argparse.ArgumentTypeError( + "--drop-databases only used in hung check (--hung-check)" + ) # FIXME Hung check with ubsan is temporarily disabled due to # https://github.com/ClickHouse/ClickHouse/issues/45372 @@ -359,15 +363,17 @@ def main(): ] ) hung_check_log = args.output_folder / "hung_check.log" # type: Path - tee = Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) - res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT, timeout=600) - if tee.stdin is not None: - tee.stdin.close() + with Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) as tee: + res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT, timeout=600) + if tee.stdin is not None: + tee.stdin.close() if res != 0 and have_long_running_queries and not suppress_hung_check: logging.info("Hung check failed with exit code %d", res) else: hung_check_status = "No queries hung\tOK\t\\N\t\n" - with open(args.output_folder / "test_results.tsv", "w+") as results: + with open( + args.output_folder / "test_results.tsv", "w+", encoding="utf-8" + ) as results: results.write(hung_check_status) hung_check_log.unlink() diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 49c1515c69f..0f2c0cdd222 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -95,7 +95,7 @@ def process_results( results_path = result_directory / "test_results.tsv" test_results = read_test_results(results_path, True) if len(test_results) == 0: - raise Exception("Empty results") + raise ValueError("Empty results") except Exception as e: return ( ERROR, diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 0c7160aeea4..7fb5d22a3ce 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -3,13 +3,15 @@ import argparse import csv import logging import os +import shutil import subprocess import sys +from concurrent.futures import ProcessPoolExecutor from pathlib import Path from typing import List, Tuple from docker_images_helper import get_docker_image, pull_image -from env_helper import REPO_COPY, TEMP_PATH +from env_helper import CI, REPO_COPY, TEMP_PATH from git_helper import GIT_PREFIX, git_runner from pr_info import PRInfo from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results @@ -43,7 +45,7 @@ def process_result( results_path = result_directory / "test_results.tsv" test_results = read_test_results(results_path) if len(test_results) == 0: - raise Exception("Empty results") + raise ValueError("Empty results") return state, description, test_results, additional_files 
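The hung-check change in `stress.py` above relies on `Popen` being usable as a context manager while `tee` mirrors the output to a log file. A self-contained sketch of the same pattern, with a placeholder command and log path:

```python
from subprocess import PIPE, STDOUT, Popen, call

hung_check_log = "/tmp/hung_check.log"    # placeholder path
cmd = "echo 'hung check would run here'"  # placeholder command

with Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) as tee:
    # call() blocks until cmd finishes; its output goes through tee into the log
    res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT, timeout=600)
    if tee.stdin is not None:
        tee.stdin.close()
print("exit code:", res)
```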
except Exception: @@ -125,31 +127,72 @@ def main(): repo_path = Path(REPO_COPY) temp_path = Path(TEMP_PATH) + if temp_path.is_dir(): + shutil.rmtree(temp_path) temp_path.mkdir(parents=True, exist_ok=True) pr_info = PRInfo() IMAGE_NAME = "clickhouse/style-test" image = pull_image(get_docker_image(IMAGE_NAME)) - cmd = ( + cmd_cpp = ( f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " - f"{image}" + f"--entrypoint= -w/ClickHouse/utils/check-style " + f"{image} ./check_cpp.sh" ) - if args.push: - checkout_head(pr_info) + cmd_py = ( + f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " + f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " + f"--entrypoint= -w/ClickHouse/utils/check-style " + f"{image} ./check_py.sh" + ) + + cmd_docs = ( + f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " + f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " + f"--entrypoint= -w/ClickHouse/utils/check-style " + f"{image} ./check_docs.sh" + ) + + with ProcessPoolExecutor(max_workers=2) as executor: + logging.info("Run docs files check: %s", cmd_docs) + future = executor.submit(subprocess.run, cmd_docs, shell=True) + # Parallelization does not make it faster - run subsequently + _ = future.result() + + run_cppcheck = True + run_pycheck = True + if CI and pr_info.number > 0: + pr_info.fetch_changed_files() + if not any(file.endswith(".py") for file in pr_info.changed_files): + run_pycheck = False + if all(file.endswith(".py") for file in pr_info.changed_files): + run_cppcheck = False + + if run_cppcheck: + logging.info("Run source files check: %s", cmd_cpp) + future1 = executor.submit(subprocess.run, cmd_cpp, shell=True) + _ = future1.result() + + if run_pycheck: + if args.push: + checkout_head(pr_info) + logging.info("Run py files check: %s", cmd_py) + future2 = executor.submit(subprocess.run, cmd_py, shell=True) + _ = future2.result() + if args.push: + commit_push_staged(pr_info) + checkout_last_ref(pr_info) - logging.info("Is going to run the command: %s", cmd) subprocess.check_call( - cmd, + f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} " + f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || " + f'echo -e "failure\tCannot parse results" > {temp_path}/check_status.tsv', shell=True, ) - if args.push: - commit_push_staged(pr_info) - checkout_last_ref(pr_info) - state, description, test_results, additional_files = process_result(temp_path) JobReport( diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py index f562fbe101d..c056808a9b4 100644 --- a/tests/ci/team_keys_lambda/app.py +++ b/tests/ci/team_keys_lambda/app.py @@ -2,13 +2,12 @@ import argparse import json - from datetime import datetime from queue import Queue from threading import Thread -import requests # type: ignore import boto3 # type: ignore +import requests class Keys(set): @@ -34,7 +33,7 @@ class Worker(Thread): m = self.queue.get() if m == "": break - response = requests.get(f"https://github.com/{m}.keys") + response = requests.get(f"https://github.com/{m}.keys", timeout=30) self.results.add(f"# {m}\n{response.text}\n") self.queue.task_done() @@ -45,7 +44,9 @@ def get_org_team_members(token: str, org: str, team_slug: str) -> set: "Accept": "application/vnd.github.v3+json", } response = requests.get( - 
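The changed-files gating added to `style_check.py` above reduces to two booleans derived from the PR file list. A sketch with a plain list standing in for `pr_info.changed_files`:

```python
changed_files = ["tests/ci/style_check.py", "src/Core/Settings.h"]  # example input

run_cppcheck = True
run_pycheck = True
if changed_files:  # in CI this comes from pr_info.changed_files
    if not any(f.endswith(".py") for f in changed_files):
        run_pycheck = False  # nothing Python-related changed
    if all(f.endswith(".py") for f in changed_files):
        run_cppcheck = False  # only Python files changed
print(run_cppcheck, run_pycheck)  # True True for the example above
```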
f"https://api.github.com/orgs/{org}/teams/{team_slug}/members", headers=headers + f"https://api.github.com/orgs/{org}/teams/{team_slug}/members", + headers=headers, + timeout=30, ) response.raise_for_status() data = response.json() diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index 6ba0987010a..b72bce4a677 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -29,34 +29,45 @@ if [ -e "$PACKAGE.zip" ] && [ -z "$FORCE" ]; then [ -n "$REBUILD" ] || exit 0 fi +docker_cmd=( + docker run -i --net=host --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash + --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" +) rm -rf "$PACKAGE" "$PACKAGE".zip mkdir "$PACKAGE" cp app.py "$PACKAGE" if [ -f requirements.txt ]; then VENV=lambda-venv rm -rf "$VENV" - docker run --net=host --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash \ - --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" \ - -exc " - '$PY_EXEC' -m venv '$VENV' && - source '$VENV/bin/activate' && - pip install -r requirements.txt && - # To have consistent pyc files - find '$VENV/lib' -name '*.pyc' -delete - find '$VENV/lib' ! -type d -exec touch -t 201212121212 {} + - python -m compileall - " - cp -rT "$VENV/lib/$PY_EXEC/site-packages/" "$PACKAGE" - rm -r "$PACKAGE"/{pip,pip-*,setuptools,setuptools-*} - # zip stores metadata about timestamps - find "$PACKAGE" ! -type d -exec touch -t 201212121212 {} + + "${docker_cmd[@]}" -ex < TextIOWrapper: if self._log_file is None: + # pylint:disable-next=consider-using-with self._log_file = open(self._log_file_name, "w", encoding="utf-8") return self._log_file diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index 010f7dd6734..ab3e33d9e11 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -8,8 +8,7 @@ from dataclasses import dataclass from typing import Any, Dict, List import boto3 # type: ignore - -from lambda_shared import RunnerDescriptions, list_runners, cached_value_is_valid +from lambda_shared import RunnerDescriptions, cached_value_is_valid, list_runners from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token @@ -134,7 +133,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: candidates = instances_by_zone[zone] total_to_kill += num_to_kill if num_to_kill > len(candidates): - raise Exception( + raise RuntimeError( f"Required to kill {num_to_kill}, but have only {len(candidates)}" f" candidates in AV {zone}" ) @@ -196,6 +195,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: def handler(event: dict, context: Any) -> Dict[str, List[str]]: + _ = context return main(get_cached_access_token(), event) @@ -226,7 +226,7 @@ if __name__ == "__main__": if args.private_key: private_key = args.private_key else: - with open(args.private_key_path, "r") as key_file: + with open(args.private_key_path, "r", encoding="utf-8") as key_file: private_key = key_file.read() token = get_access_token_by_key_app(private_key, args.app_id) diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 49d49d9c328..badbc4c5dcf 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,15 +1,30 @@ #!/usr/bin/env python3 import unittest +from ci_config import CIStages, JobNames, CI_CONFIG, Runners class TestCIConfig(unittest.TestCase): 
- def test_no_errors_in_ci_config(self): - raised = None - try: - from ci_config import ( # pylint: disable=import-outside-toplevel - CI_CONFIG as _, - ) - except Exception as exc: - raised = exc - self.assertIsNone(raised, f"CI_CONFIG import raised error {raised}") + def test_runner_config(self): + """check runner is provided w/o exception""" + for job in JobNames: + runner = CI_CONFIG.get_runner_type(job) + self.assertIn(runner, Runners) + + def test_job_stage_config(self): + """check runner is provided w/o exception""" + for job in JobNames: + stage = CI_CONFIG.get_job_ci_stage(job) + if job in [ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + JobNames.JEPSEN_KEEPER, + JobNames.BUILD_CHECK, + JobNames.BUILD_CHECK_SPECIAL, + ]: + assert ( + stage == CIStages.NA + ), "These jobs are not in CI stages, must be NA" + else: + assert stage != CIStages.NA, f"stage not found for [{job}]" + self.assertIn(stage, CIStages) diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 41c52d53020..2c2862d926a 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -23,7 +23,7 @@ def get_test_name(line): for element in elements: if "(" not in element and ")" not in element: return element - raise Exception(f"No test name in line '{line}'") + raise ValueError(f"No test name in line '{line}'") def process_results( @@ -176,6 +176,7 @@ def main(): run_command = ( f"docker run --cap-add=SYS_PTRACE --volume={tests_binary}:/unit_tests_dbms " + "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--volume={test_output}:/test_output {docker_image}" ) diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index 6fa9c1dd873..9dca3fae1dc 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -9,7 +9,7 @@ from env_helper import ( GITHUB_RUN_URL, GITHUB_SERVER_URL, ) -from report import ReportColorTheme, TestResults, create_test_html_report +from report import TestResults, create_test_html_report from s3_helper import S3Helper @@ -92,10 +92,6 @@ def upload_results( else: raw_log_url = GITHUB_JOB_URL() - statuscolors = ( - ReportColorTheme.bugfixcheck if "bugfix validate check" in check_name else None - ) - if test_results or not ready_report_url: html_report = create_test_html_report( check_name, @@ -107,7 +103,6 @@ def upload_results( branch_name, commit_url, additional_urls, - statuscolors=statuscolors, ) report_path = Path("report.html") report_path.write_text(html_report, encoding="utf-8") diff --git a/tests/ci/worker/dockerhub_proxy_template.sh b/tests/ci/worker/dockerhub_proxy_template.sh index 7ca8d581df5..0e375dd5f04 100644 --- a/tests/ci/worker/dockerhub_proxy_template.sh +++ b/tests/ci/worker/dockerhub_proxy_template.sh @@ -1,19 +1,7 @@ #!/usr/bin/env bash set -xeuo pipefail -# Add cloudflare DNS as a fallback -# Get default gateway interface -IFACE=$(ip --json route list | jq '.[]|select(.dst == "default").dev' --raw-output) -# `Link 2 (eth0): 172.31.0.2` -ETH_DNS=$(resolvectl dns "$IFACE") || : -CLOUDFLARE_NS=1.1.1.1 -if [[ "$ETH_DNS" ]] && [[ "${ETH_DNS#*: }" != *"$CLOUDFLARE_NS"* ]]; then - # Cut the leading legend - ETH_DNS=${ETH_DNS#*: } - # shellcheck disable=SC2206 - new_dns=(${ETH_DNS} "$CLOUDFLARE_NS") - resolvectl dns "$IFACE" "${new_dns[@]}" -fi +bash /usr/local/share/scripts/init-network.sh # tune sysctl for network performance cat > /etc/sysctl.d/10-network-memory.conf << EOF diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 
017d847739f..b211128cf10 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -60,19 +60,7 @@ export RUNNER_URL="https://github.com/${RUNNER_ORG}" INSTANCE_ID=$(ec2metadata --instance-id) export INSTANCE_ID -# Add cloudflare DNS as a fallback -# Get default gateway interface -IFACE=$(ip --json route list | jq '.[]|select(.dst == "default").dev' --raw-output) -# `Link 2 (eth0): 172.31.0.2` -ETH_DNS=$(resolvectl dns "$IFACE") || : -CLOUDFLARE_NS=1.1.1.1 -if [[ "$ETH_DNS" ]] && [[ "${ETH_DNS#*: }" != *"$CLOUDFLARE_NS"* ]]; then - # Cut the leading legend - ETH_DNS=${ETH_DNS#*: } - # shellcheck disable=SC2206 - new_dns=(${ETH_DNS} "$CLOUDFLARE_NS") - resolvectl dns "$IFACE" "${new_dns[@]}" -fi +bash /usr/local/share/scripts/init-network.sh # combine labels RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index c27d956c834..281dff5b1c2 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -9,7 +9,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.311.0 +export RUNNER_VERSION=2.313.0 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { @@ -138,6 +138,49 @@ dpkg -i /tmp/amazon-cloudwatch-agent.deb aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json systemctl enable amazon-cloudwatch-agent.service + +echo "Install tailscale" +# Build get-authkey for tailscale +docker run --rm -v /usr/local/bin/:/host-local-bin -i golang:alpine sh -ex <<'EOF' + CGO_ENABLED=0 go install -tags tag:svc-core-ci-github tailscale.com/cmd/get-authkey@main + mv /go/bin/get-authkey /host-local-bin +EOF + +# install tailscale +curl -fsSL "https://pkgs.tailscale.com/stable/ubuntu/$(lsb_release -cs).noarmor.gpg" > /usr/share/keyrings/tailscale-archive-keyring.gpg +curl -fsSL "https://pkgs.tailscale.com/stable/ubuntu/$(lsb_release -cs).tailscale-keyring.list" > /etc/apt/sources.list.d/tailscale.list +apt-get update +apt-get install tailscale --yes --no-install-recommends + + +# Create a common script for the instances +mkdir /usr/local/share/scripts -p +cat > /usr/local/share/scripts/init-network.sh << 'EOF' +#!/usr/bin/env bash + +# Add cloudflare DNS as a fallback +# Get default gateway interface +IFACE=$(ip --json route list | jq '.[]|select(.dst == "default").dev' --raw-output) +# `Link 2 (eth0): 172.31.0.2` +ETH_DNS=$(resolvectl dns "$IFACE") || : +CLOUDFLARE_NS=1.1.1.1 +if [[ "$ETH_DNS" ]] && [[ "${ETH_DNS#*: }" != *"$CLOUDFLARE_NS"* ]]; then + # Cut the leading legend + ETH_DNS=${ETH_DNS#*: } + # shellcheck disable=SC2206 + new_dns=(${ETH_DNS} "$CLOUDFLARE_NS") + resolvectl dns "$IFACE" "${new_dns[@]}" +fi + +# Setup tailscale, the very first action +TS_API_CLIENT_ID=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-id --query 'Parameter.Value' --output text --with-decryption) +TS_API_CLIENT_SECRET=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-secret --query 'Parameter.Value' --output text --with-decryption) +export TS_API_CLIENT_ID TS_API_CLIENT_SECRET +TS_AUTHKEY=$(get-authkey -tags tag:svc-core-ci-github -reusable -ephemeral) +tailscale up --ssh --auth-key="$TS_AUTHKEY" --hostname="ci-runner-$INSTANCE_ID" 
+EOF + + # The following line is used in aws TOE check. touch /var/tmp/clickhouse-ci-ami.success # END OF THE SCRIPT diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 5e68f2d4b53..8ed47a54ab1 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -6,7 +6,7 @@ import time from collections import namedtuple from urllib.parse import quote -import requests # type: ignore +import requests from lambda_shared.pr import TRUSTED_CONTRIBUTORS from lambda_shared.token import get_cached_access_token @@ -90,26 +90,29 @@ def is_trusted_contributor(pr_user_login, pr_user_orgs): def _exec_get_with_retry(url, token): headers = {"Authorization": f"token {token}"} + e = Exception() for i in range(MAX_RETRY): try: - response = requests.get(url, headers=headers) + response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() return response.json() except Exception as ex: print("Got exception executing request", ex) + e = ex time.sleep(i + 1) - raise Exception("Cannot execute GET request with retries") + raise requests.HTTPError("Cannot execute GET request with retries") from e def _exec_post_with_retry(url, token, data=None): headers = {"Authorization": f"token {token}"} + e = Exception() for i in range(MAX_RETRY): try: if data: - response = requests.post(url, headers=headers, json=data) + response = requests.post(url, headers=headers, json=data, timeout=30) else: - response = requests.post(url, headers=headers) + response = requests.post(url, headers=headers, timeout=30) if response.status_code == 403: data = response.json() if ( @@ -123,9 +126,10 @@ def _exec_post_with_retry(url, token, data=None): return response.json() except Exception as ex: print("Got exception executing request", ex) + e = ex time.sleep(i + 1) - raise Exception("Cannot execute POST request with retry") + raise requests.HTTPError("Cannot execute POST request with retry") from e def _get_pull_requests_from(repo_url, owner, branch, token): diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 9c21f1fd2a2..ce0feadf050 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -5,48 +5,46 @@ # pylint: disable=too-many-lines # pylint: disable=anomalous-backslash-in-string -import enum -from queue import Full -import shutil -import itertools -import sys -import os -import os.path -import glob -import platform -import signal -import re import copy -import traceback -import math +import enum +import glob # Not requests, to avoid requiring extra dependency. 
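The retry helpers in `workflow_approve_rerun_lambda/app.py` above now remember the last exception and chain it into the error raised after all attempts. A standalone sketch of that pattern, with placeholder URL and token:

```python
import time

import requests

MAX_RETRY = 5  # same constant name as in the lambda


def get_with_retry(url: str, token: str) -> dict:
    headers = {"Authorization": f"token {token}"}
    e = Exception()
    for i in range(MAX_RETRY):
        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as ex:  # pylint:disable=broad-except
            print("Got exception executing request", ex)
            e = ex
            time.sleep(i + 1)
    raise requests.HTTPError("Cannot execute GET request with retries") from e
```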
import http.client -import urllib.parse +import itertools import json +import math +import multiprocessing +import os +import os.path +import platform +import random +import re +import shutil +import signal +import socket +import string +import subprocess +import sys +import traceback +import urllib.parse # for crc32 import zlib - from argparse import ArgumentParser -from typing import Tuple, Union, Optional, Dict, Set, List -import subprocess -from subprocess import Popen -from subprocess import PIPE +from contextlib import closing from datetime import datetime, timedelta -from time import time, sleep from errno import ESRCH +from queue import Full +from subprocess import PIPE, Popen +from time import sleep, time +from typing import Dict, List, Optional, Set, Tuple, Union try: import termcolor # type: ignore except ImportError: termcolor = None -import random -import string -import multiprocessing -import socket -from contextlib import closing USE_JINJA = True try: @@ -70,7 +68,7 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" -def stringhash(s): +def stringhash(s: str) -> int: # default hash() function consistent # only during process invocation https://stackoverflow.com/a/42089311 return zlib.crc32(s.encode("utf-8")) @@ -94,6 +92,10 @@ def trim_for_log(s): return "\n".join(lines) +class TestException(Exception): + pass + + class HTTPError(Exception): def __init__(self, message=None, code=None): self.message = message @@ -250,7 +252,7 @@ def get_db_engine(args, database_name): def get_create_database_settings(args, testcase_args): - create_database_settings = dict() + create_database_settings = {} if testcase_args: create_database_settings["log_comment"] = testcase_args.testcase_basename if args.db_engine == "Ordinary": @@ -682,6 +684,8 @@ class SettingsRandomizer: get_localzone(), ] ), + "use_page_cache_for_disks_without_file_cache": lambda: random.random() < 0.7, + "page_cache_inject_eviction": lambda: random.random() < 0.5, } @staticmethod @@ -1184,7 +1188,7 @@ class TestCase: ) if result_is_different: - diff_proc = Popen( + with Popen( [ "diff", "-U", @@ -1195,23 +1199,23 @@ class TestCase: encoding="latin-1", stdout=PIPE, universal_newlines=True, - ) - if self.show_whitespaces_in_diff: - sed_proc = Popen( - ["sed", "-e", "s/[ \t]\\+$/&$/g"], - stdin=diff_proc.stdout, - stdout=PIPE, - ) - diff_proc.stdout.close() # Allow diff to receive a SIGPIPE if cat exits. 
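The two new `SettingsRandomizer` entries above follow the usual pattern of a zero-argument callable returning a randomized value. A tiny sketch of how such a map is evaluated, using the setting names added in the hunk:

```python
import random

randomizers = {
    "use_page_cache_for_disks_without_file_cache": lambda: random.random() < 0.7,
    "page_cache_inject_eviction": lambda: random.random() < 0.5,
}

# each entry is a zero-argument callable, evaluated once per test run
randomized_settings = {name: func() for name, func in randomizers.items()}
print(randomized_settings)
```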
- diff = sed_proc.communicate()[0].decode("utf-8", errors="ignore") - else: - diff = diff_proc.communicate()[0] + ) as diff_proc: + if self.show_whitespaces_in_diff: + with Popen( + ["sed", "-e", "s/[ \t]\\+$/&$/g"], + stdin=diff_proc.stdout, + stdout=PIPE, + ) as sed_proc: + diff = sed_proc.communicate()[0].decode( + "utf-8", errors="ignore" + ) + else: + diff = diff_proc.communicate()[0] if diff.startswith("Binary files "): diff += "Content of stdout:\n===================\n" - file = open(self.stdout_file, "rb") - diff += str(file.read()) - file.close() + with open(self.stdout_file, "rb") as file: + diff += str(file.read()) diff += "===================" description += f"\n{diff}\n" if debug_log: @@ -1374,6 +1378,7 @@ class TestCase: command = pattern.format(**params) + # pylint:disable-next=consider-using-with; TODO: fix proc = Popen(command, shell=True, env=os.environ, start_new_session=True) while ( @@ -1540,7 +1545,7 @@ class TestCase: ) if len(leftover_tables) != 0: - raise Exception( + raise TestException( f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally." ) @@ -1623,7 +1628,7 @@ class TestSuite: ): return "#" else: - raise Exception(f"Unknown file_extension: {filename}") + raise TestException(f"Unknown file_extension: {filename}") def parse_tags_from_line(line, comment_sign) -> Set[str]: if not line.startswith(comment_sign): @@ -1684,17 +1689,22 @@ class TestSuite: self.suite_tmp_path: str = suite_tmp_path self.suite: str = suite - filter_func = lambda x: True # noqa: ignore E731 - if args.run_by_hash_num is not None and args.run_by_hash_total is not None: if args.run_by_hash_num > args.run_by_hash_total: - raise Exception( + raise TestException( f"Incorrect run by hash, value {args.run_by_hash_num} bigger than total {args.run_by_hash_total}" ) - filter_func = ( - lambda x: stringhash(x) % args.run_by_hash_total == args.run_by_hash_num - ) + def filter_func(x: str) -> bool: + return bool( + stringhash(x) % args.run_by_hash_total == args.run_by_hash_num + ) + + else: + + def filter_func(x: str) -> bool: + _ = x + return True self.all_tests: List[str] = self.get_tests_list( self.tests_in_suite_key_func, filter_func @@ -2416,7 +2426,7 @@ def main(args): pid = get_server_pid() print("Got server pid", pid) print_stacktraces() - raise Exception(msg) + raise TestException(msg) args.build_flags = collect_build_flags(args) args.changed_merge_tree_settings = collect_changed_merge_tree_settings(args) @@ -2620,7 +2630,7 @@ def find_binary(name): if os.access(bin_path, os.X_OK): return bin_path - raise Exception(f"{name} was not found in PATH") + raise TestException(f"{name} was not found in PATH") def find_clickhouse_command(binary, command): diff --git a/tests/config/config.d/azure_storage_conf.xml b/tests/config/config.d/azure_storage_conf.xml index f42bb8e3cf8..412d40111a7 100644 --- a/tests/config/config.d/azure_storage_conf.xml +++ b/tests/config/config.d/azure_storage_conf.xml @@ -2,7 +2,8 @@ - azure_blob_storage + object_storage + azure http://localhost:10000/devstoreaccount1 cont false diff --git a/tests/config/config.d/backoff_failed_mutation.xml b/tests/config/config.d/backoff_failed_mutation.xml new file mode 100644 index 00000000000..c11fbff018b --- /dev/null +++ b/tests/config/config.d/backoff_failed_mutation.xml @@ -0,0 +1,5 @@ + + + 200 + + diff --git a/tests/config/config.d/handlers.yaml b/tests/config/config.d/handlers.yaml new file mode 100644 index 00000000000..c77dd6578c1 --- /dev/null +++ 
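The lambda-to-`def` change for `filter_func` above keeps the same hash-based sharding. A self-contained sketch of how `stringhash` splits tests across batches (batch numbers are example values):

```python
import zlib


def stringhash(s: str) -> int:
    # crc32 is stable across interpreter runs, unlike the builtin hash()
    return zlib.crc32(s.encode("utf-8"))


run_by_hash_total = 4  # example batch count
run_by_hash_num = 1    # example batch index

tests = ["00001_select_1.sql", "01234_join.sql", "02961_storage_policy.sql"]
selected = [t for t in tests if stringhash(t) % run_by_hash_total == run_by_hash_num]
print(selected)
```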
b/tests/config/config.d/handlers.yaml @@ -0,0 +1,8 @@ +http_handlers: + rule: + url: '/upyachka' + empty_query_string: + handler: + type: redirect + location: "/?query=SELECT+'Pepyaka'" + defaults: diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index b724d5dd87e..2b04d843a3b 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -3,7 +3,7 @@ 9181 1 - 1 + 0 1 @@ -24,6 +24,9 @@ 0 1 + + 1073741824 + 524288000 diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 1429dfff724..00d8cb3aea5 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -37,11 +37,13 @@ - local_blob_storage + object_storage + local local_disk/ - local_blob_storage + object_storage + local_blob_storage local_disk_2/ diff --git a/tests/config/config.d/storage_conf_02961.xml b/tests/config/config.d/storage_conf_02961.xml new file mode 100644 index 00000000000..436a5628e51 --- /dev/null +++ b/tests/config/config.d/storage_conf_02961.xml @@ -0,0 +1,43 @@ + + + + + local + disk1_02961/ + + + local + disk2_02961/ + + + local + disk3_02961/ + + + local + disk4_02961/ + + + + + + + disk1_02961 + 2 + + + disk2_02961 + + + disk3_02961 + 1 + + + disk4_02961 + + + 0.2 + + + + diff --git a/tests/config/install.sh b/tests/config/install.sh index cfe810cda84..467636cfa40 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -30,6 +30,7 @@ ln -sf $SRC_PATH/config.d/graphite_alternative.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree_settings.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/backoff_failed_mutation.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree_old_dirs_cleanup.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/test_cluster_with_incorrect_pw.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/keeper_port.xml $DEST_SERVER_PATH/config.d/ @@ -65,6 +66,7 @@ ln -sf $SRC_PATH/config.d/filesystem_caches_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/validate_tcp_client_information.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zero_copy_destructive_operations.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/block_number.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/handlers.yaml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. 
if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] @@ -134,6 +136,12 @@ fi value=$(($RANDOM % 2)) sed --follow-symlinks -i "s|[01]|$value|" $DEST_SERVER_PATH/config.d/keeper_port.xml +value=$((($RANDOM + 100) * 2048)) +sed --follow-symlinks -i "s|[[:digit:]]\+|$value|" $DEST_SERVER_PATH/config.d/keeper_port.xml + +value=$((($RANDOM + 100) * 2048)) +sed --follow-symlinks -i "s|[[:digit:]]\+|$value|" $DEST_SERVER_PATH/config.d/keeper_port.xml + if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then ln -sf $SRC_PATH/config.d/polymorphic_parts.xml $DEST_SERVER_PATH/config.d/ fi @@ -178,6 +186,7 @@ if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02944.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02963.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/storage_conf_02961.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/s3_cache_new.xml $DEST_SERVER_PATH/users.d/ fi diff --git a/tests/integration/README.md b/tests/integration/README.md index 1b5a0ee8994..ac01c43769e 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -76,7 +76,7 @@ docker pull clickhouse/integration-tests-runner Notes: * If you want to run integration tests without `sudo` you have to add your user to docker group `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about docker configuration. * If you already had run these tests without `./runner` script you may have problems with pytest cache. It can be removed with `rm -r __pycache__ .pytest_cache/`. -* Some tests maybe require a lot of resources (CPU, RAM, etc.). Better not try large tests like `test_cluster_copier` or `test_distributed_ddl*` on your laptop. +* Some tests maybe require a lot of resources (CPU, RAM, etc.). Better not try large tests like `test_distributed_ddl*` on your laptop. 
You can run tests via `./runner` script and pass pytest arguments as last arg: ``` diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 7c922e339fe..e7f691d2237 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -11,9 +11,11 @@ import shlex import shutil import string import subprocess +import sys import time import zlib # for crc32 from collections import defaultdict +from itertools import chain from integration_test_images import IMAGES @@ -102,7 +104,7 @@ def get_counters(fname): "SKIPPED": set([]), } - with open(fname, "r") as out: + with open(fname, "r", encoding="utf-8") as out: for line in out: line = line.strip() # Example of log: @@ -118,7 +120,7 @@ def get_counters(fname): # [gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client # ^^^^^^^^^^^^^ if line.strip().startswith("["): - line = re.sub("^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line) + line = re.sub(r"^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line) line_arr = line.split(" ") if len(line_arr) < 2: @@ -160,7 +162,7 @@ def get_counters(fname): def parse_test_times(fname): read = False description_output = [] - with open(fname, "r") as out: + with open(fname, "r", encoding="utf-8") as out: for line in out: if read and "==" in line: break @@ -196,7 +198,7 @@ def clear_ip_tables_and_restart_daemons(): shell=True, ) except subprocess.CalledProcessError as err: - logging.info("docker kill excepted: " + str(err)) + logging.info("docker kill excepted: %s", str(err)) try: logging.info("Removing all docker containers") @@ -205,7 +207,7 @@ def clear_ip_tables_and_restart_daemons(): shell=True, ) except subprocess.CalledProcessError as err: - logging.info("docker rm excepted: " + str(err)) + logging.info("docker rm excepted: %s", str(err)) # don't restart docker if it's disabled if os.environ.get("CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER", "1") == "1": @@ -213,7 +215,7 @@ def clear_ip_tables_and_restart_daemons(): logging.info("Stopping docker daemon") subprocess.check_output("service docker stop", shell=True) except subprocess.CalledProcessError as err: - logging.info("docker stop excepted: " + str(err)) + logging.info("docker stop excepted: %s", str(err)) try: for i in range(200): @@ -226,9 +228,9 @@ def clear_ip_tables_and_restart_daemons(): time.sleep(0.5) logging.info("Waiting docker to start, current %s", str(err)) else: - raise Exception("Docker daemon doesn't responding") + raise RuntimeError("Docker daemon doesn't responding") except subprocess.CalledProcessError as err: - logging.info("Can't reload docker: " + str(err)) + logging.info("Can't reload docker: %s", str(err)) iptables_iter = 0 try: @@ -252,9 +254,7 @@ class ClickhouseIntegrationTestsRunner: self.image_versions = self.params["docker_images_with_versions"] self.shuffle_groups = self.params["shuffle_test_groups"] self.flaky_check = "flaky check" in self.params["context_name"] - self.bugfix_validate_check = ( - "bugfix validate check" in self.params["context_name"] - ) + self.bugfix_validate_check = "bugfix" in self.params["context_name"].lower() # if use_tmpfs is not set we assume it to be true, otherwise check self.use_tmpfs = "use_tmpfs" not in self.params or self.params["use_tmpfs"] self.disable_net_host = ( @@ -278,13 +278,14 @@ class ClickhouseIntegrationTestsRunner: def base_path(self): return os.path.join(str(self.result_path), "../") - def should_skip_tests(self): + @staticmethod + def should_skip_tests(): return [] def get_image_with_version(self, name): if name in self.image_versions: return name + ":" 
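The `get_counters` regex above is now a raw string; a short sketch of what that prefix-stripping does on a typical pytest-xdist log line (the example line is the one quoted in the hunk's comment):

```python
import re

line = "[gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client"
if line.strip().startswith("["):
    line = re.sub(r"^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line)

state, test_name = line.split(" ", 1)
print(state, test_name)  # ERROR test_mysql_protocol/test.py::test_golang_client
```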
+ self.image_versions[name] - logging.warn( + logging.warning( "Cannot find image %s in params list %s", name, self.image_versions ) if ":" not in name: @@ -294,7 +295,7 @@ class ClickhouseIntegrationTestsRunner: def get_image_version(self, name: str): if name in self.image_versions: return self.image_versions[name] - logging.warn( + logging.warning( "Cannot find image %s in params list %s", name, self.image_versions ) return "latest" @@ -306,13 +307,9 @@ class ClickhouseIntegrationTestsRunner: image_cmd = self._get_runner_image_cmd(repo_path) cmd = ( - "cd {repo_path}/tests/integration && " - "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format( - repo_path=repo_path, - runner_opts=self._get_runner_opts(), - image_cmd=image_cmd, - command=r""" echo Pre Pull finished """, - ) + f"cd {repo_path}/tests/integration && " + f"timeout --signal=KILL 1h ./runner {self._get_runner_opts()} {image_cmd} " + "--pre-pull --command ' echo Pre Pull finished ' " ) for i in range(5): @@ -324,14 +321,15 @@ class ClickhouseIntegrationTestsRunner: ) return except subprocess.CalledProcessError as err: - logging.info("docker-compose pull failed: " + str(err)) + logging.info("docker-compose pull failed: %s", str(err)) continue logging.error("Pulling images failed for 5 attempts. Will fail the worker.") # We pass specific retcode to to ci/integration_test_check.py to skip status reporting and restart job - exit(13) + sys.exit(13) - def _can_run_with(self, path, opt): - with open(path, "r") as script: + @staticmethod + def _can_run_with(path, opt): + with open(path, "r", encoding="utf-8") as script: for line in script: if opt in line: return True @@ -351,19 +349,23 @@ class ClickhouseIntegrationTestsRunner: logging.info("Package found in %s", full_path) log_name = "install_" + f + ".log" log_path = os.path.join(str(self.path()), log_name) - with open(log_path, "w") as log: - cmd = "dpkg -x {} .".format(full_path) + with open(log_path, "w", encoding="utf-8") as log: + cmd = f"dpkg -x {full_path} ." 
logging.info("Executing installation cmd %s", cmd) - retcode = subprocess.Popen( + with subprocess.Popen( cmd, shell=True, stderr=log, stdout=log - ).wait() - if retcode == 0: - logging.info("Installation of %s successfull", full_path) - else: - raise Exception("Installation of %s failed", full_path) + ) as proc: + if proc.wait() == 0: + logging.info( + "Installation of %s successfull", full_path + ) + else: + raise RuntimeError( + f"Installation of {full_path} failed" + ) break else: - raise Exception("Package with {} not found".format(package)) + raise FileNotFoundError(f"Package with {package} not found") # logging.info("Unstripping binary") # logging.info( # "Unstring %s", @@ -389,11 +391,11 @@ class ClickhouseIntegrationTestsRunner: os.getenv("CLICKHOUSE_TESTS_LIBRARY_BRIDGE_BIN_PATH"), ) - def _compress_logs(self, dir, relpaths, result_path): + @staticmethod + def _compress_logs(directory, relpaths, result_path): retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - "tar --use-compress-program='zstd --threads=0' -cf {} -C {} {}".format( - result_path, dir, " ".join(relpaths) - ), + f"tar --use-compress-program='zstd --threads=0' -cf {result_path} -C " + f"{directory} {' '.join(relpaths)}", shell=True, ) # tar return 1 when the files are changed on compressing, we ignore it @@ -445,26 +447,25 @@ class ClickhouseIntegrationTestsRunner: return list(sorted(all_tests)) - def _get_parallel_tests_skip_list(self, repo_path): - skip_list_file_path = "{}/tests/integration/parallel_skip.json".format( - repo_path - ) + @staticmethod + def _get_parallel_tests_skip_list(repo_path): + skip_list_file_path = f"{repo_path}/tests/integration/parallel_skip.json" if ( not os.path.isfile(skip_list_file_path) or os.path.getsize(skip_list_file_path) == 0 ): - raise Exception( - "There is something wrong with getting all tests list: file '{}' is empty or does not exist.".format( - skip_list_file_path - ) + raise ValueError( + "There is something wrong with getting all tests list: " + f"file '{skip_list_file_path}' is empty or does not exist." 
) skip_list_tests = [] - with open(skip_list_file_path, "r") as skip_list_file: + with open(skip_list_file_path, "r", encoding="utf-8") as skip_list_file: skip_list_tests = json.load(skip_list_file) return list(sorted(skip_list_tests)) - def group_test_by_file(self, tests): + @staticmethod + def group_test_by_file(tests): result = {} for test in tests: test_file = test.split("::")[0] @@ -473,7 +474,8 @@ class ClickhouseIntegrationTestsRunner: result[test_file].append(test) return result - def _update_counters(self, main_counters, current_counters, broken_tests): + @staticmethod + def _update_counters(main_counters, current_counters, broken_tests): for test in current_counters["PASSED"]: if test not in main_counters["PASSED"]: if test in main_counters["FAILED"]: @@ -513,21 +515,23 @@ class ClickhouseIntegrationTestsRunner: logging.info( "Can run with custom docker image version %s", runner_version ) - image_cmd += " --docker-image-version={} ".format(runner_version) + image_cmd += f" --docker-image-version={runner_version} " else: if self._can_run_with( os.path.join(repo_path, "tests/integration", "runner"), "--docker-compose-images-tags", ): - image_cmd += "--docker-compose-images-tags={} ".format( - self.get_image_with_version(img) + image_cmd += ( + "--docker-compose-images-tags=" + f"{self.get_image_with_version(img)} " ) else: image_cmd = "" logging.info("Cannot run with custom docker image version :(") return image_cmd - def _find_test_data_dirs(self, repo_path, test_names): + @staticmethod + def _find_test_data_dirs(repo_path, test_names): relpaths = {} for test_name in test_names: if "/" in test_name: @@ -545,7 +549,8 @@ class ClickhouseIntegrationTestsRunner: relpaths[relpath] = mtime return relpaths - def _get_test_data_dirs_difference(self, new_snapshot, old_snapshot): + @staticmethod + def _get_test_data_dirs_difference(new_snapshot, old_snapshot): res = set() for path in new_snapshot: if (path not in old_snapshot) or (old_snapshot[path] != new_snapshot[path]): @@ -571,7 +576,7 @@ class ClickhouseIntegrationTestsRunner: broken_tests, ) except Exception as e: - logging.info("Failed to run {}:\n{}".format(str(test_group), str(e))) + logging.info("Failed to run %s:\n%s", test_group, e) counters = { "ERROR": [], "PASSED": [], @@ -632,31 +637,27 @@ class ClickhouseIntegrationTestsRunner: info_path = os.path.join(repo_path, "tests/integration", info_basename) test_cmd = " ".join([shlex.quote(test) for test in sorted(test_names)]) - parallel_cmd = ( - " --parallel {} ".format(num_workers) if num_workers > 0 else "" - ) + parallel_cmd = f" --parallel {num_workers} " if num_workers > 0 else "" # -r -- show extra test summary: # -f -- (f)ailed # -E -- (E)rror # -p -- (p)assed # -s -- (s)kipped - cmd = "cd {}/tests/integration && timeout --signal=KILL 1h ./runner {} {} -t {} {} -- -rfEps --run-id={} --color=no --durations=0 {} | tee {}".format( - repo_path, - self._get_runner_opts(), - image_cmd, - test_cmd, - parallel_cmd, - i, - _get_deselect_option(self.should_skip_tests()), - info_path, + cmd = ( + f"cd {repo_path}/tests/integration && " + f"timeout --signal=KILL 1h ./runner {self._get_runner_opts()} " + f"{image_cmd} -t {test_cmd} {parallel_cmd} -- -rfEps --run-id={i} " + f"--color=no --durations=0 {_get_deselect_option(self.should_skip_tests())} " + f"| tee {info_path}" ) log_basename = test_group_str + "_" + str(i) + ".log" log_path = os.path.join(repo_path, "tests/integration", log_basename) - with open(log_path, "w") as log: + with open(log_path, "w", encoding="utf-8") as log: 
logging.info("Executing cmd: %s", cmd) # ignore retcode, since it meaningful due to pipe to tee - subprocess.Popen(cmd, shell=True, stderr=log, stdout=log).wait() + with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as proc: + proc.wait() extra_logs_names = [log_basename] log_result_path = os.path.join( @@ -747,11 +748,14 @@ class ClickhouseIntegrationTestsRunner: # want to mark them as error so we filter by '::'. for test in tests_in_group: if ( - test not in counters["PASSED"] - and test not in counters["ERROR"] - and test not in counters["SKIPPED"] - and test not in counters["FAILED"] - and test not in counters["BROKEN"] + test + not in chain( + counters["PASSED"], + counters["ERROR"], + counters["SKIPPED"], + counters["FAILED"], + counters["BROKEN"], + ) and "::" in test ): counters["ERROR"].append(test) @@ -816,7 +820,7 @@ class ClickhouseIntegrationTestsRunner: ( c + " (✕" + str(final_retry) + ")", text_state, - "{:.2f}".format(tests_times[c]), + f"{tests_times[c]:.2f}", ) for c in counters[state] ] @@ -838,7 +842,7 @@ class ClickhouseIntegrationTestsRunner: self._install_clickhouse(build_path) logging.info("Pulling images") - runner._pre_pull_images(repo_path) + self._pre_pull_images(repo_path) logging.info( "Dump iptables before run %s", @@ -911,11 +915,15 @@ class ClickhouseIntegrationTestsRunner: logging.info("Shuffling test groups") random.shuffle(items_to_run) - broken_tests = list() + broken_tests = [] if self.use_analyzer: - with open(f"{repo_path}/tests/analyzer_integration_broken_tests.txt") as f: + with open( + f"{repo_path}/tests/analyzer_integration_broken_tests.txt", + "r", + encoding="utf-8", + ) as f: broken_tests = f.read().splitlines() - logging.info(f"Broken tests in the list: {len(broken_tests)}") + logging.info("Broken tests in the list: %s", len(broken_tests)) for group, tests in items_to_run: logging.info("Running test group %s containing %s tests", group, len(tests)) @@ -967,12 +975,12 @@ class ClickhouseIntegrationTestsRunner: else: text_state = state test_result += [ - (c, text_state, "{:.2f}".format(tests_times[c]), tests_log_paths[c]) + (c, text_state, f"{tests_times[c]:.2f}", tests_log_paths[c]) for c in counters[state] ] failed_sum = len(counters["FAILED"]) + len(counters["ERROR"]) - status_text = "fail: {}, passed: {}".format(failed_sum, len(counters["PASSED"])) + status_text = f"fail: {failed_sum}, passed: {len(counters['PASSED'])}" if self.soft_deadline_time < time.time(): status_text = "Timeout, " + status_text @@ -989,10 +997,10 @@ class ClickhouseIntegrationTestsRunner: def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: + with open(results_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, "w") as f: + with open(status_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerow(status) @@ -1005,7 +1013,8 @@ if __name__ == "__main__": result_path = os.environ.get("CLICKHOUSE_TESTS_RESULT_PATH") params_path = os.environ.get("CLICKHOUSE_TESTS_JSON_PARAMS_PATH") - params = json.loads(open(params_path, "r").read()) + with open(params_path, "r", encoding="utf-8") as jfd: + params = json.loads(jfd.read()) runner = ClickhouseIntegrationTestsRunner(result_path, params) logging.info("Running tests") diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 6e76270c607..a4b18ff523a 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,10 +1,11 @@ -from 
helpers.cluster import run_and_check -import pytest +#!/usr/bin/env python3 + import logging import os -from helpers.test_tools import TSV -from helpers.network import _NetworkManager +import pytest # pylint:disable=import-error; for style check +from helpers.cluster import run_and_check +from helpers.network import _NetworkManager # This is a workaround for a problem with logging in pytest [1]. # @@ -32,32 +33,35 @@ def tune_local_port_range(): def cleanup_environment(): try: if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) == 1: - logging.debug(f"Cleaning all iptables rules") + logging.debug("Cleaning all iptables rules") _NetworkManager.clean_all_user_iptables_rules() result = run_and_check(["docker ps | wc -l"], shell=True) if int(result) > 1: if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) != 1: logging.warning( - f"Docker containters({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n" - "You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup." + "Docker containters(%s) are running before tests run. " + "They can be left from previous pytest run and cause test failures.\n" + "You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with " + "--cleanup-containers argument to enable automatic containers cleanup.", + int(result), ) else: logging.debug("Trying to kill unstopped containers...") run_and_check( - [f"docker kill $(docker container list --all --quiet)"], + ["docker kill $(docker container list --all --quiet)"], shell=True, nothrow=True, ) run_and_check( - [f"docker rm $docker container list --all --quiet)"], + ["docker rm $docker container list --all --quiet)"], shell=True, nothrow=True, ) logging.debug("Unstopped containers killed") r = run_and_check(["docker-compose", "ps", "--services", "--all"]) - logging.debug(f"Docker ps before start:{r.stdout}") + logging.debug("Docker ps before start:%s", r.stdout) else: - logging.debug(f"No running containers") + logging.debug("No running containers") logging.debug("Pruning Docker networks") run_and_check( @@ -66,8 +70,7 @@ def cleanup_environment(): nothrow=True, ) except Exception as e: - logging.exception(f"cleanup_environment:{str(e)}") - pass + logging.exception("cleanup_environment:%s", e) yield diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 1d96563251b..52c0d8a8ee5 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -70,6 +70,11 @@ CLICKHOUSE_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.log" CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.log" +# Minimum version we use in integration tests to check compatibility with old releases +# Keep in mind that we only support upgrading between releases that are at most 1 year different. +# This means that this minimum need to be, at least, 1 year older than the current release +CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" + # to create docker-compose env file def _create_env_file(path, variables): @@ -104,8 +109,8 @@ def run_and_check( res = subprocess.run( args, stdout=stdout, stderr=stderr, env=env, shell=shell, timeout=timeout ) - out = res.stdout.decode("utf-8") - err = res.stderr.decode("utf-8") + out = res.stdout.decode("utf-8", "ignore") + err = res.stderr.decode("utf-8", "ignore") # check_call(...) 
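The decode change in `run_and_check` above makes log capture tolerant of invalid UTF-8 in subprocess output; a one-line sketch of the behavior:

```python
raw = b"stdout with invalid utf-8: \xff\xfe"
print(raw.decode("utf-8", "ignore"))  # invalid bytes are dropped instead of raising
```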
from subprocess does not print stderr, so we do it manually for outline in out.splitlines(): logging.debug(f"Stdout:{outline}") @@ -465,7 +470,7 @@ class ClickHouseCluster: self.base_cmd += ["--project-name", self.project_name] self.base_zookeeper_cmd = None - self.base_mysql_cmd = [] + self.base_mysql57_cmd = [] self.base_kafka_cmd = [] self.base_kerberized_kafka_cmd = [] self.base_kerberos_kdc_cmd = [] @@ -479,7 +484,7 @@ class ClickHouseCluster: self.with_zookeeper = False self.with_zookeeper_secure = False self.with_mysql_client = False - self.with_mysql = False + self.with_mysql57 = False self.with_mysql8 = False self.with_mysql_cluster = False self.with_postgres = False @@ -644,12 +649,19 @@ class ClickHouseCluster: self.mysql_client_host = "mysql_client" self.mysql_client_container = None - # available when with_mysql == True - self.mysql_host = "mysql57" - self.mysql_port = 3306 - self.mysql_ip = None - self.mysql_dir = p.abspath(p.join(self.instances_dir, "mysql")) - self.mysql_logs_dir = os.path.join(self.mysql_dir, "logs") + # available when with_mysql57 == True + self.mysql57_host = "mysql57" + self.mysql57_port = 3306 + self.mysql57_ip = None + self.mysql57_dir = p.abspath(p.join(self.instances_dir, "mysql")) + self.mysql57_logs_dir = os.path.join(self.mysql57_dir, "logs") + + # available when with_mysql8 == True + self.mysql8_host = "mysql80" + self.mysql8_port = 3306 + self.mysql8_ip = None + self.mysql8_dir = p.abspath(p.join(self.instances_dir, "mysql8")) + self.mysql8_logs_dir = os.path.join(self.mysql8_dir, "logs") # available when with_mysql_cluster == True self.mysql2_host = "mysql2" @@ -659,14 +671,7 @@ class ClickHouseCluster: self.mysql3_ip = None self.mysql4_ip = None self.mysql_cluster_dir = p.abspath(p.join(self.instances_dir, "mysql")) - self.mysql_cluster_logs_dir = os.path.join(self.mysql_dir, "logs") - - # available when with_mysql8 == True - self.mysql8_host = "mysql80" - self.mysql8_port = 3306 - self.mysql8_ip = None - self.mysql8_dir = p.abspath(p.join(self.instances_dir, "mysql8")) - self.mysql8_logs_dir = os.path.join(self.mysql8_dir, "logs") + self.mysql_cluster_logs_dir = os.path.join(self.mysql8_dir, "logs") # available when with_zookeper_secure == True self.zookeeper_secure_port = 2281 @@ -1045,17 +1050,17 @@ class ClickHouseCluster: return self.base_mysql_client_cmd - def setup_mysql_cmd(self, instance, env_variables, docker_compose_yml_dir): - self.with_mysql = True - env_variables["MYSQL_HOST"] = self.mysql_host - env_variables["MYSQL_PORT"] = str(self.mysql_port) + def setup_mysql57_cmd(self, instance, env_variables, docker_compose_yml_dir): + self.with_mysql57 = True + env_variables["MYSQL_HOST"] = self.mysql57_host + env_variables["MYSQL_PORT"] = str(self.mysql57_port) env_variables["MYSQL_ROOT_HOST"] = "%" - env_variables["MYSQL_LOGS"] = self.mysql_logs_dir + env_variables["MYSQL_LOGS"] = self.mysql57_logs_dir env_variables["MYSQL_LOGS_FS"] = "bind" self.base_cmd.extend( ["--file", p.join(docker_compose_yml_dir, "docker_compose_mysql.yml")] ) - self.base_mysql_cmd = [ + self.base_mysql57_cmd = [ "docker-compose", "--env-file", instance.env_file, @@ -1065,7 +1070,7 @@ class ClickHouseCluster: p.join(docker_compose_yml_dir, "docker_compose_mysql.yml"), ] - return self.base_mysql_cmd + return self.base_mysql57_cmd def setup_mysql8_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_mysql8 = True @@ -1091,7 +1096,7 @@ class ClickHouseCluster: def setup_mysql_cluster_cmd(self, instance, env_variables, docker_compose_yml_dir): 
self.with_mysql_cluster = True - env_variables["MYSQL_CLUSTER_PORT"] = str(self.mysql_port) + env_variables["MYSQL_CLUSTER_PORT"] = str(self.mysql8_port) env_variables["MYSQL_CLUSTER_ROOT_HOST"] = "%" env_variables["MYSQL_CLUSTER_LOGS"] = self.mysql_cluster_logs_dir env_variables["MYSQL_CLUSTER_LOGS_FS"] = "bind" @@ -1572,7 +1577,7 @@ class ClickHouseCluster: with_zookeeper=False, with_zookeeper_secure=False, with_mysql_client=False, - with_mysql=False, + with_mysql57=False, with_mysql8=False, with_mysql_cluster=False, with_kafka=False, @@ -1676,7 +1681,7 @@ class ClickHouseCluster: with_zookeeper=with_zookeeper, zookeeper_config_path=self.zookeeper_config_path, with_mysql_client=with_mysql_client, - with_mysql=with_mysql, + with_mysql57=with_mysql57, with_mysql8=with_mysql8, with_mysql_cluster=with_mysql_cluster, with_kafka=with_kafka, @@ -1767,9 +1772,9 @@ class ClickHouseCluster: ) ) - if with_mysql and not self.with_mysql: + if with_mysql57 and not self.with_mysql57: cmds.append( - self.setup_mysql_cmd(instance, env_variables, docker_compose_yml_dir) + self.setup_mysql57_cmd(instance, env_variables, docker_compose_yml_dir) ) if with_mysql8 and not self.with_mysql8: @@ -1805,9 +1810,9 @@ class ClickHouseCluster: if with_odbc_drivers and not self.with_odbc_drivers: self.with_odbc_drivers = True - if not self.with_mysql: + if not self.with_mysql8: cmds.append( - self.setup_mysql_cmd( + self.setup_mysql8_cmd( instance, env_variables, docker_compose_yml_dir ) ) @@ -2148,8 +2153,8 @@ class ClickHouseCluster: logging.error("Can't connect to MySQL Client:{}".format(errors)) raise Exception("Cannot wait MySQL Client container") - def wait_mysql_to_start(self, timeout=180): - self.mysql_ip = self.get_instance_ip("mysql57") + def wait_mysql57_to_start(self, timeout=180): + self.mysql57_ip = self.get_instance_ip("mysql57") start = time.time() errors = [] while time.time() - start < timeout: @@ -2157,8 +2162,8 @@ class ClickHouseCluster: conn = pymysql.connect( user=mysql_user, password=mysql_pass, - host=self.mysql_ip, - port=self.mysql_port, + host=self.mysql57_ip, + port=self.mysql57_port, ) conn.close() logging.debug("Mysql Started") @@ -2205,7 +2210,7 @@ class ClickHouseCluster: user=mysql_user, password=mysql_pass, host=ip, - port=self.mysql_port, + port=self.mysql8_port, ) conn.close() logging.debug(f"Mysql Started {ip}") @@ -2752,15 +2757,15 @@ class ClickHouseCluster: subprocess_check_call(self.base_mysql_client_cmd + common_opts) self.wait_mysql_client_to_start() - if self.with_mysql and self.base_mysql_cmd: + if self.with_mysql57 and self.base_mysql57_cmd: logging.debug("Setup MySQL") - if os.path.exists(self.mysql_dir): - shutil.rmtree(self.mysql_dir) - os.makedirs(self.mysql_logs_dir) - os.chmod(self.mysql_logs_dir, stat.S_IRWXU | stat.S_IRWXO) - subprocess_check_call(self.base_mysql_cmd + common_opts) + if os.path.exists(self.mysql57_dir): + shutil.rmtree(self.mysql57_dir) + os.makedirs(self.mysql57_logs_dir) + os.chmod(self.mysql57_logs_dir, stat.S_IRWXU | stat.S_IRWXO) + subprocess_check_call(self.base_mysql57_cmd + common_opts) self.up_called = True - self.wait_mysql_to_start() + self.wait_mysql57_to_start() if self.with_mysql8 and self.base_mysql8_cmd: logging.debug("Setup MySQL 8") @@ -2775,7 +2780,7 @@ class ClickHouseCluster: print("Setup MySQL") if os.path.exists(self.mysql_cluster_dir): shutil.rmtree(self.mysql_cluster_dir) - os.makedirs(self.mysql_cluster_logs_dir) + os.makedirs(self.mysql_cluster_logs_dir, exist_ok=True) os.chmod(self.mysql_cluster_logs_dir, stat.S_IRWXU | 
stat.S_IRWXO) subprocess_check_call(self.base_mysql_cluster_cmd + common_opts) @@ -3239,7 +3244,7 @@ class ClickHouseInstance: with_zookeeper, zookeeper_config_path, with_mysql_client, - with_mysql, + with_mysql57, with_mysql8, with_mysql_cluster, with_kafka, @@ -3324,7 +3329,7 @@ class ClickHouseInstance: self.library_bridge_bin_path = library_bridge_bin_path self.with_mysql_client = with_mysql_client - self.with_mysql = with_mysql + self.with_mysql57 = with_mysql57 self.with_mysql8 = with_mysql8 self.with_mysql_cluster = with_mysql_cluster self.with_postgres = with_postgres @@ -3368,7 +3373,7 @@ class ClickHouseInstance: self.env_file = self.cluster.env_file if with_odbc_drivers: self.odbc_ini_path = self.path + "/odbc.ini:/etc/odbc.ini" - self.with_mysql = True + self.with_mysql8 = True else: self.odbc_ini_path = "" @@ -3484,6 +3489,11 @@ class ClickHouseInstance: if check_callback(result): return result time.sleep(sleep_time) + except QueryRuntimeException as ex: + # Container is down, this is likely due to server crash. + if "No route to host" in str(ex): + raise + time.sleep(sleep_time) except Exception as ex: # logging.debug("Retry {} got exception {}".format(i + 1, ex)) time.sleep(sleep_time) @@ -4294,7 +4304,7 @@ class ClickHouseInstance: "Database": odbc_mysql_db, "Uid": odbc_mysql_uid, "Pwd": odbc_mysql_pass, - "Server": self.cluster.mysql_host, + "Server": self.cluster.mysql8_host, }, "PostgreSQL": { "DSN": "postgresql_odbc", @@ -4482,14 +4492,14 @@ class ClickHouseInstance: if self.with_mysql_client: depends_on.append(self.cluster.mysql_client_host) - if self.with_mysql: + if self.with_mysql57: depends_on.append("mysql57") if self.with_mysql8: depends_on.append("mysql80") if self.with_mysql_cluster: - depends_on.append("mysql57") + depends_on.append("mysql80") depends_on.append("mysql2") depends_on.append("mysql3") depends_on.append("mysql4") diff --git a/tests/integration/helpers/external_sources.py b/tests/integration/helpers/external_sources.py index cccf151e73e..033a2f84fa2 100644 --- a/tests/integration/helpers/external_sources.py +++ b/tests/integration/helpers/external_sources.py @@ -119,7 +119,7 @@ class SourceMySQL(ExternalSource): def prepare(self, structure, table_name, cluster): if self.internal_hostname is None: - self.internal_hostname = cluster.mysql_ip + self.internal_hostname = cluster.mysql8_ip self.create_mysql_conn() self.execute_mysql_query( "create database if not exists test default character set 'utf8'" diff --git a/tests/integration/helpers/keeper_config1.xml b/tests/integration/helpers/keeper_config1.xml index 12c6c0b78b6..a4a1059ffe9 100644 --- a/tests/integration/helpers/keeper_config1.xml +++ b/tests/integration/helpers/keeper_config1.xml @@ -9,11 +9,13 @@ /var/log/clickhouse-keeper/clickhouse-keeper.err.log + + 0 + az-zoo1 + + 2181 - - az-zoo1 - 1 diff --git a/tests/integration/helpers/keeper_config2.xml b/tests/integration/helpers/keeper_config2.xml index 2afff2f5e59..88a0d1f0b4b 100644 --- a/tests/integration/helpers/keeper_config2.xml +++ b/tests/integration/helpers/keeper_config2.xml @@ -9,13 +9,14 @@ /var/log/clickhouse-keeper/clickhouse-keeper.err.log + + 0 + az-zoo2 + + 2181 2 - - az-zoo2 - 1 - 10000 diff --git a/tests/integration/helpers/mock_servers.py b/tests/integration/helpers/mock_servers.py index e4655ffeeaf..f2181d85e12 100644 --- a/tests/integration/helpers/mock_servers.py +++ b/tests/integration/helpers/mock_servers.py @@ -33,7 +33,7 @@ def start_mock_servers(cluster, script_dir, mocks, timeout=100): cluster.exec_in_container( 
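The new `except QueryRuntimeException` branch above re-raises only when the container is unreachable, instead of silently retrying. A sketch of that retry shape, with a stand-in exception class:

```python
import time


class QueryRuntimeException(Exception):
    """Stand-in for the exception class used by the cluster helpers."""


def query_with_retry(run_query, retry_count=3, sleep_time=0.5):
    for _ in range(retry_count):
        try:
            return run_query()
        except QueryRuntimeException as ex:
            # Container is down, likely a server crash: retrying will not help.
            if "No route to host" in str(ex):
                raise
            time.sleep(sleep_time)
        except Exception:  # pylint:disable=broad-except
            time.sleep(sleep_time)
    return run_query()  # final attempt, letting any exception propagate
```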
container_id, - ["python", server_name, str(port)], + ["python3", server_name, str(port)], detach=True, ) diff --git a/tests/integration/runner b/tests/integration/runner index b1193b5b471..f1d5198f545 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -39,9 +39,7 @@ def check_args_and_update_paths(args): else: CLICKHOUSE_ROOT = args.clickhouse_root else: - logging.info( - "ClickHouse root is not set. Will use %s" % (DEFAULT_CLICKHOUSE_ROOT) - ) + logging.info("ClickHouse root is not set. Will use %s", DEFAULT_CLICKHOUSE_ROOT) CLICKHOUSE_ROOT = DEFAULT_CLICKHOUSE_ROOT if not os.path.isabs(args.binary): @@ -74,9 +72,7 @@ def check_args_and_update_paths(args): args.base_configs_dir = os.path.abspath( os.path.join(CLICKHOUSE_ROOT, CONFIG_DIR_IN_REPO) ) - logging.info( - "Base configs dir is not set. Will use %s" % (args.base_configs_dir) - ) + logging.info("Base configs dir is not set. Will use %s", args.base_configs_dir) if args.cases_dir: if not os.path.isabs(args.cases_dir): @@ -87,7 +83,7 @@ def check_args_and_update_paths(args): args.cases_dir = os.path.abspath( os.path.join(CLICKHOUSE_ROOT, INTEGRATION_DIR_IN_REPO) ) - logging.info("Cases dir is not set. Will use %s" % (args.cases_dir)) + logging.info("Cases dir is not set. Will use %s", args.cases_dir) if args.utils_dir: if not os.path.isabs(args.utils_dir): @@ -98,12 +94,13 @@ def check_args_and_update_paths(args): args.utils_dir = os.path.abspath( os.path.join(CLICKHOUSE_ROOT, UTILS_DIR_IN_REPO) ) - logging.info("utils dir is not set. Will use %s" % (args.utils_dir)) + logging.info("utils dir is not set. Will use %s", args.utils_dir) logging.info( - "base_configs_dir: {}, binary: {}, cases_dir: {} ".format( - args.base_configs_dir, args.binary, args.cases_dir - ) + "base_configs_dir: %s, binary: %s, cases_dir: %s ", + args.base_configs_dir, + args.binary, + args.cases_dir, ) for path in [ @@ -115,7 +112,7 @@ def check_args_and_update_paths(args): CLICKHOUSE_ROOT, ]: if not os.path.exists(path): - raise Exception("Path {} doesn't exist".format(path)) + raise FileNotFoundError(f"Path {path} doesn't exist") if args.dockerd_volume: if not os.path.isabs(args.dockerd_volume): @@ -126,21 +123,22 @@ def check_args_and_update_paths(args): if (not os.path.exists(os.path.join(args.base_configs_dir, "config.xml"))) and ( not os.path.exists(os.path.join(args.base_configs_dir, "config.yaml")) ): - raise Exception( - "No config.xml or config.yaml in {}".format(args.base_configs_dir) + raise FileNotFoundError( + f"No config.xml or config.yaml in {args.base_configs_dir}" ) if (not os.path.exists(os.path.join(args.base_configs_dir, "users.xml"))) and ( not os.path.exists(os.path.join(args.base_configs_dir, "users.yaml")) ): - raise Exception( - "No users.xml or users.yaml in {}".format(args.base_configs_dir) + raise FileNotFoundError( + f"No users.xml or users.yaml in {args.base_configs_dir}" ) def docker_kill_handler_handler(signum, frame): + _, _ = signum, frame subprocess.check_call( - "docker ps --all --quiet --filter name={name}".format(name=CONTAINER_NAME), + f"docker ps --all --quiet --filter name={CONTAINER_NAME}", shell=True, ) raise KeyboardInterrupt("Killed by Ctrl+C") @@ -318,7 +316,7 @@ if __name__ == "__main__": parallel_args = "" if args.parallel: parallel_args += "--dist=loadfile" - parallel_args += " -n {}".format(args.parallel) + parallel_args += f" -n {args.parallel}".format() rand_args = "" # if not args.no_random: @@ -326,7 +324,7 @@ if __name__ == "__main__": net = "" if args.network: - net = 
"--net={}".format(args.network) + net = f"--net={args.network}" elif not args.disable_net_host: net = "--net=host" @@ -350,9 +348,7 @@ if __name__ == "__main__": dockerd_internal_volume = "--tmpfs /var/lib/docker -e DOCKER_RAMDISK=true" elif args.dockerd_volume: dockerd_internal_volume = ( - "--mount type=bind,source={},target=/var/lib/docker".format( - args.dockerd_volume - ) + f"--mount type=bind,source={args.dockerd_volume},target=/var/lib/docker" ) else: try: diff --git a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml index 779e4b6ae21..2edabc76c8b 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml +++ b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml @@ -8,9 +8,16 @@ minio minio123 + + object_storage + local_blob_storage + plain + /local_plain/ + backup_disk_s3_plain + backup_disk_local_plain diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index e575c487b7a..c2f8936b82c 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -21,16 +21,55 @@ def start_cluster(): cluster.shutdown() +s3_disk_def = """disk(type=s3_plain, + endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{}/', + access_key_id='minio', + secret_access_key='minio123');""" + +local_disk_def = "disk(type=object_storage, object_storage_type = 'local_blob_storage', metadata_type = 'plain', path = '/local_plain/{}/');" + + @pytest.mark.parametrize( - "table_name,backup_name,storage_policy,min_bytes_for_wide_part", + "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", [ pytest.param( - "compact", "backup_compact", "s3_backup_compact", int(1e9), id="compact" + "compact", + "backup_compact_s3", + "backup_disk_s3_plain", + s3_disk_def, + int(1e9), + id="compact", + ), + pytest.param( + "wide", + "backup_wide_s3", + "backup_disk_s3_plain", + s3_disk_def, + int(0), + id="wide", + ), + pytest.param( + "compact", + "backup_compact_local", + "backup_disk_local_plain", + local_disk_def, + int(1e9), + id="compact", + ), + pytest.param( + "wide", + "backup_wide_local", + "backup_disk_local_plain", + local_disk_def, + int(0), + id="wide", ), - pytest.param("wide", "backup_wide", "s3_backup_wide", int(0), id="wide"), ], ) -def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide_part): +def test_attach_part( + table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part +): + disk_definition = disk_def.format(backup_name) node.query( f""" -- Catch any errors (NOTE: warnings are ok) @@ -45,7 +84,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide settings min_bytes_for_wide_part={min_bytes_for_wide_part} as select number%5 part, number key from numbers(100); - backup table ordinary_db.{table_name} TO Disk('backup_disk_s3_plain', '{backup_name}') settings deduplicate_files=0; + backup table ordinary_db.{table_name} TO Disk('{storage_policy}', '{backup_name}') settings deduplicate_files=0; drop table ordinary_db.{table_name}; attach table ordinary_db.{table_name} (part UInt8, key UInt64) @@ -53,10 +92,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide order by key partition by part settings max_suspicious_broken_parts=0, - disk=disk(type=s3_plain, - 
endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', - access_key_id='minio', - secret_access_key='minio123'); + disk={disk_definition} """ ) diff --git a/tests/integration/test_attach_without_fetching/test.py b/tests/integration/test_attach_without_fetching/test.py index b430387e0f1..67352e2dcbe 100644 --- a/tests/integration/test_attach_without_fetching/test.py +++ b/tests/integration/test_attach_without_fetching/test.py @@ -56,6 +56,7 @@ def check_data(nodes, detached_parts): node.query_with_retry("SYSTEM SYNC REPLICA test") + for node in nodes: print("> Checking data integrity for", node.name) for i in range(10): diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index cac458f616d..34ffdf7a8df 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -591,6 +591,138 @@ def test_zip_archive_with_bad_compression_method(): ) +def test_tar_archive(): + backup_name = f"Disk('backups', 'archive.tar')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + assert os.path.isfile(get_path_to_backup(backup_name)) + + instance.query("DROP TABLE test.table") + assert instance.query("EXISTS test.table") == "0\n" + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + +def test_tar_bz2_archive(): + backup_name = f"Disk('backups', 'archive.tar.bz2')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + assert os.path.isfile(get_path_to_backup(backup_name)) + + instance.query("DROP TABLE test.table") + assert instance.query("EXISTS test.table") == "0\n" + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + +def test_tar_gz_archive(): + backup_name = f"Disk('backups', 'archive.tar.gz')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + assert os.path.isfile(get_path_to_backup(backup_name)) + + instance.query("DROP TABLE test.table") + assert instance.query("EXISTS test.table") == "0\n" + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + +def test_tar_lzma_archive(): + backup_name = f"Disk('backups', 'archive.tar.lzma')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + assert os.path.isfile(get_path_to_backup(backup_name)) + + instance.query("DROP TABLE test.table") + assert instance.query("EXISTS test.table") == "0\n" + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + +def test_tar_zst_archive(): + backup_name = f"Disk('backups', 'archive.tar.zst')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + assert 
os.path.isfile(get_path_to_backup(backup_name)) + + instance.query("DROP TABLE test.table") + assert instance.query("EXISTS test.table") == "0\n" + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + +def test_tar_xz_archive(): + backup_name = f"Disk('backups', 'archive.tar.xz')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + assert os.path.isfile(get_path_to_backup(backup_name)) + + instance.query("DROP TABLE test.table") + assert instance.query("EXISTS test.table") == "0\n" + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + +def test_tar_archive_with_password(): + backup_name = f"Disk('backups', 'archive_with_password.tar')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + expected_error = "Setting a password is not currently supported for libarchive" + assert expected_error in instance.query_and_get_error( + f"BACKUP TABLE test.table TO {backup_name} SETTINGS id='tar_archive_with_password', password='password123'" + ) + assert ( + instance.query( + "SELECT status FROM system.backups WHERE id='tar_archive_with_password'" + ) + == "BACKUP_FAILED\n" + ) + + +def test_tar_archive_with_bad_compression_method(): + backup_name = f"Disk('backups', 'archive_with_bad_compression_method.tar')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + expected_error = "Using compression_method and compression_level options are not supported for tar archives" + assert expected_error in instance.query_and_get_error( + f"BACKUP TABLE test.table TO {backup_name} SETTINGS id='tar_archive_with_bad_compression_method', compression_method='foobar'" + ) + assert ( + instance.query( + "SELECT status FROM system.backups WHERE id='tar_archive_with_bad_compression_method'" + ) + == "BACKUP_FAILED\n" + ) + + def test_async(): create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -885,7 +1017,7 @@ def test_required_privileges(): instance.query("CREATE USER u1") backup_name = new_backup_name() - expected_error = "necessary to have the grant BACKUP ON test.table" + expected_error = "necessary to have the grant BACKUP ON test.`table`" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE test.table TO {backup_name}", user="u1" ) @@ -893,7 +1025,7 @@ def test_required_privileges(): instance.query("GRANT BACKUP ON test.table TO u1") instance.query(f"BACKUP TABLE test.table TO {backup_name}", user="u1") - expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.`table`" assert expected_error in instance.query_and_get_error( f"RESTORE TABLE test.table FROM {backup_name}", user="u1" ) @@ -910,7 +1042,7 @@ def test_required_privileges(): instance.query("DROP TABLE test.table") - expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.`table`" assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u1" ) @@ -963,7 +1095,7 @@ def test_system_users(): 
instance.query("GRANT r1 TO r2 WITH ADMIN OPTION") instance.query("GRANT r2 TO u1") - instance.query("CREATE SETTINGS PROFILE prof1 SETTINGS custom_b=2 TO u1") + instance.query("CREATE SETTINGS PROFILE `prof1` SETTINGS custom_b=2 TO u1") instance.query("CREATE ROW POLICY rowpol1 ON test.table USING x<50 TO u1") instance.query("CREATE QUOTA q1 TO r1") @@ -984,7 +1116,7 @@ def test_system_users(): assert ( instance.query("SHOW CREATE USER u1") - == "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS PROFILE default, custom_a = 1\n" + == "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS PROFILE `default`, custom_a = 1\n" ) assert instance.query("SHOW GRANTS FOR u1") == TSV( ["GRANT SELECT ON test.* TO u1", "GRANT r2 TO u1"] @@ -998,11 +1130,11 @@ def test_system_users(): assert ( instance.query("SHOW CREATE SETTINGS PROFILE prof1") - == "CREATE SETTINGS PROFILE prof1 SETTINGS custom_b = 2 TO u1\n" + == "CREATE SETTINGS PROFILE `prof1` SETTINGS custom_b = 2 TO u1\n" ) assert ( instance.query("SHOW CREATE ROW POLICY rowpol1") - == "CREATE ROW POLICY rowpol1 ON test.table FOR SELECT USING x < 50 TO u1\n" + == "CREATE ROW POLICY rowpol1 ON test.`table` FOR SELECT USING x < 50 TO u1\n" ) assert instance.query("SHOW CREATE QUOTA q1") == "CREATE QUOTA q1 TO r1\n" diff --git a/tests/integration/test_backup_restore_new/test_cancel_backup.py b/tests/integration/test_backup_restore_new/test_cancel_backup.py index 06bcb5eadfc..cce23a7e932 100644 --- a/tests/integration/test_backup_restore_new/test_cancel_backup.py +++ b/tests/integration/test_backup_restore_new/test_cancel_backup.py @@ -177,7 +177,7 @@ def cancel_restore(restore_id): def test_cancel_backup(): # We use partitioning so backups would contain more files. node.query( - "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%5" + "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%20" ) node.query(f"INSERT INTO tbl SELECT number FROM numbers(500)") @@ -196,7 +196,8 @@ def test_cancel_backup(): start_restore(try_restore_id_1, backup_id) cancel_restore(try_restore_id_1) - node.query(f"DROP TABLE tbl SYNC") + # IF EXISTS because it's unknown whether RESTORE had managed to create a table before it got cancelled. + node.query(f"DROP TABLE IF EXISTS tbl SYNC") restore_id = uuid.uuid4().hex start_restore(restore_id, backup_id) diff --git a/tests/integration/test_backup_restore_s3/configs/s3_settings.xml b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml index 981cf67bbe9..61ef7759b57 100644 --- a/tests/integration/test_backup_restore_s3/configs/s3_settings.xml +++ b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml @@ -1,5 +1,6 @@ + 0 http://minio1:9001/root/data/backups/multipart/ - - - system -
query_log
- - - 1000 - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs/config-copier.xml b/tests/integration/test_cluster_copier/configs/config-copier.xml deleted file mode 100644 index 590b1892f8d..00000000000 --- a/tests/integration/test_cluster_copier/configs/config-copier.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - diff --git a/tests/integration/test_cluster_copier/configs/users.xml b/tests/integration/test_cluster_copier/configs/users.xml deleted file mode 100644 index b463dfc81e7..00000000000 --- a/tests/integration/test_cluster_copier/configs/users.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - 1 - - 5 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml b/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml deleted file mode 100644 index 9de7b57de27..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - false - - first - 9000 - - - - false - - second - 9000 - - - - false - - third - 9000 - - - - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml b/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml deleted file mode 100644 index 64fa32335ab..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml b/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml deleted file mode 100644 index d0cab0fafb7..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - - - - zoo1 - 2181 - - - zoo2 - 2181 - - - zoo3 - 2181 - - 2000 - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/users.xml b/tests/integration/test_cluster_copier/configs_three_nodes/users.xml deleted file mode 100644 index badaf46a5ca..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/users.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - 1 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml deleted file mode 100644 index 38d88308631..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml deleted file mode 100644 index 64fa32335ab..00000000000 --- 
a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml deleted file mode 100644 index 8306f40ad6a..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - - - /jbod1/ - - - /jbod2/ - - - /external/ - - - - - - - - external - -
- jbod1 - jbod2 -
-
-
-
- -
- -
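For context: the storage_configuration.xml deleted just above declared the jbod1/jbod2/external disks and the 'external_with_jbods' policy used by the removed copier TTL-move task. Below is a minimal sketch, not part of this diff, of the kind of table that policy served, written in the node.query() style these integration tests use; the table name, column types, and the `node` handle are assumptions for illustration, while the engine clause mirrors the one in the deleted task_ttl_move_to_volume.xml later in this patch.

# Illustrative sketch only (not part of the diff). Assumes a ClickHouseInstance
# handle named `node`; the table name and column types are made up, the TTL and
# storage_policy clauses are taken verbatim from the deleted copier task file.
node.query(
    """
    CREATE TABLE move_to_volume_example
    (
        Column1 String,
        Column2 UInt64,
        Column3 Date
    )
    ENGINE = MergeTree()
    PARTITION BY toYYYYMMDD(Column3)
    ORDER BY (Column3, Column2, Column1)
    TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external'
    SETTINGS storage_policy = 'external_with_jbods'
    """
)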
diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml b/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml deleted file mode 100644 index 55bd24816ae..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - - - - zoo1 - 2181 - - 2000 - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/users.xml b/tests/integration/test_cluster_copier/configs_two_nodes/users.xml deleted file mode 100644 index badaf46a5ca..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/users.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - 1 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/task0_description.xml b/tests/integration/test_cluster_copier/task0_description.xml deleted file mode 100644 index 8d74d0bdde0..00000000000 --- a/tests/integration/test_cluster_copier/task0_description.xml +++ /dev/null @@ -1,95 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - cluster0 - default - hits - - cluster1 - default - hits - - 2 - - 3 4 5 6 1 2 0 - - - ENGINE=ReplicatedMergeTree PARTITION BY d % 3 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - 255.255.255.255 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_drop_target_partition.xml b/tests/integration/test_cluster_copier/task_drop_target_partition.xml deleted file mode 100644 index dc8e6452243..00000000000 --- a/tests/integration/test_cluster_copier/task_drop_target_partition.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_drop_target_partition - source - - destination - db_drop_target_partition - destination - - true - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_month_to_week_description.xml b/tests/integration/test_cluster_copier/task_month_to_week_description.xml deleted file mode 100644 index bc290ca397f..00000000000 --- a/tests/integration/test_cluster_copier/task_month_to_week_description.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - 4 - - - - 1 - 2 - - - - 0 - - - - - - cluster0 - default - a - - cluster1 - default - b - - - - 2 - - - ENGINE= - ReplicatedMergeTree - PARTITION BY toMonday(date) - ORDER BY d - - - - jumpConsistentHash(intHash64(d), 2) - - - - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - 255.255.255.255 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_no_arg.xml b/tests/integration/test_cluster_copier/task_no_arg.xml deleted file mode 100644 index 262ff073537..00000000000 --- a/tests/integration/test_cluster_copier/task_no_arg.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - 1 - - s0_0_0 - 9000 - - - - - - - 1 - - s1_1_0 
- 9000 - - - - - - 1 - - - - source_cluster - default - copier_test1 - - default_cluster - default - copier_test1_1 - ENGINE = MergeTree PARTITION BY date ORDER BY (date, sipHash64(date)) SAMPLE BY sipHash64(date) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_no_index.xml b/tests/integration/test_cluster_copier/task_no_index.xml deleted file mode 100644 index 265f99e21a6..00000000000 --- a/tests/integration/test_cluster_copier/task_no_index.xml +++ /dev/null @@ -1,109 +0,0 @@ - - - - - false - - s0_0_0 - 9000 - - - - - - - false - - s1_1_0 - 9000 - - - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - default - ontime - - - - destination_cluster - default - ontime22 - - - - - - - ENGINE = MergeTree() PARTITION BY Year ORDER BY (Year, FlightDate) SETTINGS index_granularity=8192 - - - - - jumpConsistentHash(intHash64(Year), 2) - - - - - - - 2017 - - - - - - - diff --git a/tests/integration/test_cluster_copier/task_non_partitioned_table.xml b/tests/integration/test_cluster_copier/task_non_partitioned_table.xml deleted file mode 100644 index d5424b95f45..00000000000 --- a/tests/integration/test_cluster_copier/task_non_partitioned_table.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - 1 - - s0_0_0 - 9000 - - - - - - - 1 - - s1_1_0 - 9000 - - - - - - 1 - - - - source_cluster - default - copier_test1 - - default_cluster - default - copier_test1_1 - ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_self_copy.xml b/tests/integration/test_cluster_copier/task_self_copy.xml deleted file mode 100644 index 21d577bc397..00000000000 --- a/tests/integration/test_cluster_copier/task_self_copy.xml +++ /dev/null @@ -1,63 +0,0 @@ - - 9440 - - - - false - - s0_0_0 - 9000 - dbuser - 12345678 - 0 - - - - - - - false - - s0_0_0 - 9000 - dbuser - 12345678 - 0 - - - - - - 2 - - - 1 - - - - 0 - - - - 3 - 1 - - - - - source_cluster - db1 - source_table - - destination_cluster - db2 - destination_table - - - ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192 - - - rand() - - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/task_skip_index.xml b/tests/integration/test_cluster_copier/task_skip_index.xml deleted file mode 100644 index b04cec963d4..00000000000 --- a/tests/integration/test_cluster_copier/task_skip_index.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_skip_index - source - - destination - db_skip_index - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_taxi_data.xml b/tests/integration/test_cluster_copier/task_taxi_data.xml deleted file mode 100644 index 94fa5087338..00000000000 --- a/tests/integration/test_cluster_copier/task_taxi_data.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - false - - first - 9000 - - - - false - - second - 9000 - - - - false - - third - 9000 - - - - - - 2 - - - - events - dailyhistory - yellow_tripdata_staging - events - monthlyhistory - yellow_tripdata_staging - Engine=ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id) ORDER BY (tpep_pickup_datetime, id) PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime)) - sipHash64(id) % 3 - - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/task_test_block_size.xml 
b/tests/integration/test_cluster_copier/task_test_block_size.xml deleted file mode 100644 index bf29c7e1832..00000000000 --- a/tests/integration/test_cluster_copier/task_test_block_size.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - 1 - - - - 1 - - - - - - - - - shard_0_0 - default - test_block_size - - cluster1 - default - test_block_size - - - '1970-01-01' - - - - ENGINE= - ReplicatedMergeTree - ORDER BY d PARTITION BY partition - - - - jumpConsistentHash(intHash64(d), 2) - - - - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_trivial.xml b/tests/integration/test_cluster_copier/task_trivial.xml deleted file mode 100644 index a3b8bc03888..00000000000 --- a/tests/integration/test_cluster_copier/task_trivial.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - source_trivial_cluster - default - trivial - - destination_trivial_cluster - default - trivial - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/hits', '{replica}') PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - - first_trivial - 9000 - - - - - - - - - second_trivial - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml b/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml deleted file mode 100644 index 0197dee0181..00000000000 --- a/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - source_trivial_cluster - default - trivial_without_arguments - - destination_trivial_cluster - default - trivial_without_arguments - - - ENGINE=ReplicatedMergeTree() PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - - first_trivial - 9000 - - - - - - - - - second_trivial - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_ttl_columns.xml b/tests/integration/test_cluster_copier/task_ttl_columns.xml deleted file mode 100644 index 2069c509c87..00000000000 --- a/tests/integration/test_cluster_copier/task_ttl_columns.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_ttl_columns - source - - destination - db_ttl_columns - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml b/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml deleted file mode 100644 index 2a51fa7a66d..00000000000 --- a/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_move_to_volume - source - - destination - db_move_to_volume - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' SETTINGS storage_policy = 'external_with_jbods' - rand() - - - diff --git 
a/tests/integration/test_cluster_copier/task_with_different_schema.xml b/tests/integration/test_cluster_copier/task_with_different_schema.xml deleted file mode 100644 index e1e6ee4dc42..00000000000 --- a/tests/integration/test_cluster_copier/task_with_different_schema.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_different_schema - source - - destination - db_different_schema - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column9, Column1, Column2, Column3, Column4) - rand() - - - diff --git a/tests/integration/test_cluster_copier/test.py b/tests/integration/test_cluster_copier/test.py deleted file mode 100644 index be71fc21e33..00000000000 --- a/tests/integration/test_cluster_copier/test.py +++ /dev/null @@ -1,653 +0,0 @@ -import os -import random -import sys -import time -import kazoo -import pytest -import string -import random -from contextlib import contextmanager -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - -COPYING_FAIL_PROBABILITY = 0.2 -MOVING_FAIL_PROBABILITY = 0.2 - -cluster = ClickHouseCluster(__file__) - - -def generateRandomString(count): - return "".join( - random.choice(string.ascii_uppercase + string.digits) for _ in range(count) - ) - - -def check_all_hosts_sucesfully_executed(tsv_content, num_hosts): - M = TSV.toMat(tsv_content) - hosts = [(l[0], l[1]) for l in M] # (host, port) - codes = [l[2] for l in M] - - assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, "\n" + tsv_content - assert len(set(codes)) == 1, "\n" + tsv_content - assert codes[0] == "0", "\n" + tsv_content - - -def ddl_check_query(instance, query, num_hosts=3): - contents = instance.query(query) - check_all_hosts_sucesfully_executed(contents, num_hosts) - return contents - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - clusters_schema = { - "0": {"0": ["0", "1"], "1": ["0"]}, - "1": {"0": ["0", "1"], "1": ["0"]}, - } - - for cluster_name, shards in clusters_schema.items(): - for shard_name, replicas in shards.items(): - for replica_name in replicas: - name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name) - cluster.add_instance( - name, - main_configs=[ - "configs/conf.d/query_log.xml", - "configs/conf.d/ddl.xml", - "configs/conf.d/clusters.xml", - ], - user_configs=["configs/users.xml"], - macros={ - "cluster": cluster_name, - "shard": shard_name, - "replica": replica_name, - }, - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class Task1: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_simple_" + generateRandomString(10) - self.container_task_file = "/task0_description.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task0_description.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - for cluster_num in ["0", "1"]: - ddl_check_query( - instance, - "DROP DATABASE IF EXISTS default ON CLUSTER cluster{} SYNC".format( - cluster_num - ), - ) - ddl_check_query( - instance, - "CREATE DATABASE default ON CLUSTER cluster{} ".format(cluster_num), - ) - - ddl_check_query( - instance, - "CREATE TABLE hits ON CLUSTER cluster0 (d UInt64, d1 UInt64 MATERIALIZED d+1) " - + "ENGINE=ReplicatedMergeTree " - + "PARTITION BY d % 3 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16", - ) - ddl_check_query( - instance, - "CREATE TABLE hits_all ON CLUSTER cluster0 (d UInt64) ENGINE=Distributed(cluster0, default, hits, d)", - ) - ddl_check_query( - instance, - "CREATE TABLE hits_all ON CLUSTER cluster1 (d UInt64) ENGINE=Distributed(cluster1, default, hits, d + 1)", - ) - instance.query( - "INSERT INTO hits_all SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - assert ( - self.cluster.instances["s0_0_0"] - .query("SELECT count() FROM hits_all") - .strip() - == "1002" - ) - assert ( - self.cluster.instances["s1_0_0"] - .query("SELECT count() FROM hits_all") - .strip() - == "1002" - ) - - assert ( - self.cluster.instances["s1_0_0"] - .query("SELECT DISTINCT d % 2 FROM hits") - .strip() - == "1" - ) - assert ( - self.cluster.instances["s1_1_0"] - .query("SELECT DISTINCT d % 2 FROM hits") - .strip() - == "0" - ) - - instance = self.cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster1") - ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster1") - - -class Task2: - def __init__(self, cluster, unique_zk_path): - self.cluster = cluster - self.zk_task_path = ( - "/clickhouse-copier/task_month_to_week_partition_" + generateRandomString(5) - ) - self.unique_zk_path = generateRandomString(10) - self.container_task_file = "/task_month_to_week_description.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_month_to_week_description.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - for cluster_num in ["0", "1"]: - ddl_check_query( - instance, - "DROP DATABASE IF EXISTS default ON CLUSTER cluster{}".format( - cluster_num - ), - ) - ddl_check_query( - instance, - "CREATE DATABASE IF NOT EXISTS default ON CLUSTER cluster{}".format( - cluster_num - ), - ) - - ddl_check_query( - instance, - "CREATE TABLE a ON CLUSTER cluster0 (date Date, d UInt64, d1 UInt64 ALIAS d+1) " - "ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/" - + self.unique_zk_path - + "', " - "'{replica}', date, intHash64(d), (date, intHash64(d)), 8192)", - ) - ddl_check_query( - instance, - "CREATE TABLE a_all ON CLUSTER cluster0 (date Date, d UInt64) ENGINE=Distributed(cluster0, default, a, d)", - ) - - instance.query( - "INSERT INTO a_all SELECT toDate(17581 + number) AS date, number AS d FROM system.numbers LIMIT 85", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - assert TSV( - self.cluster.instances["s0_0_0"].query( - "SELECT count() FROM cluster(cluster0, default, a)" - ) - ) == TSV("85\n") - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT count(), uniqExact(date) FROM cluster(cluster1, default, b)" - ) - ) == TSV("85\t85\n") - - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT DISTINCT jumpConsistentHash(intHash64(d), 2) FROM b" - ) - ) == TSV("0\n") - assert TSV( - self.cluster.instances["s1_1_0"].query( - "SELECT DISTINCT jumpConsistentHash(intHash64(d), 2) FROM b" - ) - ) == TSV("1\n") - - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'" - ) - ) == TSV("1\n") - assert TSV( - self.cluster.instances["s1_1_0"].query( - "SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'" - ) - ) == TSV("1\n") - - instance = cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE a ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1") - - -class Task_test_block_size: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = ( - "/clickhouse-copier/task_test_block_size_" + generateRandomString(5) - ) - self.rows = 1000000 - self.container_task_file = "/task_test_block_size.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_test_block_size.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - ddl_check_query( - instance, - """ - CREATE TABLE test_block_size ON CLUSTER shard_0_0 (partition Date, d UInt64) - ENGINE=ReplicatedMergeTree - ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d)""", - 2, - ) - - instance.query( - "INSERT INTO test_block_size SELECT toDate(0) AS partition, number as d FROM system.numbers LIMIT {}".format( - self.rows - ) - ) - - def check(self): - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT count() FROM cluster(cluster1, default, test_block_size)" - ) - ) == TSV("{}\n".format(self.rows)) - - instance = cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2) - ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1") - - -class Task_no_index: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_no_index_" + generateRandomString( - 5 - ) - self.rows = 1000000 - self.container_task_file = "/task_no_index.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_no_index.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS ontime SYNC") - instance.query( - "create table IF NOT EXISTS ontime (Year UInt16, FlightDate String) ENGINE = Memory" - ) - instance.query( - "insert into ontime values (2016, 'test6'), (2017, 'test7'), (2018, 'test8')" - ) - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT Year FROM ontime22") - ) == TSV("2017\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE ontime") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE ontime22") - - -class Task_no_arg: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_no_arg" - self.rows = 1000000 - self.container_task_file = "/task_no_arg.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_no_arg.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS copier_test1 SYNC") - instance.query( - "create table if not exists copier_test1 (date Date, id UInt32) engine = MergeTree PARTITION BY date ORDER BY date SETTINGS index_granularity = 8192" - ) - instance.query("insert into copier_test1 values ('2016-01-01', 10);") - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT date FROM copier_test1_1") - ) == TSV("2016-01-01\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE copier_test1 SYNC") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE copier_test1_1 SYNC") - - -class Task_non_partitioned_table: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_non_partitoned_table" - self.rows = 1000000 - self.container_task_file = "/task_non_partitioned_table.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_non_partitioned_table.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS copier_test1 SYNC") - instance.query( - "create table copier_test1 (date Date, id UInt32) engine = MergeTree ORDER BY date SETTINGS index_granularity = 8192" - ) - instance.query("insert into copier_test1 values ('2016-01-01', 10);") - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT date FROM copier_test1_1") - ) == TSV("2016-01-01\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE copier_test1") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE copier_test1_1") - - -class Task_self_copy: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_self_copy" - self.container_task_file = "/task_self_copy.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_self_copy.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP DATABASE IF EXISTS db1 SYNC") - instance.query("DROP DATABASE IF EXISTS db2 SYNC") - instance.query("CREATE DATABASE IF NOT EXISTS db1;") - instance.query( - "CREATE TABLE IF NOT EXISTS db1.source_table (`a` Int8, `b` String, `c` Int8) ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192" - ) - instance.query("CREATE DATABASE IF NOT EXISTS db2;") - instance.query( - "CREATE TABLE IF NOT EXISTS db2.destination_table (`a` Int8, `b` String, `c` Int8) ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192" - ) - instance.query("INSERT INTO db1.source_table VALUES (1, 'ClickHouse', 1);") - instance.query("INSERT INTO db1.source_table VALUES (2, 'Copier', 2);") - - def check(self): - instance = cluster.instances["s0_0_0"] - assert TSV( - instance.query("SELECT * FROM db2.destination_table ORDER BY a") - ) == TSV(instance.query("SELECT * FROM db1.source_table ORDER BY a")) - instance = cluster.instances["s0_0_0"] - instance.query("DROP DATABASE IF EXISTS db1 SYNC") - instance.query("DROP DATABASE IF EXISTS db2 SYNC") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - try: - zk.delete("/clickhouse-copier", recursive=True) - except kazoo.exceptions.NoNodeError: - print("No node /clickhouse-copier. It is Ok in first test.") - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - print(cmd) - - copiers = random.sample(list(started_cluster.instances.keys()), 3) - - for instance_name in copiers: - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - print("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - print(output) - copiers_exec_ids.append(exec_id) - print( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # Wait for copiers stopping and check their return codes - for exec_id, instance_name in zip(copiers_exec_ids, copiers): - instance = started_cluster.instances[instance_name] - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(0.5) - - assert res["ExitCode"] == 0, "Instance: {} ({}). 
Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) - - -# Tests - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_simple(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - ["--experimental-use-sample-offset", "1"], - ) - else: - execute_task(started_cluster, Task1(started_cluster), []) - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_with_recovering(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--experimental-use-sample-offset", - "1", - "--max-table-tries", - "10", - ], - ) - else: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--max-table-tries", - "10", - ], - ) - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_with_recovering_after_move_faults(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--move-fault-probability", - str(MOVING_FAIL_PROBABILITY), - "--experimental-use-sample-offset", - "1", - ], - ) - else: - execute_task( - started_cluster, - Task1(started_cluster), - ["--move-fault-probability", str(MOVING_FAIL_PROBABILITY)], - ) - - -@pytest.mark.timeout(600) -def test_copy_month_to_week_partition(started_cluster): - execute_task(started_cluster, Task2(started_cluster, "test1"), []) - - -@pytest.mark.timeout(600) -def test_copy_month_to_week_partition_with_recovering(started_cluster): - execute_task( - started_cluster, - Task2(started_cluster, "test2"), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--max-table-tries", - "10", - ], - ) - - -@pytest.mark.timeout(600) -def test_copy_month_to_week_partition_with_recovering_after_move_faults( - started_cluster, -): - execute_task( - started_cluster, - Task2(started_cluster, "test3"), - ["--move-fault-probability", str(MOVING_FAIL_PROBABILITY)], - ) - - -def test_block_size(started_cluster): - execute_task(started_cluster, Task_test_block_size(started_cluster), []) - - -def test_no_index(started_cluster): - execute_task(started_cluster, Task_no_index(started_cluster), []) - - -def test_no_arg(started_cluster): - execute_task(started_cluster, Task_no_arg(started_cluster), []) - - -def test_non_partitioned_table(started_cluster): - execute_task(started_cluster, Task_non_partitioned_table(started_cluster), []) - - -def test_self_copy(started_cluster): - execute_task(started_cluster, Task_self_copy(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_three_nodes.py b/tests/integration/test_cluster_copier/test_three_nodes.py deleted file mode 100644 index e7d07757adb..00000000000 --- a/tests/integration/test_cluster_copier/test_three_nodes.py +++ /dev/null @@ -1,286 +0,0 @@ -import os -import sys -import time -import logging -import pytest - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - -cluster = ClickHouseCluster(__file__) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in ["first", "second", 
"third"]: - cluster.add_instance( - name, - main_configs=[ - "configs_three_nodes/conf.d/clusters.xml", - "configs_three_nodes/conf.d/ddl.xml", - ], - user_configs=["configs_three_nodes/users.xml"], - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class Task: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task" - self.container_task_file = "/task_taxi_data.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_taxi_data.xml"), - self.container_task_file, - ) - logging.debug( - f"Copied task file to container of '{instance_name}' instance. Path {self.container_task_file}" - ) - - def start(self): - for name in ["first", "second", "third"]: - node = cluster.instances[name] - node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;") - node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;") - - first = cluster.instances["first"] - - # daily partition database - first.query("CREATE DATABASE IF NOT EXISTS dailyhistory on cluster events;") - first.query( - """CREATE TABLE dailyhistory.yellow_tripdata_staging ON CLUSTER events - ( - id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime('UTC'), - tpep_dropoff_datetime DateTime('UTC'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, junk2 String - ) - Engine = ReplacingMergeTree() - PRIMARY KEY (tpep_pickup_datetime, id) - ORDER BY (tpep_pickup_datetime, id) - PARTITION BY (toYYYYMMDD(tpep_pickup_datetime))""" - ) - - first.query( - """CREATE TABLE dailyhistory.yellow_tripdata - ON CLUSTER events - AS dailyhistory.yellow_tripdata_staging - ENGINE = Distributed('events', 'dailyhistory', yellow_tripdata_staging, sipHash64(id) % 3);""" - ) - - first.query( - """INSERT INTO dailyhistory.yellow_tripdata - SELECT * FROM generateRandom( - 'id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime(\\'UTC\\'), - tpep_dropoff_datetime DateTime(\\'UTC\\'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, - junk2 String', - 1, 10, 2) LIMIT 50;""" - ) - - # monthly partition database - first.query("create database IF NOT EXISTS monthlyhistory on cluster events;") - first.query( - """CREATE TABLE monthlyhistory.yellow_tripdata_staging ON CLUSTER events - ( - id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime('UTC'), - tpep_dropoff_datetime DateTime('UTC'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude 
Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, - junk2 String - ) - Engine = ReplacingMergeTree() - PRIMARY KEY (tpep_pickup_datetime, id) - ORDER BY (tpep_pickup_datetime, id) - PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))""" - ) - - first.query( - """CREATE TABLE monthlyhistory.yellow_tripdata - ON CLUSTER events - AS monthlyhistory.yellow_tripdata_staging - ENGINE = Distributed('events', 'monthlyhistory', yellow_tripdata_staging, sipHash64(id) % 3);""" - ) - - def check(self): - first = cluster.instances["first"] - a = TSV(first.query("SELECT count() from dailyhistory.yellow_tripdata")) - b = TSV(first.query("SELECT count() from monthlyhistory.yellow_tripdata")) - assert a == b, "Distributed tables" - - for instance_name, instance in cluster.instances.items(): - instance = cluster.instances[instance_name] - a = instance.query( - "SELECT count() from dailyhistory.yellow_tripdata_staging" - ) - b = instance.query( - "SELECT count() from monthlyhistory.yellow_tripdata_staging" - ) - assert a == b, "MergeTree tables on each shard" - - a = TSV( - instance.query( - "SELECT sipHash64(*) from dailyhistory.yellow_tripdata_staging ORDER BY id" - ) - ) - b = TSV( - instance.query( - "SELECT sipHash64(*) from monthlyhistory.yellow_tripdata_staging ORDER BY id" - ) - ) - - assert a == b, "Data on each shard" - - for name in ["first", "second", "third"]: - node = cluster.instances[name] - node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;") - node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - logging.debug("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - logging.debug(f"execute_task cmd: {cmd}") - - for instance_name in started_cluster.instances.keys(): - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs_three_nodes/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - logging.info("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - logging.info(output) - copiers_exec_ids.append(exec_id) - logging.info( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # time.sleep(1000) - - # Wait for copiers stopping and check their return codes - for exec_id, instance in zip( - copiers_exec_ids, iter(started_cluster.instances.values()) - ): - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(1) 
- - assert res["ExitCode"] == 0, "Instance: {} ({}). Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) - - -# Tests -@pytest.mark.timeout(600) -def test(started_cluster): - execute_task(started_cluster, Task(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_trivial.py b/tests/integration/test_cluster_copier/test_trivial.py deleted file mode 100644 index b8060583ef8..00000000000 --- a/tests/integration/test_cluster_copier/test_trivial.py +++ /dev/null @@ -1,227 +0,0 @@ -import os -import sys -import time -import random -import string - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import kazoo -import pytest -import docker - - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - - -COPYING_FAIL_PROBABILITY = 0.1 -MOVING_FAIL_PROBABILITY = 0.1 - -cluster = ClickHouseCluster(__file__) - - -def generateRandomString(count): - return "".join( - random.choice(string.ascii_uppercase + string.digits) for _ in range(count) - ) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in ["first_trivial", "second_trivial"]: - instance = cluster.add_instance( - name, - main_configs=["configs/conf.d/clusters_trivial.xml"], - user_configs=["configs_two_nodes/users.xml"], - macros={ - "cluster": name, - "shard": "the_only_shard", - "replica": "the_only_replica", - }, - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class TaskTrivial: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_trivial" - self.copier_task_config = open( - os.path.join(CURRENT_TEST_DIR, "task_trivial.xml"), "r" - ).read() - - def start(self): - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - for node in [source, destination]: - node.query("DROP DATABASE IF EXISTS default") - node.query("CREATE DATABASE IF NOT EXISTS default") - - source.query( - "CREATE TABLE trivial (d UInt64, d1 UInt64 MATERIALIZED d+1)" - "ENGINE=ReplicatedMergeTree('/clickhouse/tables/source_trivial_cluster/1/trivial/{}', '1') " - "PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16".format( - generateRandomString(10) - ) - ) - - source.query( - "INSERT INTO trivial SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - zk = cluster.get_kazoo_client("zoo1") - status_data, _ = zk.get(self.zk_task_path + "/status") - assert ( - status_data - == b'{"hits":{"all_partitions_count":5,"processed_partitions_count":5}}' - ) - - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - assert TSV(source.query("SELECT count() FROM trivial")) == TSV("1002\n") - assert TSV(destination.query("SELECT count() FROM trivial")) == TSV("1002\n") - - for node in [source, destination]: - node.query("DROP TABLE trivial") - - -class TaskReplicatedWithoutArguments: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_trivial_without_arguments" - self.copier_task_config = open( - os.path.join(CURRENT_TEST_DIR, "task_trivial_without_arguments.xml"), "r" - ).read() - - def start(self): - source = cluster.instances["first_trivial"] - destination = 
cluster.instances["second_trivial"] - - for node in [source, destination]: - node.query("DROP DATABASE IF EXISTS default") - node.query("CREATE DATABASE IF NOT EXISTS default") - - source.query( - "CREATE TABLE trivial_without_arguments ON CLUSTER source_trivial_cluster (d UInt64, d1 UInt64 MATERIALIZED d+1) " - "ENGINE=ReplicatedMergeTree() " - "PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16" - ) - - source.query( - "INSERT INTO trivial_without_arguments SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - zk = cluster.get_kazoo_client("zoo1") - status_data, _ = zk.get(self.zk_task_path + "/status") - assert ( - status_data - == b'{"hits":{"all_partitions_count":5,"processed_partitions_count":5}}' - ) - - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - assert TSV( - source.query("SELECT count() FROM trivial_without_arguments") - ) == TSV("1002\n") - assert TSV( - destination.query("SELECT count() FROM trivial_without_arguments") - ) == TSV("1002\n") - - for node in [source, destination]: - node.query("DROP TABLE trivial_without_arguments") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - try: - zk.delete("/clickhouse-copier", recursive=True) - except kazoo.exceptions.NoNodeError: - print("No node /clickhouse-copier. It is Ok in first test.") - - zk_task_path = task.zk_task_path - zk.ensure_path(zk_task_path) - zk.create(zk_task_path + "/description", task.copier_task_config.encode()) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - zk_task_path, - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - copiers = list(started_cluster.instances.keys()) - - for instance_name in copiers: - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - print("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - print(output) - copiers_exec_ids.append(exec_id) - print( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # Wait for copiers stopping and check their return codes - for exec_id, instance_name in zip(copiers_exec_ids, copiers): - instance = started_cluster.instances[instance_name] - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(0.5) - - assert res["ExitCode"] == 0, "Instance: {} ({}). 
Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(zk_task_path, recursive=True) - - -# Tests - - -def test_trivial_copy(started_cluster): - execute_task(started_cluster, TaskTrivial(started_cluster), []) - - -def test_trivial_without_arguments(started_cluster): - execute_task(started_cluster, TaskReplicatedWithoutArguments(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py deleted file mode 100644 index 1bd3561f24f..00000000000 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ /dev/null @@ -1,597 +0,0 @@ -import os -import sys -import time -import logging -import pytest - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - -cluster = ClickHouseCluster(__file__) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in ["first_of_two", "second_of_two"]: - instance = cluster.add_instance( - name, - main_configs=[ - "configs_two_nodes/conf.d/clusters.xml", - "configs_two_nodes/conf.d/ddl.xml", - "configs_two_nodes/conf.d/storage_configuration.xml", - ], - user_configs=["configs_two_nodes/users.xml"], - with_zookeeper=True, - ) - - cluster.start() - - for name in ["first_of_two", "second_of_two"]: - instance = cluster.instances[name] - instance.exec_in_container(["bash", "-c", "mkdir /jbod1"]) - instance.exec_in_container(["bash", "-c", "mkdir /jbod2"]) - instance.exec_in_container(["bash", "-c", "mkdir /external"]) - - yield cluster - - finally: - cluster.shutdown() - - -# Will copy table from `first` node to `second` -class TaskWithDifferentSchema: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_with_different_schema" - self.container_task_file = "/task_with_different_schema.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_with_different_schema.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - second.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_different_schema;") - first.query( - """CREATE TABLE db_different_schema.source - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime, - Column5 UInt16, - Column6 String, - Column7 String, - Column8 String, - Column9 String, - Column10 String, - Column11 String, - Column12 Decimal(3, 1), - Column13 DateTime, - Column14 UInt16 - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9) - ORDER BY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_different_schema.source SELECT * FROM generateRandom( - 'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16, - Column6 String, Column7 String, Column8 String, Column9 String, Column10 String, - Column11 String, Column12 Decimal(3, 1), Column13 DateTime, Column14 UInt16', 1, 10, 2) LIMIT 50;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_different_schema;") - second.query( - """CREATE TABLE db_different_schema.destination - ( - Column1 LowCardinality(String) CODEC(LZ4), - Column2 UInt32 CODEC(LZ4), - Column3 Date CODEC(DoubleDelta, LZ4), - Column4 DateTime CODEC(DoubleDelta, LZ4), - Column5 UInt16 CODEC(LZ4), - Column6 LowCardinality(String) CODEC(ZSTD), - Column7 LowCardinality(String) CODEC(ZSTD), - Column8 LowCardinality(String) CODEC(ZSTD), - Column9 LowCardinality(String) CODEC(ZSTD), - Column10 String CODEC(ZSTD(6)), - Column11 LowCardinality(String) CODEC(LZ4), - Column12 Decimal(3,1) CODEC(LZ4), - Column13 DateTime CODEC(DoubleDelta, LZ4), - Column14 UInt16 CODEC(LZ4) - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column9, Column1, Column2, Column3, Column4);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_different_schema.source") - b = second.query("SELECT count() from db_different_schema.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_different_schema.source - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_different_schema.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - second.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - - -# Just simple copying, but table schema has TTL on columns -# Also table will have slightly different schema -class TaskTTL: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_ttl_columns" - self.container_task_file = "/task_ttl_columns.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - 
instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_ttl_columns.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_ttl_columns;") - first.query( - """CREATE TABLE db_ttl_columns.source - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime, - Column5 UInt16, - Column6 String TTL now() + INTERVAL 1 MONTH, - Column7 Decimal(3, 1) TTL now() + INTERVAL 1 MONTH, - Column8 Tuple(Float64, Float64) TTL now() + INTERVAL 1 MONTH - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_ttl_columns.source SELECT * FROM generateRandom( - 'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16, - Column6 String, Column7 Decimal(3, 1), Column8 Tuple(Float64, Float64)', 1, 10, 2) LIMIT 50;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_ttl_columns;") - second.query( - """CREATE TABLE db_ttl_columns.destination - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime TTL now() + INTERVAL 1 MONTH, - Column5 UInt16 TTL now() + INTERVAL 1 MONTH, - Column6 String TTL now() + INTERVAL 1 MONTH, - Column7 Decimal(3, 1) TTL now() + INTERVAL 1 MONTH, - Column8 Tuple(Float64, Float64) - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_ttl_columns.source") - b = second.query("SELECT count() from db_ttl_columns.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_ttl_columns.source - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_ttl_columns.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - - -class TaskSkipIndex: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_skip_index" - self.container_task_file = "/task_skip_index.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_skip_index.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - second.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_skip_index;") - first.query( - """CREATE TABLE db_skip_index.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String, - INDEX a (Column1 * Column2, Column5) TYPE minmax GRANULARITY 3, - INDEX b (Column1 * length(Column5)) TYPE set(1000) GRANULARITY 4 - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_skip_index.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_skip_index;") - second.query( - """CREATE TABLE db_skip_index.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String, - INDEX a (Column1 * Column2, Column5) TYPE minmax GRANULARITY 3, - INDEX b (Column1 * length(Column5)) TYPE set(1000) GRANULARITY 4 - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_skip_index.source") - b = second.query("SELECT count() from db_skip_index.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_skip_index.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_skip_index.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - second.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - - -class TaskTTLMoveToVolume: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_ttl_move_to_volume" - self.container_task_file = "/task_ttl_move_to_volume.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_ttl_move_to_volume.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["first_of_two"] - - first.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - second.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_move_to_volume;") - first.query( - """CREATE TABLE db_move_to_volume.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' - SETTINGS storage_policy = 'external_with_jbods';""" - ) - - first.query( - """INSERT INTO db_move_to_volume.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_move_to_volume;") - second.query( - """CREATE TABLE db_move_to_volume.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1) - TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' - SETTINGS storage_policy = 'external_with_jbods';""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_move_to_volume.source") - b = second.query("SELECT count() from db_move_to_volume.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_move_to_volume.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_move_to_volume.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - second.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - - -class TaskDropTargetPartition: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_drop_target_partition" - self.container_task_file = "/task_drop_target_partition.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_drop_target_partition.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - second.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_drop_target_partition;") - first.query( - """CREATE TABLE db_drop_target_partition.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3);""" - ) - - first.query( - """INSERT INTO db_drop_target_partition.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_drop_target_partition;") - second.query( - """CREATE TABLE db_drop_target_partition.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - # Insert data in target too. It has to be dropped. - first.query( - """INSERT INTO db_drop_target_partition.destination SELECT * FROM db_drop_target_partition.source;""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_drop_target_partition.source") - b = second.query("SELECT count() from db_drop_target_partition.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_drop_target_partition.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_drop_target_partition.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - second.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - print(cmd) - - for instance_name in started_cluster.instances.keys(): - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs_two_nodes/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - logging.info("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - logging.info(output) - copiers_exec_ids.append(exec_id) - logging.info( - "Copier for {} ({}) has started".format(instance.name, 
instance.ip_address) - ) - - # time.sleep(1000) - - # Wait for copiers stopping and check their return codes - for exec_id, instance in zip( - copiers_exec_ids, iter(started_cluster.instances.values()) - ): - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(1) - - assert res["ExitCode"] == 0, "Instance: {} ({}). Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) diff --git a/tests/integration/test_compression_codec_read/test.py b/tests/integration/test_compression_codec_read/test.py deleted file mode 100644 index b39e5147d38..00000000000 --- a/tests/integration/test_compression_codec_read/test.py +++ /dev/null @@ -1,60 +0,0 @@ -import pytest - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - -cluster = ClickHouseCluster(__file__) - -node1 = cluster.add_instance( - "node1", - image="yandex/clickhouse-server", - tag="20.8.11.17", - with_installed_binary=True, - stay_alive=True, - allow_analyzer=False, -) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - - yield cluster - finally: - cluster.shutdown() - - -def test_default_codec_read(start_cluster): - node1.query("DROP TABLE IF EXISTS test_18340") - - node1.query( - """ - CREATE TABLE test_18340 - ( - `lns` LowCardinality(Nullable(String)), - `ns` Nullable(String), - `s` String, - `ni64` Nullable(Int64), - `ui64` UInt64, - `alns` Array(LowCardinality(Nullable(String))), - `ans` Array(Nullable(String)), - `dt` DateTime, - `i32` Int32 - ) - ENGINE = MergeTree() - PARTITION BY i32 - ORDER BY (s, farmHash64(s)) - SAMPLE BY farmHash64(s) - """ - ) - - node1.query( - "insert into test_18340 values ('test', 'test', 'test', 0, 0, ['a'], ['a'], now(), 0)" - ) - - assert node1.query("SELECT COUNT() FROM test_18340") == "1\n" - - node1.restart_with_latest_version() - - assert node1.query("SELECT COUNT() FROM test_18340") == "1\n" diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index ac59b3428e8..628e1432350 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -72,7 +72,7 @@ 8123 - - 600 + + 600 consumer_hang diff --git a/tests/integration/test_storage_kerberized_kafka/kerberos_image_config.sh b/tests/integration/test_storage_kerberized_kafka/kerberos_image_config.sh index 07437c42359..1c7419a05e7 100644 --- a/tests/integration/test_storage_kerberized_kafka/kerberos_image_config.sh +++ b/tests/integration/test_storage_kerberized_kafka/kerberos_image_config.sh @@ -105,6 +105,9 @@ create_keytabs() { kadmin.local -q "addprinc -randkey kafkauser/instance@${REALM}" kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab kafkauser/instance@${REALM}" + kadmin.local -q "addprinc -randkey anotherkafkauser/instance@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab anotherkafkauser/instance@${REALM}" + chmod g+r /tmp/keytab/clickhouse.keytab } diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index 7856361deda..451e1ab2ccf 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -227,6 +227,58 @@ def test_kafka_json_as_string_no_kdc(kafka_cluster): assert 
instance.contains_in_log("KerberosInit failure:") +def test_kafka_config_from_sql_named_collection(kafka_cluster): + kafka_produce( + kafka_cluster, + "kafka_json_as_string", + [ + '{"t": 123, "e": {"x": "woof"} }', + "", + '{"t": 124, "e": {"x": "test"} }', + '{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"}', + ], + ) + + instance.query( + """ + CREATE NAMED COLLECTION kafka_config AS + kafka.security_protocol = 'SASL_PLAINTEXT', + kafka.sasl_mechanism = 'GSSAPI', + kafka.sasl_kerberos_service_name = 'kafka', + kafka.sasl_kerberos_keytab = '/tmp/keytab/clickhouse.keytab', + kafka.sasl_kerberos_principal = 'anotherkafkauser/instance@TEST.CLICKHOUSE.TECH', + kafka.debug = 'security', + kafka.api_version_request = 'false', + + kafka_broker_list = 'kerberized_kafka1:19092', + kafka_topic_list = 'kafka_json_as_string', + kafka_commit_on_select = 1, + kafka_group_name = 'kafka_json_as_string', + kafka_format = 'JSONAsString', + kafka_flush_interval_ms=1000; + """ + ) + instance.query( + """ + CREATE TABLE test.kafka (field String) + ENGINE = Kafka(kafka_config); + """ + ) + + time.sleep(3) + + result = instance.query("SELECT * FROM test.kafka;") + expected = """\ +{"t": 123, "e": {"x": "woof"} } +{"t": 124, "e": {"x": "test"} } +{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"} +""" + assert TSV(result) == TSV(expected) + assert instance.contains_in_log( + "Parsing of message (topic: kafka_json_as_string, partition: 0, offset: 1) return no rows" + ) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_mysql/configs/named_collections.xml b/tests/integration/test_storage_mysql/configs/named_collections.xml index 04117f32d4b..d20630eaeb3 100644 --- a/tests/integration/test_storage_mysql/configs/named_collections.xml +++ b/tests/integration/test_storage_mysql/configs/named_collections.xml @@ -3,7 +3,7 @@ root clickhouse - mysql57 + mysql80 3306 clickhouse test_table
@@ -16,7 +16,7 @@ root clickhouse - mysql57 + mysql80 1111 clickhouse test_table
@@ -24,7 +24,7 @@ root clickhouse - mysql57 + mysql80 3306 clickhouse test_table
@@ -33,7 +33,7 @@ root clickhouse - mysql57 + mysql80 3306 clickhouse 1 diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index e2257026dc7..b131d8fe51c 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -14,7 +14,7 @@ node1 = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], user_configs=["configs/users.xml"], - with_mysql=True, + with_mysql8=True, ) node2 = cluster.add_instance( "node2", main_configs=["configs/remote_servers.xml"], with_mysql_cluster=True @@ -23,7 +23,7 @@ node3 = cluster.add_instance( "node3", main_configs=["configs/remote_servers.xml"], user_configs=["configs/users.xml"], - with_mysql=True, + with_mysql8=True, ) create_table_sql_template = """ @@ -43,7 +43,7 @@ drop_table_sql_template = """ def get_mysql_conn(started_cluster, host): conn = pymysql.connect( - user="root", password="clickhouse", host=host, port=started_cluster.mysql_port + user="root", password="clickhouse", host=host, port=started_cluster.mysql8_port ) return conn @@ -69,7 +69,7 @@ def started_cluster(): try: cluster.start() - conn = get_mysql_conn(cluster, cluster.mysql_ip) + conn = get_mysql_conn(cluster, cluster.mysql8_ip) create_mysql_db(conn, "clickhouse") ## create mysql db and table @@ -85,13 +85,13 @@ def test_many_connections(started_cluster): table_name = "test_many_connections" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) @@ -116,13 +116,13 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL def test_insert_select(started_cluster): table_name = "test_insert_select" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) @@ -142,13 +142,13 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL def test_replace_select(started_cluster): table_name = "test_replace_select" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse', 1); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse', 1); """.format( table_name, 
table_name ) @@ -173,13 +173,13 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL def test_insert_on_duplicate_select(started_cluster): table_name = "test_insert_on_duplicate_select" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse', 0, 'update money = money + values(money)'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse', 0, 'update money = money + values(money)'); """.format( table_name, table_name ) @@ -205,12 +205,12 @@ def test_where(started_cluster): table_name = "test_where" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) @@ -264,11 +264,11 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL def test_table_function(started_cluster): - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, "table_function") create_mysql_table(conn, "table_function") table_function = ( - "mysql('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse')".format( + "mysql('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse')".format( "table_function" ) ) @@ -309,7 +309,7 @@ def test_table_function(started_cluster): def test_schema_inference(started_cluster): - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, "inference_table") with conn.cursor() as cursor: @@ -317,7 +317,7 @@ def test_schema_inference(started_cluster): "CREATE TABLE clickhouse.inference_table (id INT PRIMARY KEY, data BINARY(16) NOT NULL)" ) - parameters = "'mysql57:3306', 'clickhouse', 'inference_table', 'root', 'clickhouse'" + parameters = "'mysql80:3306', 'clickhouse', 'inference_table', 'root', 'clickhouse'" node1.query( f"CREATE TABLE mysql_schema_inference_engine ENGINE=MySQL({parameters})" @@ -335,7 +335,7 @@ def test_schema_inference(started_cluster): def test_binary_type(started_cluster): - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, "binary_type") with conn.cursor() as cursor: @@ -343,7 +343,7 @@ def test_binary_type(started_cluster): "CREATE TABLE clickhouse.binary_type (id INT PRIMARY KEY, data BINARY(16) NOT NULL)" ) table_function = ( - "mysql('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse')".format( + "mysql('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse')".format( "binary_type" ) ) @@ -363,12 +363,12 @@ def test_enum_type(started_cluster): table_name = "test_enum_type" node1.query(f"DROP TABLE IF EXISTS {table_name}") 
- conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, source Enum8('IP' = 1, 'URL' = 2)) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse', 1); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, source Enum8('IP' = 1, 'URL' = 2)) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse', 1); """.format( table_name, table_name ) @@ -388,7 +388,7 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, source Enum8(' def test_mysql_distributed(started_cluster): table_name = "test_replicas" - conn1 = get_mysql_conn(started_cluster, started_cluster.mysql_ip) + conn1 = get_mysql_conn(started_cluster, started_cluster.mysql8_ip) conn2 = get_mysql_conn(started_cluster, started_cluster.mysql2_ip) conn3 = get_mysql_conn(started_cluster, started_cluster.mysql3_ip) conn4 = get_mysql_conn(started_cluster, started_cluster.mysql4_ip) @@ -422,7 +422,7 @@ def test_mysql_distributed(started_cluster): CREATE TABLE test_replica{} (id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql{}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse');""".format( - i, 57 if i == 1 else i + i, 80 if i == 1 else i ) ) nodes[i - 1].query( @@ -433,11 +433,11 @@ def test_mysql_distributed(started_cluster): # test multiple ports parsing result = node2.query( - """SELECT DISTINCT(name) FROM mysql('mysql{57|2|3}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ + """SELECT DISTINCT(name) FROM mysql('mysql{80|2|3}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ ) assert result == "host1\n" or result == "host2\n" or result == "host3\n" result = node2.query( - """SELECT DISTINCT(name) FROM mysql('mysql57:3306|mysql2:3306|mysql3:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ + """SELECT DISTINCT(name) FROM mysql('mysql80:3306|mysql2:3306|mysql3:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ ) assert result == "host1\n" or result == "host2\n" or result == "host3\n" @@ -457,7 +457,7 @@ def test_mysql_distributed(started_cluster): """ CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) - ENGINE = ExternalDistributed('MySQL', 'mysql{57|2}:3306,mysql{3|4}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ + ENGINE = ExternalDistributed('MySQL', 'mysql{80|2}:3306,mysql{3|4}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ ) # Check only one replica in each shard is used @@ -472,24 +472,24 @@ def test_mysql_distributed(started_cluster): result = node2.query(query) assert result == "host1\nhost2\nhost3\nhost4\n" - # disconnect mysql57 - started_cluster.pause_container("mysql57") + # disconnect mysql + started_cluster.pause_container("mysql80") result = node2.query("SELECT DISTINCT(name) FROM test_shards ORDER BY name") - started_cluster.unpause_container("mysql57") + started_cluster.unpause_container("mysql80") assert result == "host2\nhost4\n" or result == "host3\nhost4\n" def test_external_settings(started_cluster): table_name = "test_external_settings" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, started_cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, started_cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) 
node3.query(f"DROP TABLE IF EXISTS {table_name}") node3.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) @@ -521,7 +521,7 @@ def test_settings_connection_wait_timeout(started_cluster): node1.query(f"DROP TABLE IF EXISTS {table_name}") wait_timeout = 2 - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) @@ -534,7 +534,7 @@ def test_settings_connection_wait_timeout(started_cluster): age UInt32, money UInt32 ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') + ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse') SETTINGS connection_wait_timeout={}, connection_pool_size=1 """.format( table_name, table_name, wait_timeout @@ -584,7 +584,7 @@ def test_settings_connection_wait_timeout(started_cluster): def test_predefined_connection_configuration(started_cluster): - conn = get_mysql_conn(started_cluster, started_cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, started_cluster.mysql8_ip) table_name = "test_table" drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) @@ -655,7 +655,7 @@ def test_predefined_connection_configuration(started_cluster): assert node1.query(f"SELECT count() FROM test_table").rstrip() == "100" assert "Connection pool cannot have zero size" in node1.query_and_get_error( - "SELECT count() FROM mysql(mysql1, table='test_table', connection_pool_size=0)" + "SELECT count() FROM mysql(mysql1, `table`='test_table', connection_pool_size=0)" ) assert "Connection pool cannot have zero size" in node1.query_and_get_error( "SELECT count() FROM mysql(mysql4)" @@ -671,7 +671,7 @@ def test_mysql_in(started_cluster): table_name = "test_mysql_in" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) @@ -684,7 +684,7 @@ def test_mysql_in(started_cluster): age UInt32, money UInt32 ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') + ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse') """.format( table_name, table_name ) @@ -714,7 +714,7 @@ def test_mysql_null(started_cluster): table_name = "test_mysql_in" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) with conn.cursor() as cursor: cursor.execute( @@ -735,7 +735,7 @@ def test_mysql_null(started_cluster): id UInt32, money Nullable(UInt32) ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') + ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse') """.format( table_name, table_name ) @@ -780,7 +780,7 @@ def test_settings(started_cluster): connect_timeout = 10123002 connection_pool_size = 1 - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) @@ -793,7 +793,7 @@ def test_settings(started_cluster): age 
UInt32, money UInt32 ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse') + ENGINE = MySQL('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse') SETTINGS connection_wait_timeout={wait_timeout}, connect_timeout={connect_timeout}, read_write_timeout={rw_timeout}, connection_pool_size={connection_pool_size} """ ) @@ -815,7 +815,7 @@ def test_settings(started_cluster): node1.query( f""" SELECT * - FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse', + FROM mysql('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse', SETTINGS connection_wait_timeout={wait_timeout}, connect_timeout={connect_timeout}, @@ -843,7 +843,7 @@ def test_settings(started_cluster): connect_timeout = 50123002 node1.query( f""" - CREATE DATABASE mm ENGINE = MySQL('mysql57:3306', 'clickhouse', 'root', 'clickhouse') + CREATE DATABASE mm ENGINE = MySQL('mysql80:3306', 'clickhouse', 'root', 'clickhouse') SETTINGS connection_wait_timeout={wait_timeout}, connect_timeout={connect_timeout}, @@ -863,7 +863,7 @@ def test_mysql_point(started_cluster): table_name = "test_mysql_point" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) with conn.cursor() as cursor: cursor.execute( @@ -882,25 +882,25 @@ def test_mysql_point(started_cluster): conn.commit() result = node1.query( - f"DESCRIBE mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + f"DESCRIBE mysql('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" ) assert result.strip() == "id\tInt32\t\t\t\t\t\npoint\tPoint" assert 1 == int( node1.query( - f"SELECT count() FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + f"SELECT count() FROM mysql('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" ) ) assert ( "(15,20)" == node1.query( - f"SELECT point FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + f"SELECT point FROM mysql('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" ).strip() ) node1.query("DROP TABLE IF EXISTS test") node1.query( - f"CREATE TABLE test (id Int32, point Point) Engine=MySQL('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + f"CREATE TABLE test (id Int32, point Point) Engine=MySQL('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" ) assert "(15,20)" == node1.query(f"SELECT point FROM test").strip() diff --git a/tests/integration/test_storage_numbers/test.py b/tests/integration/test_storage_numbers/test.py index 61fe8719ea2..cbd7793fd8c 100644 --- a/tests/integration/test_storage_numbers/test.py +++ b/tests/integration/test_storage_numbers/test.py @@ -242,3 +242,19 @@ def test_overflow(started_cluster): ) assert response == "(18446744073709551614),(18446744073709551615),(0),(1),(2)" check_read_rows("test_overflow", 5) + + +def test_non_number_filter(started_cluster): + response = node.query( + "SELECT toString(number) as a FROM numbers(3) WHERE a = '1' FORMAT Values", + query_id="test_non_number_filter", + ) + assert response == "('1')" + check_read_rows("test_non_number_filter", 3) + + response = node.query( + "SELECT toString(number) as a FROM numbers(1, 4) WHERE a = '1' FORMAT Values SETTINGS max_block_size = 3", + query_id="test_non_number_filter2", + ) + assert response == "('1')" + check_read_rows("test_non_number_filter2", 4) diff --git 
a/tests/integration/test_storage_rabbitmq/configs/mergetree.xml b/tests/integration/test_storage_rabbitmq/configs/mergetree.xml new file mode 100644 index 00000000000..61eba8face7 --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/mergetree.xml @@ -0,0 +1,5 @@ + + + 0 + + diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index b778e9fb556..0f1c5eb17dd 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -37,16 +37,29 @@ instance2 = cluster.add_instance( with_rabbitmq=True, ) +instance3 = cluster.add_instance( + "instance3", + user_configs=["configs/users.xml"], + main_configs=[ + "configs/rabbitmq.xml", + "configs/macros.xml", + "configs/named_collection.xml", + "configs/mergetree.xml", + ], + with_rabbitmq=True, + stay_alive=True, +) + # Helpers -def rabbitmq_check_result(result, check=False, ref_file="test_rabbitmq_json.reference"): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) +def rabbitmq_check_result(result, check=False, reference=None): + if reference is None: + reference = "\n".join([f"{i}\t{i}" for i in range(50)]) + if check: + assert TSV(result) == TSV(reference) + else: + return TSV(result) == TSV(reference) def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180): @@ -84,6 +97,7 @@ def rabbitmq_cluster(): cluster.start() logging.debug("rabbitmq_id is {}".format(instance.cluster.rabbitmq_docker_id)) instance.query("CREATE DATABASE test") + instance3.query("CREATE DATABASE test") yield cluster @@ -119,9 +133,10 @@ def test_rabbitmq_select(rabbitmq_cluster, secure): if secure: port = cluster.rabbitmq_secure_port + # MATERIALIZED and ALIAS columns are not supported in RabbitMQ engine, but we can test that it does not fail instance.query( """ - CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + CREATE TABLE test.rabbitmq (key UInt64, value UInt64, value2 ALIAS value + 1, value3 MATERIALIZED value + 1) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = '{}:{}', rabbitmq_exchange_name = 'select', @@ -134,6 +149,11 @@ def test_rabbitmq_select(rabbitmq_cluster, secure): ) ) + assert ( + "RabbitMQ table engine doesn\\'t support ALIAS, DEFAULT or MATERIALIZED columns" + in instance.query("SELECT * FROM system.warnings") + ) + credentials = pika.PlainCredentials("root", "clickhouse") parameters = pika.ConnectionParameters( rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials @@ -365,7 +385,7 @@ def test_rabbitmq_macros(rabbitmq_cluster): def test_rabbitmq_materialized_view(rabbitmq_cluster): instance.query( """ - CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + CREATE TABLE test.rabbitmq (key UInt64, value UInt64, dt1 DateTime MATERIALIZED now(), value2 ALIAS value + 1) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_exchange_name = 'mv', @@ -470,9 +490,11 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): """ DROP TABLE IF EXISTS test.view1; DROP TABLE IF EXISTS test.view2; + DROP TABLE IF EXISTS test.view3; DROP TABLE IF EXISTS test.consumer1; DROP TABLE IF EXISTS test.consumer2; - CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + DROP TABLE IF EXISTS test.consumer3; + CREATE TABLE test.rabbitmq (key UInt64, value UInt64, value2 ALIAS value + 1, value3 MATERIALIZED value + 1, value4 DEFAULT 1) ENGINE = RabbitMQ SETTINGS 
rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_exchange_name = 'mmv', @@ -483,13 +505,18 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): CREATE TABLE test.view1 (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; - CREATE TABLE test.view2 (key UInt64, value UInt64) + CREATE TABLE test.view2 (key UInt64, value UInt64, value2 UInt64, value3 UInt64, value4 UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE TABLE test.view3 (key UInt64) ENGINE = MergeTree() ORDER BY key; CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS SELECT * FROM test.rabbitmq; CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS SELECT * FROM test.rabbitmq; + CREATE MATERIALIZED VIEW test.consumer3 TO test.view3 AS + SELECT * FROM test.rabbitmq; """ ) @@ -500,7 +527,7 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): connection = pika.BlockingConnection(parameters) channel = connection.channel() - instance.wait_for_log_line("Started streaming to 2 attached views") + instance.wait_for_log_line("Started streaming to 3 attached views") messages = [] for i in range(50): @@ -508,24 +535,43 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): for message in messages: channel.basic_publish(exchange="mmv", routing_key="", body=message) - while True: + is_check_passed = False + deadline = time.monotonic() + 60 + while time.monotonic() < deadline: result1 = instance.query("SELECT * FROM test.view1 ORDER BY key") result2 = instance.query("SELECT * FROM test.view2 ORDER BY key") - if rabbitmq_check_result(result1) and rabbitmq_check_result(result2): + result3 = instance.query("SELECT * FROM test.view3 ORDER BY key") + # Note that for view2 the result is `i i 0 0 0`, not `i i i+1 i+1 1` as might be expected, because ALIAS/MATERIALIZED/DEFAULT columns are not supported in the RabbitMQ engine + # We only check that at least it does not fail + if ( + rabbitmq_check_result(result1) + and rabbitmq_check_result( + result2, reference="\n".join([f"{i}\t{i}\t0\t0\t0" for i in range(50)]) + ) + and rabbitmq_check_result( + result3, reference="\n".join([str(i) for i in range(50)]) + ) + ): + is_check_passed = True break + time.sleep(0.1) + + assert ( + is_check_passed + ), f"References are not equal to results, result1: {result1}, result2: {result2}, result3: {result3}" instance.query( """ DROP TABLE test.consumer1; DROP TABLE test.consumer2; + DROP TABLE test.consumer3; DROP TABLE test.view1; DROP TABLE test.view2; + DROP TABLE test.view3; """ ) connection.close() - rabbitmq_check_result(result1, True) - rabbitmq_check_result(result2, True) def test_rabbitmq_big_message(rabbitmq_cluster): @@ -3549,3 +3595,88 @@ def test_attach_broken_table(rabbitmq_cluster): assert "CANNOT_CONNECT_RABBITMQ" in error error = instance.query_and_get_error("INSERT INTO rabbit_queue VALUES ('test')") assert "CANNOT_CONNECT_RABBITMQ" in error + + +def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): + table_name = "nack_failed_insert" + exchange = f"{table_name}_exchange" + + credentials = pika.PlainCredentials("root", "clickhouse") + parameters = pika.ConnectionParameters( + rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials + ) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + channel.exchange_declare(exchange="deadl") + + result = channel.queue_declare(queue="deadq") + queue_name = result.method.queue + channel.queue_bind(exchange="deadl", routing_key="", queue=queue_name) + + instance3.query( + f""" + CREATE TABLE test.{table_name} (key UInt64, value
UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = '{rabbitmq_cluster.rabbitmq_host}:5672', + rabbitmq_flush_interval_ms=1000, + rabbitmq_exchange_name = '{exchange}', + rabbitmq_format = 'JSONEachRow', + rabbitmq_queue_settings_list='x-dead-letter-exchange=deadl'; + + DROP TABLE IF EXISTS test.view; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + + DROP TABLE IF EXISTS test.consumer; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.{table_name}; + """ + ) + + num_rows = 25 + for i in range(num_rows): + message = json.dumps({"key": i, "value": i}) + "\n" + channel.basic_publish(exchange=exchange, routing_key="", body=message) + + instance3.wait_for_log_line( + "Failed to push to views. Error: Code: 252. DB::Exception: Too many parts" + ) + + instance3.replace_in_config( + "/etc/clickhouse-server/config.d/mergetree.xml", + "parts_to_throw_insert>0", + "parts_to_throw_insert>10", + ) + instance3.restart_clickhouse() + + count = [0] + + def on_consume(channel, method, properties, body): + channel.basic_publish(exchange=exchange, routing_key="", body=body) + count[0] += 1 + if count[0] == num_rows: + channel.stop_consuming() + + channel.basic_consume(queue_name, on_consume) + channel.start_consuming() + + attempt = 0 + count = 0 + while attempt < 100: + count = int(instance3.query("SELECT count() FROM test.view")) + if count == num_rows: + break + attempt += 1 + + assert count == num_rows + + instance3.query( + f""" + DROP TABLE test.consumer; + DROP TABLE test.view; + DROP TABLE test.{table_name}; + """ + ) + connection.close() diff --git a/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference b/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference deleted file mode 100644 index 959bb2aad74..00000000000 --- a/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference +++ /dev/null @@ -1,50 +0,0 @@ -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -10 10 -11 11 -12 12 -13 13 -14 14 -15 15 -16 16 -17 17 -18 18 -19 19 -20 20 -21 21 -22 22 -23 23 -24 24 -25 25 -26 26 -27 27 -28 28 -29 29 -30 30 -31 31 -32 32 -33 33 -34 34 -35 35 -36 36 -37 37 -38 38 -39 39 -40 40 -41 41 -42 42 -43 43 -44 44 -45 45 -46 46 -47 47 -48 48 -49 49 diff --git a/tests/integration/test_storage_s3/s3_mocks/unstable_server.py b/tests/integration/test_storage_s3/s3_mocks/unstable_server.py index 5ef781bdc9e..70d49b7c1b0 100644 --- a/tests/integration/test_storage_s3/s3_mocks/unstable_server.py +++ b/tests/integration/test_storage_s3/s3_mocks/unstable_server.py @@ -118,5 +118,5 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.wfile.write(b"OK") -httpd = http.server.HTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) +httpd = http.server.ThreadingHTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) httpd.serve_forever() diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 2549cb0d473..dbbe670e8ca 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1379,9 +1379,7 @@ def test_schema_inference_from_globs(started_cluster): f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test*.jsoncompacteachrow') settings schema_inference_use_cache_for_s3=0, input_format_json_infer_incomplete_types_as_strings=0" ) - assert ( - "Cannot extract table structure from JSONCompactEachRow format file" in result - ) + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in 
result url_filename = "test{0,1,2,3}.jsoncompacteachrow" @@ -1389,9 +1387,7 @@ def test_schema_inference_from_globs(started_cluster): f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}') settings schema_inference_use_cache_for_url=0, input_format_json_infer_incomplete_types_as_strings=0" ) - assert ( - "Cannot extract table structure from JSONCompactEachRow format file" in result - ) + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in result def test_signatures(started_cluster): @@ -2193,4 +2189,58 @@ def test_union_schema_inference_mode(started_cluster): error = instance.query_and_get_error( f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3,4}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" ) - assert "Cannot extract table structure" in error + assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in error + + +def test_s3_format_detection(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection0', 'JSONEachRow', 'x UInt64, y String') select number, 'str_' || toString(number) from numbers(0) settings s3_truncate_on_insert=1" + ) + + instance.query( + f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1', 'JSONEachRow', 'x UInt64, y String') select number, 'str_' || toString(number) from numbers(5) settings s3_truncate_on_insert=1" + ) + + expected_result = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1', 'JSONEachRow', 'x UInt64, y String')" + ) + + expected_desc_result = instance.query( + f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1', 'JSONEachRow')" + ) + + for engine in ["s3", "url"]: + desc_result = instance.query( + f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1')" + ) + + assert desc_result == expected_desc_result + + result = instance.query( + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1')" + ) + + assert result == expected_result + + result = instance.query( + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection1', auto, 'x UInt64, y String')" + ) + + assert result == expected_result + + result = instance.query( + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection{{0,1}}', auto, 'x UInt64, y String')" + ) + + assert result == expected_result + + instance.query(f"system drop schema cache for {engine}") + + result = instance.query( + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_format_detection{{0,1}}', auto, 'x UInt64, y String')" + ) + + assert result == expected_result diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 119a211ae45..b20b761ef47 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -2,7 +2,7 @@ import time import helpers.client as client import 
pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION from helpers.test_tools import TSV, exec_query_with_retry from helpers.wait_for_helpers import wait_for_delete_inactive_parts from helpers.wait_for_helpers import wait_for_delete_empty_parts @@ -16,8 +16,8 @@ node3 = cluster.add_instance("node3", with_zookeeper=True) node4 = cluster.add_instance( "node4", with_zookeeper=True, - image="yandex/clickhouse-server", - tag="20.8.11.17", + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, stay_alive=True, with_installed_binary=True, main_configs=[ @@ -29,8 +29,8 @@ node4 = cluster.add_instance( node5 = cluster.add_instance( "node5", with_zookeeper=True, - image="yandex/clickhouse-server", - tag="20.8.11.17", + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, stay_alive=True, with_installed_binary=True, main_configs=[ @@ -41,8 +41,8 @@ node5 = cluster.add_instance( node6 = cluster.add_instance( "node6", with_zookeeper=True, - image="yandex/clickhouse-server", - tag="20.8.11.17", + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, stay_alive=True, with_installed_binary=True, main_configs=[ diff --git a/tests/integration/test_unambiguous_alter_commands/__init__.py b/tests/integration/test_unambiguous_alter_commands/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_unambiguous_alter_commands/configs/format_alter_operations_with_parentheses.xml b/tests/integration/test_unambiguous_alter_commands/configs/format_alter_operations_with_parentheses.xml new file mode 100644 index 00000000000..cec7d30749f --- /dev/null +++ b/tests/integration/test_unambiguous_alter_commands/configs/format_alter_operations_with_parentheses.xml @@ -0,0 +1,3 @@ + + 1 + diff --git a/tests/integration/test_unambiguous_alter_commands/test.py b/tests/integration/test_unambiguous_alter_commands/test.py new file mode 100644 index 00000000000..768ab78fbd8 --- /dev/null +++ b/tests/integration/test_unambiguous_alter_commands/test.py @@ -0,0 +1,45 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=[ + "configs/format_alter_operations_with_parentheses.xml", + ], +) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_alter(): + INPUT = """ +SELECT '--- Alter commands in parens'; +SELECT formatQuery('ALTER TABLE a (MODIFY TTL expr GROUP BY some_key), (ADD COLUMN a Int64)'); +SELECT formatQuery('ALTER TABLE a (MODIFY TTL expr TO VOLUME \\'vol1\\', expr2 + INTERVAL 2 YEAR TO VOLUME \\'vol2\\'), (DROP COLUMN c)'); + +SELECT '--- Check only consistent parens around alter commands are accepted'; +SELECT formatQuery('ALTER TABLE a (DROP COLUMN b), DROP COLUMN c'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a DROP COLUMN b, (DROP COLUMN c)'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a (DROP COLUMN b), (DROP COLUMN c)'); +SELECT formatQuery('ALTER TABLE a DROP COLUMN b, DROP COLUMN c'); -- Make sure it is backward compatible +""" + + EXPECTED_OUTPUT = """--- Alter commands in parens +ALTER TABLE a\\n (MODIFY TTL expr GROUP BY some_key),\\n (ADD COLUMN `a` Int64) +ALTER TABLE a\\n (MODIFY TTL expr TO VOLUME \\'vol1\\', expr2 + toIntervalYear(2) TO VOLUME 
\\'vol2\\'),\\n (DROP COLUMN c) +--- Check only consistent parens around alter commands are accepted +ALTER TABLE a\\n (DROP COLUMN b),\\n (DROP COLUMN c) +ALTER TABLE a\\n (DROP COLUMN b),\\n (DROP COLUMN c) +""" + result = node.query(INPUT) + assert result == EXPECTED_OUTPUT diff --git a/tests/integration/test_version_update/test.py b/tests/integration/test_version_update/test.py index a752960bc76..ab3eb1860f3 100644 --- a/tests/integration/test_version_update/test.py +++ b/tests/integration/test_version_update/test.py @@ -1,7 +1,6 @@ import pytest -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION cluster = ClickHouseCluster(__file__) @@ -11,43 +10,8 @@ node1 = cluster.add_instance("node1", stay_alive=True) node2 = cluster.add_instance( "node2", with_zookeeper=True, - image="yandex/clickhouse-server", - tag="20.8.11.17", - with_installed_binary=True, - stay_alive=True, - allow_analyzer=False, -) - -# Use different nodes because if there is node.restart_from_latest_version(), then in later tests -# it will be with latest version, but shouldn't, order of tests in CI is shuffled. -node3 = cluster.add_instance( - "node3", - image="yandex/clickhouse-server", - tag="21.6", - with_installed_binary=True, - stay_alive=True, - allow_analyzer=False, -) -node4 = cluster.add_instance( - "node4", - image="yandex/clickhouse-server", - tag="21.6", - with_installed_binary=True, - stay_alive=True, - allow_analyzer=False, -) -node5 = cluster.add_instance( - "node5", - image="yandex/clickhouse-server", - tag="21.6", - with_installed_binary=True, - stay_alive=True, - allow_analyzer=False, -) -node6 = cluster.add_instance( - "node6", - image="yandex/clickhouse-server", - tag="21.6", + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, with_installed_binary=True, stay_alive=True, allow_analyzer=False, @@ -124,211 +88,3 @@ def test_modulo_partition_key_issue_23508(start_cluster): assert partition_data == node2.query( "SELECT partition, name FROM system.parts WHERE table='test' ORDER BY partition" ) - - -# Test from issue 16587 -def test_aggregate_function_versioning_issue_16587(start_cluster): - for node in [node1, node3]: - node.query("DROP TABLE IF EXISTS test_table;") - node.query( - """ - CREATE TABLE test_table (`col1` DateTime, `col2` Int64) - ENGINE = MergeTree() ORDER BY col1""" - ) - node.query( - "insert into test_table select '2020-10-26 00:00:00', 1929292 from numbers(300)" - ) - - expected = "([1],[600])" - - result_on_old_version = node3.query( - "select sumMap(sm) from (select sumMap([1],[1]) as sm from remote('127.0.0.{1,2}', default.test_table) group by col1, col2);" - ).strip() - assert result_on_old_version != expected - - result_on_new_version = node1.query( - "select sumMap(sm) from (select sumMap([1],[1]) as sm from remote('127.0.0.{1,2}', default.test_table) group by col1, col2);" - ).strip() - assert result_on_new_version == expected - - -def test_aggregate_function_versioning_fetch_data_from_old_to_new_server(start_cluster): - for node in [node1, node4]: - create_table(node) - insert_data(node) - - expected = "([1],[300])" - - new_server_data = node1.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert new_server_data == expected - - old_server_data = node4.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert old_server_data != expected 
- - data_from_old_to_new_server = node1.query( - "select finalizeAggregation(col3) from remote('node4', default.test_table);" - ).strip() - assert data_from_old_to_new_server == old_server_data - - -def test_aggregate_function_versioning_server_upgrade(start_cluster): - for node in [node1, node5]: - create_table(node) - insert_data(node1, col2=5) - insert_data(node5, col2=1) - - # Serialization with version 0, server does not support versioning of aggregate function states. - old_server_data = node5.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert old_server_data == "([1],[44])" - create = node5.query("describe table default.test_table;").strip() - assert create.strip().endswith( - "col3\tAggregateFunction(sumMap, Array(UInt8), Array(UInt8))" - ) - print("Ok 1") - - # Upgrade server. - node5.restart_with_latest_version() - - # Deserialized with version 0, server supports versioning. - upgraded_server_data = node5.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert upgraded_server_data == "([1],[44])" - create = node5.query("describe table default.test_table;").strip() - assert create.strip().endswith( - "col3\tAggregateFunction(sumMap, Array(UInt8), Array(UInt8))" - ) - print("Ok 2") - - create = node1.query("describe table default.test_table;").strip() - print(create) - assert create.strip().endswith( - "col3\tAggregateFunction(1, sumMap, Array(UInt8), Array(UInt8))" - ) - - # Data from upgraded server to new server. Deserialize with version 0. - data_from_upgraded_to_new_server = node1.query( - "select finalizeAggregation(col3) from remote('node5', default.test_table);" - ).strip() - assert data_from_upgraded_to_new_server == upgraded_server_data == "([1],[44])" - print("Ok 3") - - # Data is serialized according to version 0 (though one of the states is version 1, but result is version 0). - upgraded_server_data = node5.query( - "select finalizeAggregation(col3) from remote('127.0.0.{1,2}', default.test_table);" - ).strip() - assert upgraded_server_data == "([1],[44])\n([1],[44])" - print("Ok 4") - - # Check insertion after server upgarde. - insert_data(node5, col2=2) - - # Check newly inserted data is still serialized with 0 version. - upgraded_server_data = node5.query( - "select finalizeAggregation(col3) from default.test_table order by col2;" - ).strip() - assert upgraded_server_data == "([1],[44])\n([1],[44])" - print("Ok 5") - - # New table has latest version. - new_server_data = node1.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert new_server_data == "([1],[300])" - print("Ok 6") - - # Insert from new server (with version 1) to upgraded server (where version will be 0), result version 0. - node1.query( - "insert into table function remote('node5', default.test_table) select * from default.test_table;" - ).strip() - upgraded_server_data = node5.query( - "select finalizeAggregation(col3) from default.test_table order by col2;" - ).strip() - assert upgraded_server_data == "([1],[44])\n([1],[44])\n([1],[44])" - print("Ok 7") - - # But new table gets data with latest version. - insert_data(node1) - new_server_data = node1.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert new_server_data == "([1],[300])\n([1],[300])" - print("Ok 8") - - # Create table with column implicitly with older version (version 0). 
- create_table(node1, name="test_table_0", version=0) - insert_data(node1, table_name="test_table_0", col2=3) - data = node1.query( - "select finalizeAggregation(col3) from default.test_table_0;" - ).strip() - assert data == "([1],[44])" - print("Ok") - - # Insert from new server to upgraded server to a new table but the version was set implicitly to 0, so data version 0. - node1.query( - "insert into table function remote('node5', default.test_table) select * from default.test_table_0;" - ).strip() - upgraded_server_data = node5.query( - "select finalizeAggregation(col3) from default.test_table order by col2;" - ).strip() - assert upgraded_server_data == "([1],[44])\n([1],[44])\n([1],[44])\n([1],[44])" - print("Ok") - - -def test_aggregate_function_versioning_persisting_metadata(start_cluster): - for node in [node1, node6]: - create_table(node) - insert_data(node) - data = node1.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert data == "([1],[300])" - data = node6.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert data == "([1],[44])" - - node6.restart_with_latest_version() - - for node in [node1, node6]: - node.query("DETACH TABLE test_table") - node.query("ATTACH TABLE test_table") - - for node in [node1, node6]: - insert_data(node) - - new_server_data = node1.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert new_server_data == "([1],[300])\n([1],[300])" - - upgraded_server_data = node6.query( - "select finalizeAggregation(col3) from default.test_table;" - ).strip() - assert upgraded_server_data == "([1],[44])\n([1],[44])" - - for node in [node1, node6]: - node.restart_clickhouse() - insert_data(node) - - result = node1.query( - "select finalizeAggregation(col3) from remote('127.0.0.{1,2}', default.test_table);" - ).strip() - assert ( - result - == "([1],[300])\n([1],[300])\n([1],[300])\n([1],[300])\n([1],[300])\n([1],[300])" - ) - - result = node6.query( - "select finalizeAggregation(col3) from remote('127.0.0.{1,2}', default.test_table);" - ).strip() - assert ( - result - == "([1],[44])\n([1],[44])\n([1],[44])\n([1],[44])\n([1],[44])\n([1],[44])" - ) diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 4e84b4c10ca..90f8d283a6a 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -1,7 +1,7 @@ import pytest import time -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry cluster = ClickHouseCluster(__file__) @@ -9,8 +9,8 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", with_zookeeper=True, - image="yandex/clickhouse-server", - tag="20.8.11.17", + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, with_installed_binary=True, stay_alive=True, main_configs=[ @@ -21,8 +21,8 @@ node1 = cluster.add_instance( node2 = cluster.add_instance( "node2", with_zookeeper=True, - image="yandex/clickhouse-server", - tag="20.8.11.17", + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, with_installed_binary=True, stay_alive=True, main_configs=[ @@ -33,8 +33,8 @@ node2 = cluster.add_instance( node3 = cluster.add_instance( "node3", with_zookeeper=True, - 
image="yandex/clickhouse-server", - tag="20.8.11.17", + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, with_installed_binary=True, stay_alive=True, main_configs=[ diff --git a/tests/performance/agg_functions_argmin_argmax.xml b/tests/performance/agg_functions_argmin_argmax.xml new file mode 100644 index 00000000000..e8eed2a82de --- /dev/null +++ b/tests/performance/agg_functions_argmin_argmax.xml @@ -0,0 +1,24 @@ + + + + group_scale + + 1000000 + + + + +select argMin(Title, EventTime) from hits_100m_single where Title != '' group by intHash32(UserID) % {group_scale} FORMAT Null +select argMinIf(Title, EventTime, Title != '') from hits_100m_single group by intHash32(UserID) % {group_scale} FORMAT Null +select argMinIf(Title::Nullable(String), EventTime::Nullable(DateTime), Title::Nullable(String) != '') from hits_100m_single group by intHash32(UserID) % {group_scale} FORMAT Null + +select argMin(RegionID, EventTime) from hits_100m_single where Title != '' group by intHash32(UserID) % {group_scale} FORMAT Null +select argMin((Title, RegionID), EventTime) from hits_100m_single where Title != '' group by intHash32(UserID) % {group_scale} FORMAT Null +select argMinIf(Title, EventTime, Title != '') from hits_100m_single group by intHash32(UserID) % {group_scale} FORMAT Null + +select argMax(WatchID, Age) from hits_100m_single FORMAT Null +select argMax(WatchID, Age::Nullable(UInt8)) from hits_100m_single FORMAT Null +select argMax(WatchID, (EventDate, EventTime)) from hits_100m_single where Title != '' group by intHash32(UserID) % {group_scale} FORMAT Null +select argMax(MobilePhone, MobilePhoneModel) from hits_100m_single + + diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml index 91763c69bb9..5964e7e2227 100644 --- a/tests/performance/aggregate_with_serialized_method.xml +++ b/tests/performance/aggregate_with_serialized_method.xml @@ -1,8 +1,7 @@ 8 - 0 - 4 + 1 @@ -29,4 +28,4 @@ select toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3), min(m1) from t_nullable group by toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3) limit 10 drop table if exists t_nullable - \ No newline at end of file + diff --git a/tests/performance/alter_select.xml b/tests/performance/alter_select.xml new file mode 100644 index 00000000000..fbbf603dcba --- /dev/null +++ b/tests/performance/alter_select.xml @@ -0,0 +1,35 @@ + + + + engine + + mt + rmt + + + + + create table alter_select_mt (part_id String, col_0 String) engine=MergeTree() partition by part_id order by tuple() settings max_parts_to_merge_at_once=1 + create table alter_select_rmt (part_id String, col_0 String) engine=ReplicatedMergeTree('/tables/{{database}}', '{{table}}') partition by part_id order by tuple() settings max_parts_to_merge_at_once=1 + + system stop merges alter_select_{engine} + + + insert into alter_select_{engine} (part_id, col_0) + select toString(number % 5000), 0 from numbers(10000) + settings + max_block_size=1, + max_insert_threads=32, + min_insert_block_size_rows=1, + insert_deduplicate=false, + parts_to_delay_insert=100000, + parts_to_throw_insert=100000 + + alter table alter_select_{engine} drop column col_0 settings alter_sync = 0 + + select count() from alter_select_{engine} format Null settings max_threads=1 + select * from alter_select_{engine} format 
Null settings max_threads=1 + + drop table alter_select_{engine} + + diff --git a/tests/performance/array_index_low_cardinality_numbers.xml b/tests/performance/array_index_low_cardinality_numbers.xml index f8fa27df05f..a68a37ebfff 100644 --- a/tests/performance/array_index_low_cardinality_numbers.xml +++ b/tests/performance/array_index_low_cardinality_numbers.xml @@ -1,4 +1,8 @@ + + 1 + + DROP TABLE IF EXISTS perf_lc_num CREATE TABLE perf_lc_num( num UInt8, diff --git a/tests/performance/dotProduct.xml b/tests/performance/dotProduct.xml new file mode 100644 index 00000000000..6e056964ebb --- /dev/null +++ b/tests/performance/dotProduct.xml @@ -0,0 +1,62 @@ + + + + + element_type + + + + + + + Float32 + Float64 + + + + + + CREATE TABLE vecs_{element_type} ( + v Array({element_type}) + ) ENGINE=Memory; + + + + + + + INSERT INTO vecs_{element_type} + SELECT v FROM ( + SELECT + number AS n, + [ + rand(n*10), rand(n*10+1), rand(n*10+2), rand(n*10+3), rand(n*10+4), rand(n*10+5), rand(n*10+6), rand(n*10+7), rand(n*10+8), rand(n*10+9), + rand(n*10+10), rand(n*10+11), rand(n*10+12), rand(n*10+13), rand(n*10+14), rand(n*10+15), rand(n*10+16), rand(n*10+17), rand(n*10+18), rand(n*10+19), + rand(n*10+20), rand(n*10+21), rand(n*10+22), rand(n*10+23), rand(n*10+24), rand(n*10+25), rand(n*10+26), rand(n*10+27), rand(n*10+28), rand(n*10+29), + rand(n*10+30), rand(n*10+31), rand(n*10+32), rand(n*10+33), rand(n*10+34), rand(n*10+35), rand(n*10+36), rand(n*10+37), rand(n*10+38), rand(n*10+39), + rand(n*10+40), rand(n*10+41), rand(n*10+42), rand(n*10+43), rand(n*10+44), rand(n*10+45), rand(n*10+46), rand(n*10+47), rand(n*10+48), rand(n*10+49), + rand(n*10+50), rand(n*10+51), rand(n*10+52), rand(n*10+53), rand(n*10+54), rand(n*10+55), rand(n*10+56), rand(n*10+57), rand(n*10+58), rand(n*10+59), + rand(n*10+60), rand(n*10+61), rand(n*10+62), rand(n*10+63), rand(n*10+64), rand(n*10+65), rand(n*10+66), rand(n*10+67), rand(n*10+68), rand(n*10+69), + rand(n*10+70), rand(n*10+71), rand(n*10+72), rand(n*10+73), rand(n*10+74), rand(n*10+75), rand(n*10+76), rand(n*10+77), rand(n*10+78), rand(n*10+79), + rand(n*10+80), rand(n*10+81), rand(n*10+82), rand(n*10+83), rand(n*10+84), rand(n*10+85), rand(n*10+86), rand(n*10+87), rand(n*10+88), rand(n*10+89), + rand(n*10+90), rand(n*10+91), rand(n*10+92), rand(n*10+93), rand(n*10+94), rand(n*10+95), rand(n*10+96), rand(n*10+97), rand(n*10+98), rand(n*10+99), + rand(n*10+100), rand(n*10+101), rand(n*10+102), rand(n*10+103), rand(n*10+104), rand(n*10+105), rand(n*10+106), rand(n*10+107), rand(n*10+108), rand(n*10+109), + rand(n*10+110), rand(n*10+111), rand(n*10+112), rand(n*10+113), rand(n*10+114), rand(n*10+115), rand(n*10+116), rand(n*10+117), rand(n*10+118), rand(n*10+119), + rand(n*10+120), rand(n*10+121), rand(n*10+122), rand(n*10+123), rand(n*10+124), rand(n*10+125), rand(n*10+126), rand(n*10+127), rand(n*10+128), rand(n*10+129), + rand(n*10+130), rand(n*10+131), rand(n*10+132), rand(n*10+133), rand(n*10+134), rand(n*10+135), rand(n*10+136), rand(n*10+137), rand(n*10+138), rand(n*10+139), + rand(n*10+140), rand(n*10+141), rand(n*10+142), rand(n*10+143), rand(n*10+144), rand(n*10+145), rand(n*10+146), rand(n*10+147), rand(n*10+148), rand(n*10+149) + ] AS v + FROM system.numbers + LIMIT 5000000 + ); + + + + 1 + + + SELECT sum(dp) FROM (SELECT dotProduct(v, v) AS dp FROM vecs_{element_type}) + + DROP TABLE vecs_{element_type} + + diff --git a/tests/performance/jit_aggregate_functions.xml b/tests/performance/jit_aggregate_functions.xml index 8abb901439a..a16b81f610c 100644 --- 
a/tests/performance/jit_aggregate_functions.xml +++ b/tests/performance/jit_aggregate_functions.xml @@ -30,7 +30,7 @@ - CREATE TABLE jit_test_merge_tree_nullable ( + CREATE TABLE jit_test_memory_nullable ( key UInt64, value_1 Nullable(UInt64), value_2 Nullable(UInt64), @@ -42,7 +42,7 @@ - CREATE TABLE jit_test_memory_nullable ( + CREATE TABLE jit_test_merge_tree_nullable ( key UInt64, value_1 Nullable(UInt64), value_2 Nullable(UInt64), diff --git a/tests/performance/multiif.xml b/tests/performance/multiif.xml index ad56ab3f5f2..0c2d95cc553 100644 --- a/tests/performance/multiif.xml +++ b/tests/performance/multiif.xml @@ -5,4 +5,12 @@ select count(1) from test_multiif_t where multiIf(d > 2, d-2, d > 1, d-1, d >0, d, 0) > 1 SETTINGS max_threads=1 DROP TABLE IF EXISTS test_multiif_t + + + + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf( rand(1) % 2 = 0, materialize(1::Nullable(Decimal256(3))), rand(2) % 2 = 0, materialize(2::Nullable(Decimal256(3))), rand(3) % 2 = 0, materialize(3::Nullable(Decimal256(3))), rand(4) % 2 = 0, materialize(4::Nullable(Decimal256(3))), rand(5) % 2 = 0, materialize(5::Nullable(Decimal256(3))), materialize(6::Nullable(Decimal256(3))))) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf( rand(1) % 2 = 0, materialize(1::Decimal256(3)), rand(2) % 2 = 0, materialize(2::Decimal256(3)), rand(3) % 2 = 0, materialize(3::Decimal256(3)), rand(4) % 2 = 0, materialize(4::Decimal256(3)), rand(5) % 2 = 0, materialize(5::Decimal256(3)), materialize(6::Decimal256(3)))) + + + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2 = 0, rand()+2, rand() % 3 = 0, rand()+3, rand() % 4 = 0, rand()+4, rand() % 5 = 0, rand() + 5, rand() % 6 = 0, rand() + 6, rand())) diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 1e879607dac..69ed71d026f 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -4,11 +4,11 @@ element_type - + - Int32 - Int64 + + Float32 Float64 diff --git a/tests/performance/order_with_limit.xml b/tests/performance/order_with_limit.xml index 1e1cb52267c..d1ad2afade8 100644 --- a/tests/performance/order_with_limit.xml +++ b/tests/performance/order_with_limit.xml @@ -1,4 +1,5 @@ + SELECT number AS n FROM numbers_mt(200000000) ORDER BY n DESC LIMIT 1 FORMAT Null SELECT number AS n FROM numbers_mt(200000000) ORDER BY n DESC LIMIT 10 FORMAT Null SELECT number AS n FROM numbers_mt(200000000) ORDER BY n DESC LIMIT 100 FORMAT Null SELECT number AS n FROM numbers_mt(200000000) ORDER BY n DESC LIMIT 1500 FORMAT Null @@ -7,6 +8,7 @@ SELECT number AS n FROM numbers_mt(200000000) ORDER BY n DESC LIMIT 10000 FORMAT Null SELECT number AS n FROM numbers_mt(200000000) ORDER BY n DESC LIMIT 65535 FORMAT Null + SELECT intHash64(number) AS n FROM numbers_mt(500000000) ORDER BY n LIMIT 1 FORMAT Null SELECT intHash64(number) AS n FROM numbers_mt(500000000) ORDER BY n LIMIT 10 FORMAT Null SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n LIMIT 100 FORMAT Null SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n LIMIT 1500 FORMAT Null @@ -15,6 +17,7 @@ SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n LIMIT 10000 FORMAT Null SELECT intHash64(number) AS n FROM numbers_mt(100000000) ORDER BY n LIMIT 65535 FORMAT Null + SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n, n + 1, n + 2 LIMIT 1 FORMAT Null SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n, n + 1, n + 2 LIMIT 10 FORMAT Null 
SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n, n + 1, n + 2 LIMIT 100 FORMAT Null SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n, n + 1, n + 2 LIMIT 1500 FORMAT Null diff --git a/tests/performance/scripts/compare.sh b/tests/performance/scripts/compare.sh index 39c6854fbf9..9a0fb5b335c 100755 --- a/tests/performance/scripts/compare.sh +++ b/tests/performance/scripts/compare.sh @@ -11,8 +11,14 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # upstream/master LEFT_SERVER_PORT=9001 +LEFT_SERVER_KEEPER_PORT=9181 +LEFT_SERVER_KEEPER_RAFT_PORT=9234 +LEFT_SERVER_INTERSERVER_PORT=9009 # patched version -RIGHT_SERVER_PORT=9002 +RIGHT_SERVER_PORT=19001 +RIGHT_SERVER_KEEPER_PORT=19181 +RIGHT_SERVER_KEEPER_RAFT_PORT=19234 +RIGHT_SERVER_INTERSERVER_PORT=19009 # abort_conf -- abort if some options is not recognized # abort -- abort if something is not right in the env (i.e. per-cpu arenas does not work) @@ -127,6 +133,10 @@ function restart --user_files_path left/db/user_files --top_level_domains_path "$(left_or_right left top_level_domains)" --tcp_port $LEFT_SERVER_PORT + --keeper_server.tcp_port $LEFT_SERVER_KEEPER_PORT + --keeper_server.raft_configuration.server.port $LEFT_SERVER_KEEPER_RAFT_PORT + --zookeeper.node.port $LEFT_SERVER_KEEPER_PORT + --interserver_http_port $LEFT_SERVER_INTERSERVER_PORT ) left/clickhouse-server "${left_server_opts[@]}" &>> left-server-log.log & left_pid=$! @@ -142,6 +152,10 @@ function restart --user_files_path right/db/user_files --top_level_domains_path "$(left_or_right right top_level_domains)" --tcp_port $RIGHT_SERVER_PORT + --keeper_server.tcp_port $RIGHT_SERVER_KEEPER_PORT + --keeper_server.raft_configuration.server.port $RIGHT_SERVER_KEEPER_RAFT_PORT + --zookeeper.node.port $RIGHT_SERVER_KEEPER_PORT + --interserver_http_port $RIGHT_SERVER_INTERSERVER_PORT ) right/clickhouse-server "${right_server_opts[@]}" &>> right-server-log.log & right_pid=$! 
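The compare.sh hunk above gives the patched ("right") server a port set fully disjoint from the upstream ("left") one: TCP 9001 vs 19001, Keeper 9181 vs 19181, Keeper Raft 9234 vs 19234, interserver 9009 vs 19009, presumably so that both servers and their embedded Keeper can run side by side on one host. Below is a minimal, illustrative Python sketch (not part of the patch) of how such a layout could be sanity-checked before a comparison run; the port values mirror the script, everything else is a hypothetical helper.

import socket

# Port pairs taken from compare.sh; the probe itself is only an illustration.
LEFT_PORTS = {"tcp": 9001, "keeper": 9181, "keeper_raft": 9234, "interserver": 9009}
RIGHT_PORTS = {"tcp": 19001, "keeper": 19181, "keeper_raft": 19234, "interserver": 19009}


def is_listening(port: int, host: str = "127.0.0.1", timeout: float = 1.0) -> bool:
    # A plain TCP connect is enough to tell whether something is bound to the port.
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False


if __name__ == "__main__":
    overlap = set(LEFT_PORTS.values()) & set(RIGHT_PORTS.values())
    assert not overlap, f"left/right port sets overlap: {overlap}"
    for side, ports in (("left", LEFT_PORTS), ("right", RIGHT_PORTS)):
        for name, port in ports.items():
            state = "listening" if is_listening(port) else "free"
            print(f"{side}_{name:<12} {port:>6} {state}")
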
diff --git a/tests/performance/scripts/config/config.d/zzz-perf-comparison-tweaks-config.xml b/tests/performance/scripts/config/config.d/zzz-perf-comparison-tweaks-config.xml index 292665c4f68..c2bef2b479a 100644 --- a/tests/performance/scripts/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/tests/performance/scripts/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -2,10 +2,7 @@ - - - :: diff --git a/tests/performance/small_block_contention.xml b/tests/performance/small_block_contention.xml new file mode 100644 index 00000000000..ce1995a0a29 --- /dev/null +++ b/tests/performance/small_block_contention.xml @@ -0,0 +1,3 @@ + + select sum(number) from numbers_mt(200000) settings max_threads=100, max_block_size = 1 format Null + diff --git a/tests/performance/sum.xml b/tests/performance/sum.xml index 57b879a360d..36b898436bf 100644 --- a/tests/performance/sum.xml +++ b/tests/performance/sum.xml @@ -17,6 +17,13 @@ SELECT sumKahan(toNullable(toFloat32(number))) FROM numbers(100000000) SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(100000000) + select sumIf(number::Decimal128(3), rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::Decimal256(3), rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::Int128, rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::UInt128, rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::Int256, rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::UInt256, rand32() % 2 = 0) from numbers(100000000) + CREATE TABLE nullfloat32 (x Nullable(Float32)) ENGINE = Memory INSERT INTO nullfloat32 diff --git a/tests/queries/0_stateless/00027_argMinMax.sql b/tests/queries/0_stateless/00027_argMinMax.sql index 2b67b99ec77..dbf7c9176d2 100644 --- a/tests/queries/0_stateless/00027_argMinMax.sql +++ b/tests/queries/0_stateless/00027_argMinMax.sql @@ -5,4 +5,12 @@ select argMin(x.1, x.2), argMax(x.1, x.2) from (select (toDate(number, 'UTC'), t select argMin(x.1, x.2), argMax(x.1, x.2) from (select (toDecimal32(number, 2), toDecimal64(number, 2) + 1) as x from numbers(10)); -- array -SELECT argMinArray(id, num), argMaxArray(id, num) FROM (SELECT arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num, arrayJoin([[1, 2, 4], [2, 3, 3]]) AS id); +SELECT + argMinArray(id, num), + argMaxArray(id, num) +FROM +( + SELECT + arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num, + arrayJoin([[1, 2, 4]]) AS id +) diff --git a/tests/queries/0_stateless/00027_simple_argMinArray.sql b/tests/queries/0_stateless/00027_simple_argMinArray.sql index b681a2c53cf..bdee2b058b8 100644 --- a/tests/queries/0_stateless/00027_simple_argMinArray.sql +++ b/tests/queries/0_stateless/00027_simple_argMinArray.sql @@ -1 +1 @@ -SELECT argMinArray(id, num), argMaxArray(id, num) FROM (SELECT arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num, arrayJoin([[1, 2, 4], [2, 3, 3]]) AS id) +SELECT argMinArray(id, num), argMaxArray(id, num) FROM (SELECT arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num, arrayJoin([[1, 2, 4]]) AS id) diff --git a/tests/queries/0_stateless/00047_stored_aggregates_complex.sql b/tests/queries/0_stateless/00047_stored_aggregates_complex.sql index 2e416f91d5d..df5305c97db 100644 --- a/tests/queries/0_stateless/00047_stored_aggregates_complex.sql +++ b/tests/queries/0_stateless/00047_stored_aggregates_complex.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS stored_aggregates; +set max_insert_threads = 1; + set allow_deprecated_syntax_for_merge_tree=1; CREATE TABLE stored_aggregates ( diff --git 
a/tests/queries/0_stateless/00309_formats_case_insensitive.reference b/tests/queries/0_stateless/00309_formats_case_insensitive.reference new file mode 100644 index 00000000000..b74d7002833 --- /dev/null +++ b/tests/queries/0_stateless/00309_formats_case_insensitive.reference @@ -0,0 +1,95 @@ +-- test FORMAT clause -- +0 Hello & world +1 Hello & world +2 Hello & world +0,"Hello & world" +1,"Hello & world" +2,"Hello & world" + + + + + + number + UInt64 + + + 'Hello & world' + String + + + + + + 0 + Hello & world + + + 1 + Hello & world + + + 2 + Hello & world + + + 3 + +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "'Hello & world'", + "type": "String" + } + ], + + "data": + [ + { + "number": "0", + "'Hello & world'": "Hello & world" + }, + { + "number": "1", + "'Hello & world'": "Hello & world" + }, + { + "number": "2", + "'Hello & world'": "Hello & world" + } + ], + + "rows": 3 +} +Row 1: +────── +number: 0 +'Hello & world': Hello & world + +Row 2: +────── +number: 1 +'Hello & world': Hello & world + +Row 3: +────── +number: 2 +'Hello & world': Hello & world +-- test table function -- +0 Hello & world +1 Hello & world +2 Hello & world +0 Hello & world +1 Hello & world +2 Hello & world +-- test other function -- +0 Hello & world +-- test table engine -- +0 Hello & world +1 Hello & world +2 Hello & world diff --git a/tests/queries/0_stateless/00309_formats_case_insensitive.sql b/tests/queries/0_stateless/00309_formats_case_insensitive.sql new file mode 100644 index 00000000000..b4037ed9861 --- /dev/null +++ b/tests/queries/0_stateless/00309_formats_case_insensitive.sql @@ -0,0 +1,23 @@ +SELECT '-- test FORMAT clause --'; +SET output_format_write_statistics = 0; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT Tsv; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT csv; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT xMl; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT JsonStrINGs; +SELECT number, 'Hello & world' FROM numbers(3) FORMAT VERTICAL; + +SELECT '-- test table function --'; +INSERT INTO FUNCTION file('data_00309_formats_case_insensitive', 'Csv') SELECT number, 'Hello & world' FROM numbers(3) SETTINGS engine_file_truncate_on_insert=1; +SELECT * FROM file('data_00309_formats_case_insensitive', 'Csv'); + +INSERT INTO FUNCTION file('data_00309_formats_case_insensitive.cSv') SELECT number, 'Hello & world' FROM numbers(3) SETTINGS engine_file_truncate_on_insert=1; +SELECT * FROM file('data_00309_formats_case_insensitive.cSv'); + +SELECT '-- test other function --'; +SELECT * FROM format(cSv, '0,Hello & world'); + +SELECT '-- test table engine --'; +DROP TABLE IF EXISTS test_00309_formats_case_insensitive; +CREATE TABLE test_00309_formats_case_insensitive(a Int64, b String) ENGINE=File(Csv); +INSERT INTO test_00309_formats_case_insensitive SELECT number, 'Hello & world' FROM numbers(3); +SELECT * FROM test_00309_formats_case_insensitive; diff --git a/tests/queries/0_stateless/00340_squashing_insert_select.sql b/tests/queries/0_stateless/00340_squashing_insert_select.sql index 102eb061bad..6b7133c155e 100644 --- a/tests/queries/0_stateless/00340_squashing_insert_select.sql +++ b/tests/queries/0_stateless/00340_squashing_insert_select.sql @@ -7,6 +7,8 @@ SET max_block_size = 10000; SET min_insert_block_size_rows = 1000000; SET min_insert_block_size_bytes = 0; +set max_insert_threads = 1; + INSERT INTO numbers_squashed SELECT * FROM system.numbers LIMIT 10000000; SELECT blockSize() AS b, count() / b AS c FROM numbers_squashed GROUP BY 
blockSize() ORDER BY c DESC; diff --git a/tests/queries/0_stateless/00597_push_down_predicate_long.reference b/tests/queries/0_stateless/00597_push_down_predicate_long.reference index 4ac88edd40e..2c46edc98bf 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate_long.reference +++ b/tests/queries/0_stateless/00597_push_down_predicate_long.reference @@ -114,7 +114,7 @@ FROM ( SELECT 1 AS id, - identity(_CAST(1, \'Nullable(UInt8)\')) AS subquery + __scalarSubqueryResult(_CAST(1, \'Nullable(UInt8)\')) AS subquery WHERE subquery = 1 ) WHERE subquery = 1 @@ -179,7 +179,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) WHERE id = 1 2000-01-01 1 test string 1 1 @@ -203,7 +203,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) WHERE id = 1 ) @@ -229,7 +229,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) AS b WHERE id = 1 ) @@ -248,7 +248,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) WHERE id = 1 2000-01-01 1 test string 1 1 @@ -272,7 +272,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) WHERE id = 1 ) @@ -291,7 +291,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) AS b WHERE id = 1 2000-01-01 1 test string 1 1 @@ -315,7 +315,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) AS a WHERE id = 1 ) AS b @@ -332,7 +332,7 @@ FROM date, min(value) AS value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 GROUP BY id, date @@ -352,7 +352,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 UNION ALL SELECT date, @@ -360,7 +360,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) WHERE id = 1 2000-01-01 1 test string 1 1 @@ -381,7 +381,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) ANY LEFT JOIN ( @@ -443,7 +443,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) ANY LEFT JOIN ( @@ -540,7 +540,7 @@ FROM name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) AS a ANY LEFT JOIN ( @@ -587,7 +587,7 @@ SEMI LEFT JOIN name, value FROM test_00597 - PREWHERE id = 1 + WHERE id = 1 ) WHERE id = 1 ) AS r USING (id) diff --git a/tests/queries/0_stateless/00599_create_view_with_subquery.reference b/tests/queries/0_stateless/00599_create_view_with_subquery.reference index 0458f650fd0..39a5f99df03 100644 --- a/tests/queries/0_stateless/00599_create_view_with_subquery.reference +++ b/tests/queries/0_stateless/00599_create_view_with_subquery.reference @@ -1 +1 @@ -CREATE VIEW default.test_view_00599\n(\n `id` UInt64\n) AS\nSELECT *\nFROM default.test_00599\nWHERE id = (\n SELECT 1\n) +CREATE VIEW default.test_view_00599\n(\n `id` UInt64\n)\nAS SELECT *\nFROM default.test_00599\nWHERE id = (\n SELECT 1\n) diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python index dc0fdd1a71d..931d18a3f80 100644 --- a/tests/queries/0_stateless/00646_url_engine.python +++ b/tests/queries/0_stateless/00646_url_engine.python @@ -12,6 +12,7 @@ import urllib.request import subprocess from io import StringIO from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn def is_ipv6(host): @@ -145,11 +146,19 @@ class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 +class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + pass + + +class ThreadedHTTPServerV6(ThreadingMixIn, HTTPServerV6): + pass + + def start_server(): if IS_IPV6: - httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, 
CSVHTTPServer) + httpd = ThreadedHTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) else: - httpd = HTTPServer(HTTP_SERVER_ADDRESS, CSVHTTPServer) + httpd = ThreadedHTTPServer(HTTP_SERVER_ADDRESS, CSVHTTPServer) t = threading.Thread(target=httpd.serve_forever) return t, httpd diff --git a/tests/queries/0_stateless/00662_has_nullable.reference b/tests/queries/0_stateless/00662_has_nullable.reference new file mode 100644 index 00000000000..1ac93f25a39 --- /dev/null +++ b/tests/queries/0_stateless/00662_has_nullable.reference @@ -0,0 +1,14 @@ +Nullable(UInt64), non-null array +1 1 +\N 0 +Non-nullable UInt64, nullable array +0 0 +1 1 +2 1 +Nullable(UInt64), nullable array +0 0 +\N 1 +1 1 +All NULLs +0 0 +\N 1 diff --git a/tests/queries/0_stateless/00662_has_nullable.sql b/tests/queries/0_stateless/00662_has_nullable.sql new file mode 100644 index 00000000000..3fd3bd46baa --- /dev/null +++ b/tests/queries/0_stateless/00662_has_nullable.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS 00662_has_nullable; + +SELECT 'Nullable(UInt64), non-null array'; +CREATE TABLE 00662_has_nullable(a Nullable(UInt64)) ENGINE = Memory; + +INSERT INTO 00662_has_nullable VALUES (1), (Null); +SELECT a, has([0, 1], a) FROM 00662_has_nullable; + +DROP TABLE 00662_has_nullable; + +-------------------------------------------------------------------------------- + +SELECT 'Non-nullable UInt64, nullable array'; +CREATE TABLE 00662_has_nullable(a UInt64) ENGINE = Memory; + +INSERT INTO 00662_has_nullable VALUES (0), (1), (2); +SELECT a, has([NULL, 1, 2], a) FROM 00662_has_nullable; + +DROP TABLE 00662_has_nullable; + +-------------------------------------------------------------------------------- + +SELECT 'Nullable(UInt64), nullable array'; +CREATE TABLE 00662_has_nullable(a Nullable(UInt64)) ENGINE = Memory; + +INSERT INTO 00662_has_nullable VALUES (0), (Null), (1); +SELECT a, has([NULL, 1, 2], a) FROM 00662_has_nullable; + +DROP TABLE 00662_has_nullable; + +-------------------------------------------------------------------------------- + +SELECT 'All NULLs'; +CREATE TABLE 00662_has_nullable(a Nullable(UInt64)) ENGINE = Memory; + +INSERT INTO 00662_has_nullable VALUES (0), (Null); +SELECT a, has([NULL, NULL], a) FROM 00662_has_nullable; + +DROP TABLE 00662_has_nullable; diff --git a/tests/queries/0_stateless/00700_decimal_arithm.reference b/tests/queries/0_stateless/00700_decimal_arithm.reference index 811946c87e0..109c0632fb1 100644 --- a/tests/queries/0_stateless/00700_decimal_arithm.reference +++ b/tests/queries/0_stateless/00700_decimal_arithm.reference @@ -10,18 +10,18 @@ 63 21 -42 882 -882 2 0 2 0 63 21 -42 882 -882 2 0 2 0 1.00305798474369219219752355409390731264 -0.16305798474369219219752355409390731264 1.490591730234615865843651857942052864 -1.38847100762815390390123822295304634368 1.38847100762815390390123822295304634368 0.02 0.005 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.5 2.02 0.5 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.5 2 0 63 -21 42 882 -882 0 2 0 2 63 -21 42 882 -882 0 2 0 2 63 -21 42 882 -882 0 2 0 2 1.00305798474369219219752355409390731264 0.16305798474369219219752355409390731264 -1.490591730234615865843651857942052864 -1.38847100762815390390123822295304634368 
1.38847100762815390390123822295304634368 -0.00000000000000000000000000000000000001 0.00000000000000000000000000000000000001 -63.42 -21.42 41.58 890.82 -890.82 0.495 1.98 0.495 1.98 +63.42 -21.42 41.58 890.82 -890.82 0.495 1.98 0 1 63.42 -21.42 41.58 890.82 -890.82 -63.42 -21.42 41.58 890.82 -890.82 0.495049504950495049 1.980198019801980198 0.495049504950495049 1.980198019801980198 -63.42 -21.42 41.58 890.82 -890.82 0.49 1.98 0.49 1.98 +63.42 -21.42 41.58 890.82 -890.82 0.495049504950495049 1.980198019801980198 0 1 +63.42 -21.42 41.58 890.82 -890.82 0.49 1.98 0 1 -42 42 42 42 0.42 0.42 0.42 42.42 42.42 42.42 0 0 0 0 0 0 0 0 0 0 42 -42 -42 -42 -0.42 -0.42 -0.42 -42.42 -42.42 -42.42 diff --git a/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/tests/queries/0_stateless/00751_default_databasename_for_view.reference index 4899e230924..2cd5019defa 100644 --- a/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -6,8 +6,8 @@ CREATE MATERIALIZED VIEW default.t_mv_00751 ) ENGINE = MergeTree ORDER BY date -SETTINGS index_granularity = 8192 AS -SELECT +SETTINGS index_granularity = 8192 +AS SELECT date, platform, app diff --git a/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql b/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql index a4bdbd5653c..998ff2f54d3 100644 --- a/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql +++ b/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql @@ -1,3 +1,4 @@ +set allow_suspicious_low_cardinality_types=1; drop table if exists lc_lambda; create table lc_lambda (arr Array(LowCardinality(UInt64))) engine = Memory; insert into lc_lambda select range(number) from system.numbers limit 10; diff --git a/tests/queries/0_stateless/00752_low_cardinality_left_array_join.sql b/tests/queries/0_stateless/00752_low_cardinality_left_array_join.sql index 1c19700e34d..2d65f01a1b9 100644 --- a/tests/queries/0_stateless/00752_low_cardinality_left_array_join.sql +++ b/tests/queries/0_stateless/00752_low_cardinality_left_array_join.sql @@ -1,3 +1,4 @@ +set allow_suspicious_low_cardinality_types=1; drop table if exists lc_left_aj; CREATE TABLE lc_left_aj ( diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql index 01a2f53bf93..0b289ad7c1d 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql @@ -16,6 +16,8 @@ CREATE TABLE default_codec_synthetic id UInt64 Codec(ZSTD(3)) ) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; +set max_insert_threads = 1; + INSERT INTO delta_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; INSERT INTO default_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; diff --git a/tests/queries/0_stateless/00808_not_optimize_predicate.reference b/tests/queries/0_stateless/00808_not_optimize_predicate.reference index 3110b82db89..647c6d91890 100644 --- a/tests/queries/0_stateless/00808_not_optimize_predicate.reference +++ b/tests/queries/0_stateless/00808_not_optimize_predicate.reference @@ -24,6 +24,6 @@ FROM n, finalizeAggregation(s) FROM test_00808_push_down_with_finalizeAggregation - PREWHERE (n <= 5) AND (n >= 2) + WHERE (n <= 5) AND (n 
>= 2) ) WHERE (n >= 2) AND (n <= 5) diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql index 51bf5a2ede1..6530f691087 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql @@ -62,49 +62,49 @@ SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explai --- EXPLAIN QUERY TREE SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.a = t2.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.a = t2.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.b = t2.b SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 WHERE t1.b = t2.b) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t2.a = t1.a AND t2.a = t3.a AND t2.a = t4.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t2.a = t1.a AND t2.a = t3.a AND t2.a = t4.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t3.a = t1.a AND t3.a = t2.a AND t3.a = t4.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t3.a = t1.a AND t3.a = t2.a AND t3.a = t4.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 
WHERE t4.a = t1.a AND t4.a = t2.a AND t4.a = t3.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t4.a = t1.a AND t4.a = t2.a AND t4.a = t3.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2, t3, t4) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1 CROSS JOIN t2 CROSS JOIN t3 CROSS JOIN t4 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1 CROSS JOIN t2 CROSS JOIN t3 CROSS JOIN t4) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 CROSS JOIN t3 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1, t2 CROSS JOIN t3) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 USING a CROSS JOIN t3 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 USING a CROSS JOIN t3) SETTINGS allow_experimental_analyzer = 1; SELECT countIf(explain like '%COMMA%' OR explain like '%CROSS%'), countIf(explain like '%INNER%') FROM ( - EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 ON t1.a = t2.a CROSS JOIN t3 SETTINGS allow_experimental_analyzer = 1); + EXPLAIN QUERY TREE SELECT t1.a FROM t1 JOIN t2 ON t1.a = t2.a CROSS JOIN t3) SETTINGS allow_experimental_analyzer = 1; INSERT INTO t1 values (1,1), (2,2), (3,3), (4,4); INSERT INTO t2 values (1,1), (1, Null); diff --git a/tests/queries/0_stateless/00873_t64_codec_date.reference b/tests/queries/0_stateless/00873_t64_codec_date.reference new file mode 100644 index 00000000000..9353696610c --- /dev/null +++ b/tests/queries/0_stateless/00873_t64_codec_date.reference @@ -0,0 +1,4 @@ +1970-01-01 1970-01-01 1950-01-01 1950-01-01 +1970-01-01 1970-01-01 1970-01-01 1970-01-01 +2149-06-06 2149-06-06 2149-06-06 2149-06-06 +2149-06-06 2149-06-06 2149-06-08 2149-06-08 diff --git a/tests/queries/0_stateless/00873_t64_codec_date.sql b/tests/queries/0_stateless/00873_t64_codec_date.sql new file mode 100644 index 00000000000..c6e21baba12 --- /dev/null +++ b/tests/queries/0_stateless/00873_t64_codec_date.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS t64; + +CREATE TABLE t64 +( + date16 Date, + t_date16 Date Codec(T64, ZSTD), + date_32 Date32, + t_date32 Date32 Codec(T64, ZSTD) +) ENGINE MergeTree() ORDER BY tuple(); + +INSERT INTO t64 values ('1970-01-01', '1970-01-01', '1970-01-01', '1970-01-01'); +INSERT INTO t64 values ('2149-06-06', '2149-06-06', '2149-06-06', '2149-06-06'); +INSERT INTO t64 values ('2149-06-08', '2149-06-08', '2149-06-08', '2149-06-08'); +INSERT INTO t64 values 
('1950-01-01', '1950-01-01', '1950-01-01', '1950-01-01'); + +SELECT * FROM t64 ORDER BY date_32; + +SELECT * FROM t64 WHERE date16 != t_date16; +SELECT * FROM t64 WHERE date_32 != t_date32; + +OPTIMIZE TABLE t64 FINAL; + +SELECT * FROM t64 WHERE date16 != t_date16; +SELECT * FROM t64 WHERE date_32 != t_date32; + +DROP TABLE t64; diff --git a/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql b/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql index 33097c99272..0bdb338e9d2 100644 --- a/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql +++ b/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql @@ -181,15 +181,15 @@ SELECT arrayEnumerateUniqRanked([1,2], 1, 2); -- { serverError 36 } SELECT arrayEnumerateUniqRanked([1,2], 1, 3, 4, 5); -- { serverError 36 } SELECT arrayEnumerateUniqRanked([1,2], 1, 3, [4], 5); -- { serverError 36 } SELECT arrayEnumerateDenseRanked([[[[[[[[[[42]]]]]]]]]]); -SELECT arrayEnumerateUniqRanked('wat', [1,2]); -- { serverError 170 } -SELECT arrayEnumerateUniqRanked(1, [1,2], 'boom'); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(['\0'], -8363126); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(-10, ['\0'], -8363126); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(1, ['\0'], -8363126); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(-101, ['\0']); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked(1.1, [10,20,10,30]); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked([10,20,10,30], 0.4); -- { serverError 170 } -SELECT arrayEnumerateDenseRanked([10,20,10,30], 1.8); -- { serverError 170 } +SELECT arrayEnumerateUniqRanked('wat', [1,2]); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateUniqRanked(1, [1,2], 'boom'); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(['\0'], -8363126); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(-10, ['\0'], -8363126); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(1, ['\0'], -8363126); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(-101, ['\0']); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked(1.1, [10,20,10,30]); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked([10,20,10,30], 0.4); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateDenseRanked([10,20,10,30], 1.8); -- { serverError BAD_ARGUMENTS } SELECT arrayEnumerateUniqRanked(1, [], 1000000000); -- { serverError 36 } diff --git a/tests/queries/0_stateless/00916_create_or_replace_view.reference b/tests/queries/0_stateless/00916_create_or_replace_view.reference index 50323e47556..66aefd5cf46 100644 --- a/tests/queries/0_stateless/00916_create_or_replace_view.reference +++ b/tests/queries/0_stateless/00916_create_or_replace_view.reference @@ -1,2 +1,2 @@ -CREATE VIEW default.t\n(\n `number` UInt64\n) AS\nSELECT number\nFROM system.numbers -CREATE VIEW default.t\n(\n `next_number` UInt64\n) AS\nSELECT number + 1 AS next_number\nFROM system.numbers +CREATE VIEW default.t\n(\n `number` UInt64\n)\nAS SELECT number\nFROM system.numbers +CREATE VIEW default.t\n(\n `next_number` UInt64\n)\nAS SELECT number + 1 AS next_number\nFROM system.numbers diff --git a/tests/queries/0_stateless/00940_order_by_read_in_order_query_plan.reference b/tests/queries/0_stateless/00940_order_by_read_in_order_query_plan.reference index c827d208387..4c1a3fd5c15 100644 --- a/tests/queries/0_stateless/00940_order_by_read_in_order_query_plan.reference +++ 
b/tests/queries/0_stateless/00940_order_by_read_in_order_query_plan.reference @@ -293,8 +293,8 @@ select * from (select * from tab where (a + b) * c = 8 union all select * from t select * from (explain plan actions = 1 select * from (select * from tab where (a + b) * c = 8 union all select * from tab3 where (a + b) * c = 18) order by sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%'; Prefix sort description: sin(divide(a, b)) ASC Result sort description: sin(divide(a, b)) ASC - ReadType: InOrder - ReadType: InOrder + ReadType: InOrder + ReadType: InOrder select * from (select * from tab where (a + b) * c = 8 union all select * from tab4) order by sin(a / b); 2 2 2 2 2 2 2 2 @@ -311,7 +311,7 @@ select * from (select * from tab where (a + b) * c = 8 union all select * from t select * from (explain plan actions = 1 select * from (select * from tab where (a + b) * c = 8 union all select * from tab4) order by sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%'; Prefix sort description: sin(divide(a, b)) ASC Result sort description: sin(divide(a, b)) ASC - ReadType: InOrder + ReadType: InOrder ReadType: InOrder select * from (select * from tab union all select * from tab5) order by (a + b) * c; 0 0 0 0 diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql index dc47e858c4d..faa7feda04d 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -1,3 +1,4 @@ +SET allow_suspicious_low_cardinality_types=1; DROP TABLE IF EXISTS single_column_bloom_filter; diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index b697bd56800..24e4241b813 100644 --- a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest -- Tag no-fasttest: Not sure why fail even in sequential mode. Disabled for now to make some progress. SET allow_introspection_functions = 1; diff --git a/tests/queries/0_stateless/00984_parser_stack_overflow.reference b/tests/queries/0_stateless/00984_parser_stack_overflow.reference index 0cf6a1f96df..e28ada842c0 100644 --- a/tests/queries/0_stateless/00984_parser_stack_overflow.reference +++ b/tests/queries/0_stateless/00984_parser_stack_overflow.reference @@ -1,4 +1,6 @@ -exceeded -exceeded +1 +1 +0 +0 20002 1 diff --git a/tests/queries/0_stateless/00984_parser_stack_overflow.sh b/tests/queries/0_stateless/00984_parser_stack_overflow.sh index a7854b91ee2..dc836388cf6 100755 --- a/tests/queries/0_stateless/00984_parser_stack_overflow.sh +++ b/tests/queries/0_stateless/00984_parser_stack_overflow.sh @@ -9,10 +9,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh # Too deep recursion -perl -e 'print "(" x 10000' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | grep -oF 'exceeded' -perl -e 'print "SELECT " . ("[" x 10000)' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | grep -oF 'exceeded' -perl -e 'print "SELECT " . ("([" x 5000)' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | grep -oF 'exceeded' -perl -e 'print "SELECT 1" . 
("+1" x 10000)' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | grep -oF 'exceeded' +perl -e 'print "(" x 10000' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | grep -cP 'exceeded|too large' +perl -e 'print "SELECT " . ("[" x 10000)' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | grep -cP 'exceeded|too large' +perl -e 'print "SELECT " . ("([" x 5000)' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | grep -cP 'exceeded|too large' +perl -e 'print "SELECT 1" . ("+1" x 10000)' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | grep -cP 'exceeded|too large' # But this is Ok perl -e 'print "SELECT 1" . (",1" x 10000)' | $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" --data-binary @- | wc -c diff --git a/tests/queries/0_stateless/01019_Buffer_and_max_memory_usage.sql b/tests/queries/0_stateless/01019_Buffer_and_max_memory_usage.sql index 9fbc3f951fe..777effe9e81 100644 --- a/tests/queries/0_stateless/01019_Buffer_and_max_memory_usage.sql +++ b/tests/queries/0_stateless/01019_Buffer_and_max_memory_usage.sql @@ -25,6 +25,7 @@ CREATE TABLE buffer_ (key UInt64) Engine=Buffer(currentDatabase(), null_, SET max_memory_usage=10e6; SET max_block_size=100e3; +SET max_insert_threads=1; -- Check that max_memory_usage is ignored only on flush and not on squash SET min_insert_block_size_bytes=9e6; diff --git a/tests/queries/0_stateless/01023_materialized_view_query_context.sql b/tests/queries/0_stateless/01023_materialized_view_query_context.sql index 21569f375df..f8c282fedd5 100644 --- a/tests/queries/0_stateless/01023_materialized_view_query_context.sql +++ b/tests/queries/0_stateless/01023_materialized_view_query_context.sql @@ -1,4 +1,16 @@ --- Tags: no-parallel +-- Tags: no-parallel, no-replicated-database + +-- FIXME: old analyzer does not check db exist, new one checks it and test fails. test is suppressed for replicated. 
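To make the FIXME above concrete: with the new analyzer the dictionary referenced by dictGet is resolved while the materialized view is being created, so a missing dictionary fails the DDL itself instead of surfacing later. A minimal sketch of the shape of the query, with hypothetical table names (only the dictionary name is taken from the log excerpts that follow):

    CREATE TABLE src (key UInt64) ENGINE = Memory;                  -- hypothetical source table
    CREATE TABLE output (key UInt64, val UInt64) ENGINE = Memory;   -- hypothetical target table
    CREATE MATERIALIZED VIEW mv TO output AS
        SELECT key, dictGetUInt64('dict_in_01023.dict', 'val', key) AS val
        FROM src;
    -- old analyzer: the CREATE succeeds and the dictionary is only resolved once rows arrive;
    -- new analyzer: the CREATE itself fails with BAD_ARGUMENTS if the dictionary does not exist,
    -- which is why the test is now skipped for the replicated-database configuration.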
+-- without analyzer: +-- 2024.02.22 18:55:00.320120 [ 116105 ] {61f04f21-6d66-4064-926f-20657de2e66c} executeQuery: (from 0.0.0.0:0, user: ) (comment: 01023_materialized_view_query_context.sql) /* ddl_entry=query-0000000009 */ CREATE MATERIALIZED VIEW test_143n70zj.mv UUID '0572ef25-139a-4705-a213-601675435648' TO test_143n70zj.output (`key` UInt64, `val` UInt64) AS SELECT key, dictGetUInt64('dict_in_01023.dict', 'val', key) AS val FROM test_143n70zj.dist_out (stage: Complete) +-- 2024.02.22 18:55:00.321303 [ 116105 ] {61f04f21-6d66-4064-926f-20657de2e66c} DDLWorker(test_143n70zj): Executed query: /* ddl_entry=query-0000000009 */ CREATE MATERIALIZED VIEW test_143n70zj.mv UUID '0572ef25-139a-4705-a213-601675435648' TO test_143n70zj.output (`key` UInt64, `val` UInt64) AS SELECT key, dictGetUInt64('dict_in_01023.dict', 'val', key) AS val FROM test_143n70zj.dist_out +-- +-- with analyzer: +-- 2024.02.22 19:33:36.266538 [ 108818 ] {0e1586f5-8ae0-4065-81b7-1e7d43b85d82} executeQuery: (from 0.0.0.0:0, user: ) (comment: 01023_materialized_view_query_context.sql) /* ddl_entry=query-0000000009 */ CREATE MATERIALIZED VIEW test_devov0ke.mv UUID 'bf3a2bfe-1446-4a02-b760-bae514488c5a' TO test_devov0ke.output (`key` UInt64, `val` UInt64) AS SELECT key, dictGetUInt64('dict_in_01023.dict', 'val', key) AS val FROM test_devov0ke.dist_out (stage: Complete) +-- 2024.02.22 19:33:36.266796 [ 108818 ] {0e1586f5-8ae0-4065-81b7-1e7d43b85d82} Planner: Query SELECT __table1.key AS key, dictGetUInt64('dict_in_01023.dict', 'val', __table1.key) AS val FROM test_devov0ke.dist_out AS __table1 to stage Complete only analyze +-- 2024.02.22 19:33:36.266855 [ 108818 ] {0e1586f5-8ae0-4065-81b7-1e7d43b85d82} Planner: Query SELECT __table1.key AS key, dictGetUInt64('dict_in_01023.dict', 'val', __table1.key) AS val FROM test_devov0ke.dist_out AS __table1 from stage FetchColumns to stage Complete only analyze +-- 2024.02.22 19:33:36.280740 [ 108818 ] {0e1586f5-8ae0-4065-81b7-1e7d43b85d82} executeQuery: Code: 36. DB::Exception: Dictionary (`dict_in_01023.dict`) not found. (BAD_ARGUMENTS) (version 24.2.1.1429 (official build)) (from 0.0.0.0:0) (comment: 01023_materialized_view_query_context.sql) (in query: /* ddl_entry=query-0000000009 */ CREATE MATERIALIZED VIEW test_devov0ke.mv UUID 'bf3a2bfe-1446-4a02-b760-bae514488c5a' TO test_devov0ke.output (`key` UInt64, `val` UInt64) AS SELECT key, dictGetUInt64('dict_in_01023.dict', 'val', key) AS val FROM test_devov0ke.dist_out), Stack trace (when copying this message, always include the lines below): +-- 2024.02.22 19:33:36.280936 [ 108818 ] {0e1586f5-8ae0-4065-81b7-1e7d43b85d82} DDLWorker(test_devov0ke): Query /* ddl_entry=query-0000000009 */ CREATE MATERIALIZED VIEW test_devov0ke.mv UUID 'bf3a2bfe-1446-4a02-b760-bae514488c5a' TO test_devov0ke.output (`key` UInt64, `val` UInt64) AS SELECT key, dictGetUInt64('dict_in_01023.dict', 'val', key) AS val FROM test_devov0ke.dist_out wasn't finished successfully: Code: 36. DB::Exception: Dictionary (`dict_in_01023.dict`) not found. 
(BAD_ARGUMENTS), Stack trace (when copying this message, always include the lines below): -- Create dictionary, since dictGet*() uses DB::Context in executeImpl() -- (To cover scope of the Context in PushingToViews chain) diff --git a/tests/queries/0_stateless/01029_early_constant_folding.reference b/tests/queries/0_stateless/01029_early_constant_folding.reference index abcb2ddc6a7..4df5414ba4a 100644 --- a/tests/queries/0_stateless/01029_early_constant_folding.reference +++ b/tests/queries/0_stateless/01029_early_constant_folding.reference @@ -2,7 +2,7 @@ SELECT 1 WHERE 0 SELECT 1 SELECT 1 -WHERE (1 IN (0, 2)) AND (2 = (identity(_CAST(2, \'Nullable(UInt8)\')) AS subquery)) +WHERE (1 IN (0, 2)) AND (2 = (__scalarSubqueryResult(_CAST(2, \'Nullable(UInt8)\')) AS subquery)) SELECT 1 WHERE 1 IN (( SELECT arrayJoin([1, 2, 3]) diff --git a/tests/queries/0_stateless/01030_storage_url_syntax.sql b/tests/queries/0_stateless/01030_storage_url_syntax.sql index 9b31558eece..eda108aca2f 100644 --- a/tests/queries/0_stateless/01030_storage_url_syntax.sql +++ b/tests/queries/0_stateless/01030_storage_url_syntax.sql @@ -1,7 +1,7 @@ drop table if exists test_table_url_syntax ; create table test_table_url_syntax (id UInt32) ENGINE = URL('') -; -- { serverError 36 } +; -- { serverError UNSUPPORTED_URI_SCHEME } create table test_table_url_syntax (id UInt32) ENGINE = URL('','','','') ; -- { serverError 42 } drop table if exists test_table_url_syntax @@ -11,7 +11,7 @@ drop table if exists test_table_url ; create table test_table_url(id UInt32) ENGINE = URL('http://localhost/endpoint') -; -- { serverError 36 } +; -- { serverError CANNOT_DETECT_FORMAT } create table test_table_url(id UInt32) ENGINE = URL('http://localhost/endpoint.json'); drop table test_table_url; diff --git a/tests/queries/0_stateless/01033_function_substring.reference b/tests/queries/0_stateless/01033_function_substring.reference index b0fac36e24a..362a14f80f3 100644 --- a/tests/queries/0_stateless/01033_function_substring.reference +++ b/tests/queries/0_stateless/01033_function_substring.reference @@ -170,4 +170,6 @@ g -UBSAN bug +-- UBSAN bug +-- Alias +el diff --git a/tests/queries/0_stateless/01033_function_substring.sql b/tests/queries/0_stateless/01033_function_substring.sql index 82c6b5859e2..9955700f302 100644 --- a/tests/queries/0_stateless/01033_function_substring.sql +++ b/tests/queries/0_stateless/01033_function_substring.sql @@ -132,7 +132,7 @@ SELECT substring(s, l, r) FROM t; DROP table if exists t; -SELECT 'UBSAN bug'; +SELECT '-- UBSAN bug'; /** NOTE: The behaviour of substring and substringUTF8 is inconsistent when negative offset is greater than string size: * substring: @@ -144,3 +144,6 @@ SELECT 'UBSAN bug'; * This may be subject for change. */ SELECT substringUTF8('hello, ÿÑ�øòõÑ�', -9223372036854775808, number) FROM numbers(16) FORMAT Null; + +SELECT '-- Alias'; +SELECT byteSlice('hello', 2, 2); diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql index e292447512c..f17f3ac63b0 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql @@ -1,8 +1,9 @@ - +SET send_logs_level = 'fatal'; SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. 
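Among the changes above, 01033_function_substring gains coverage for the new byteSlice alias; the equivalence it checks is simply:

    -- byteSlice is exercised as an alias of substring (1-based offset, byte semantics);
    -- both calls return 'el', matching the new reference line.
    SELECT substring('hello', 2, 2), byteSlice('hello', 2, 2);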
CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt; diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql index f87d9aa023e..adcb4a6364d 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.sql +++ b/tests/queries/0_stateless/01048_window_view_parser.sql @@ -1,9 +1,11 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP DATABASE IF EXISTS test_01048; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. CREATE DATABASE test_01048 ENGINE=Ordinary; DROP TABLE IF EXISTS test_01048.mt; diff --git a/tests/queries/0_stateless/01053_drop_database_mat_view.sql b/tests/queries/0_stateless/01053_drop_database_mat_view.sql index 2642430eb05..6ab31fce644 100644 --- a/tests/queries/0_stateless/01053_drop_database_mat_view.sql +++ b/tests/queries/0_stateless/01053_drop_database_mat_view.sql @@ -1,6 +1,8 @@ +SET send_logs_level = 'fatal'; DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; -- Different inner table name with Atomic set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql index ae90dc3cc72..f5ac347cfff 100644 --- a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql +++ b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql @@ -25,7 +25,8 @@ AS minState( toUInt64(-1) ) as fastest_session, maxState( toUInt64(0) ) as biggest_inactivity_period FROM numbers(50000) -GROUP BY id; +GROUP BY id +SETTINGS max_insert_threads=1; -- source table #1 diff --git a/tests/queries/0_stateless/01073_grant_and_revoke.reference b/tests/queries/0_stateless/01073_grant_and_revoke.reference index 449f21e896a..b91820914e6 100644 --- a/tests/queries/0_stateless/01073_grant_and_revoke.reference +++ b/tests/queries/0_stateless/01073_grant_and_revoke.reference @@ -4,12 +4,12 @@ B C GRANT INSERT, ALTER DELETE ON *.* TO test_user_01073 GRANT SELECT ON db1.* TO test_user_01073 -GRANT SELECT ON db2.table TO test_user_01073 -GRANT SELECT(col1) ON db3.table TO test_user_01073 -GRANT SELECT(col1, col2) ON db4.table TO test_user_01073 +GRANT SELECT ON db2.`table` TO test_user_01073 +GRANT SELECT(col1) ON db3.`table` TO test_user_01073 +GRANT SELECT(col1, col2) ON db4.`table` TO test_user_01073 D GRANT ALTER DELETE ON *.* TO test_user_01073 -GRANT SELECT(col1) ON db4.table TO test_user_01073 +GRANT SELECT(col1) ON db4.`table` TO test_user_01073 E GRANT SELECT ON db1.* TO test_role_01073 REVOKE SELECT(c1, c2, c3, c4, c5) ON db1.table1 FROM test_role_01073 diff --git a/tests/queries/0_stateless/01074_partial_revokes.reference b/tests/queries/0_stateless/01074_partial_revokes.reference index 43e44f3c941..9ffa2878ad4 100644 --- a/tests/queries/0_stateless/01074_partial_revokes.reference +++ b/tests/queries/0_stateless/01074_partial_revokes.reference @@ -4,21 +4,21 @@ REVOKE SELECT ON db.* FROM test_user_01074 --cleanup --simple 2 GRANT SELECT ON db.* TO test_user_01074 -REVOKE SELECT ON db.table FROM test_user_01074 +REVOKE SELECT ON 
db.`table` FROM test_user_01074 --cleanup --simple 3 -GRANT SELECT ON db.table TO test_user_01074 -REVOKE SELECT(col1) ON db.table FROM test_user_01074 +GRANT SELECT ON db.`table` TO test_user_01074 +REVOKE SELECT(col1) ON db.`table` FROM test_user_01074 --cleanup --complex 1 GRANT SELECT ON *.* TO test_user_01074 -REVOKE SELECT(col1, col2) ON db.table FROM test_user_01074 +REVOKE SELECT(col1, col2) ON db.`table` FROM test_user_01074 --cleanup --complex 2 GRANT SELECT ON *.* TO test_user_01074 REVOKE SELECT ON db.* FROM test_user_01074 -GRANT SELECT ON db.table TO test_user_01074 -REVOKE SELECT(col1) ON db.table FROM test_user_01074 +GRANT SELECT ON db.`table` TO test_user_01074 +REVOKE SELECT(col1) ON db.`table` FROM test_user_01074 â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”┓ ┃ user_name  ┃ role_name ┃ access_type ┃ database ┃ table ┃ column ┃ is_partial_revoke ┃ grant_option ┃ ┡â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”┩ @@ -40,7 +40,7 @@ GRANT SELECT ON *.* TO test_user_01074 --cleanup --grant option 1 GRANT SELECT ON *.* TO test_user_01074 WITH GRANT OPTION -REVOKE GRANT OPTION FOR SELECT(col1) ON db.table FROM test_user_01074 +REVOKE GRANT OPTION FOR SELECT(col1) ON db.`table` FROM test_user_01074 â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”┓ ┃ user_name  ┃ role_name ┃ access_type ┃ database ┃ table ┃ column ┃ is_partial_revoke ┃ grant_option ┃ ┡â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”┩ @@ -51,7 +51,7 @@ REVOKE GRANT OPTION FOR SELECT(col1) ON db.table FROM test_user_01074 --cleanup --grant option 2 GRANT SELECT ON *.* TO test_user_01074 WITH GRANT OPTION -REVOKE SELECT(col1) ON db.table FROM test_user_01074 +REVOKE SELECT(col1) ON db.`table` FROM test_user_01074 --cleanup --grant option 3 GRANT SELECT ON *.* TO test_user_01074 diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference index b25cfadd0ec..19db37f852a 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference @@ -6,6 +6,6 @@ CREATE TABLE default.distributed\n(\n `n` Int8\n)\nENGINE = Distributed(\'tes CREATE TABLE default.distributed_tf\n(\n `n` Int8\n) AS cluster(\'test_shard_localhost\', \'default\', \'buffer\') CREATE TABLE default.url\n(\n `n` UInt64,\n `col` String\n)\nENGINE = URL(\'https://localhost:8443/?query=select+n,+_table+from+default.merge+format+CSV\', \'CSV\') CREATE TABLE default.rich_syntax\n(\n `n` Int64\n) AS remote(\'localhos{x|y|t}\', cluster(\'test_shard_localhost\', remote(\'127.0.0.{1..4}\', \'default\', \'view\'))) -CREATE VIEW default.view\n(\n `n` Int64\n) AS\nSELECT toInt64(n) AS n\nFROM\n(\n SELECT toString(n) AS n\n FROM default.merge\n WHERE _table != \'qwerty\'\n ORDER BY _table ASC\n)\nUNION ALL\nSELECT *\nFROM default.file +CREATE 
VIEW default.view\n(\n `n` Int64\n)\nAS SELECT toInt64(n) AS n\nFROM\n(\n SELECT toString(n) AS n\n FROM default.merge\n WHERE _table != \'qwerty\'\n ORDER BY _table ASC\n)\nUNION ALL\nSELECT *\nFROM default.file CREATE DICTIONARY default.dict\n(\n `n` UInt64,\n `col` String DEFAULT \'42\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9440 SECURE 1 USER \'default\' TABLE \'url\'))\nLIFETIME(MIN 0 MAX 1)\nLAYOUT(CACHE(SIZE_IN_CELLS 1)) 16 diff --git a/tests/queries/0_stateless/01085_window_view_attach.sql b/tests/queries/0_stateless/01085_window_view_attach.sql index 51a88a04f95..a40c6f20a1a 100644 --- a/tests/queries/0_stateless/01085_window_view_attach.sql +++ b/tests/queries/0_stateless/01085_window_view_attach.sql @@ -1,9 +1,11 @@ +SET send_logs_level = 'fatal'; SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt; diff --git a/tests/queries/0_stateless/01086_window_view_cleanup.sh b/tests/queries/0_stateless/01086_window_view_cleanup.sh index b078b4718c0..01bf50bc14d 100755 --- a/tests/queries/0_stateless/01086_window_view_cleanup.sh +++ b/tests/queries/0_stateless/01086_window_view_cleanup.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash # Tags: no-parallel +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql index b69d3faf94e..3a04de650ce 100644 --- a/tests/queries/0_stateless/01092_memory_profiler.sql +++ b/tests/queries/0_stateless/01092_memory_profiler.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest, no-cpu-aarch64 +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest SET allow_introspection_functions = 1; diff --git a/tests/queries/0_stateless/01109_exchange_tables.sql b/tests/queries/0_stateless/01109_exchange_tables.sql index c118945887b..b10377436f9 100644 --- a/tests/queries/0_stateless/01109_exchange_tables.sql +++ b/tests/queries/0_stateless/01109_exchange_tables.sql @@ -1,4 +1,5 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; DROP DATABASE IF EXISTS test_01109; CREATE DATABASE test_01109 ENGINE=Atomic; @@ -31,6 +32,7 @@ DROP DATABASE IF EXISTS test_01109_other_atomic; DROP DATABASE IF EXISTS test_01109_ordinary; CREATE DATABASE test_01109_other_atomic; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. 
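The .reference updates for 00916_create_or_replace_view and 01083_expressions_in_engine_arguments above (and 01153_attach_mv_uuid further down) all encode the same formatting change: SHOW CREATE now puts AS on a new line together with SELECT instead of leaving it dangling at the end of the previous line. Roughly, with a hypothetical view name:

    CREATE VIEW v_demo AS SELECT number + 1 AS next_number FROM system.numbers;
    SHOW CREATE VIEW v_demo;
    -- previously rendered as: ...\n) AS\nSELECT number + 1 AS next_number\nFROM system.numbers
    -- now rendered as:        ...\n)\nAS SELECT number + 1 AS next_number\nFROM system.numbers
    DROP VIEW v_demo;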
CREATE DATABASE test_01109_ordinary ENGINE=Ordinary; CREATE TABLE test_01109_other_atomic.t3 ENGINE=MergeTree() ORDER BY tuple() @@ -63,6 +65,3 @@ DROP DATABASE test_01109; DROP DATABASE test_01109_other_atomic; DROP DATABASE test_01109_ordinary; DROP DATABASE test_01109_rename_exists; - - - diff --git a/tests/queries/0_stateless/01114_database_atomic.reference b/tests/queries/0_stateless/01114_database_atomic.reference index 93e89e3a2ec..f42cd099d4e 100644 --- a/tests/queries/0_stateless/01114_database_atomic.reference +++ b/tests/queries/0_stateless/01114_database_atomic.reference @@ -1,4 +1,4 @@ -2 +1 CREATE DATABASE test_01114_1\nENGINE = Atomic CREATE DATABASE test_01114_2\nENGINE = Atomic CREATE DATABASE test_01114_3\nENGINE = Ordinary diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 3e1f9eb1f43..1b1f064ae0b 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -2,6 +2,9 @@ # Tags: no-parallel, no-fasttest # Tag no-fasttest: 45 seconds running +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01119_wierd_user_names.reference b/tests/queries/0_stateless/01119_weird_user_names.reference similarity index 100% rename from tests/queries/0_stateless/01119_wierd_user_names.reference rename to tests/queries/0_stateless/01119_weird_user_names.reference diff --git a/tests/queries/0_stateless/01119_wierd_user_names.sql b/tests/queries/0_stateless/01119_weird_user_names.sql similarity index 89% rename from tests/queries/0_stateless/01119_wierd_user_names.sql rename to tests/queries/0_stateless/01119_weird_user_names.sql index 7a28016f4f3..0d6f02786b0 100644 --- a/tests/queries/0_stateless/01119_wierd_user_names.sql +++ b/tests/queries/0_stateless/01119_weird_user_names.sql @@ -13,10 +13,10 @@ create user " "; create user ' spaces'; create user 'spaces '; create user ` INTERSERVER SECRET `; -- { serverError BAD_ARGUMENTS } -create user ''; -- { serverError BAD_ARGUMENTS } +create user ''; -- { clientError SYNTAX_ERROR } create user 'test 01119'; alter user `test 01119` rename to " spaces "; -alter user " spaces " rename to ''; -- { serverError BAD_ARGUMENTS } +alter user " spaces " rename to ''; -- { clientError SYNTAX_ERROR } alter user " spaces " rename to " INTERSERVER SECRET "; -- { serverError BAD_ARGUMENTS } create user "ВаÑÑ ÐŸÑƒÐ¿ÐºÐ¸Ð½"; create user "æ— åæ° "; diff --git a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql index 505c406c2cc..fc3fcb34fc0 100644 --- a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql +++ b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql @@ -1,5 +1,7 @@ -- Tags: zookeeper, no-replicated-database, no-parallel, no-ordinary-database +SET send_logs_level = 'fatal'; + DROP TABLE IF EXISTS rmt; DROP TABLE IF EXISTS rmt1; DROP TABLE IF EXISTS rmt2; @@ -32,6 +34,7 @@ SHOW CREATE TABLE test_01148_atomic.rmt3; DROP DATABASE IF EXISTS test_01148_ordinary; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. 
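The 01119 test is renamed to fix the "wierd" typo, and its expectations change because an empty user name is now rejected by the client-side parser rather than by the server: the annotation moves from serverError BAD_ARGUMENTS to clientError SYNTAX_ERROR, while genuinely reserved names still fail on the server. In the stateless-test annotation style (statements taken from the hunk above):

    create user '';                       -- { clientError SYNTAX_ERROR }
    alter user " spaces " rename to '';   -- { clientError SYNTAX_ERROR }
    create user ` INTERSERVER SECRET `;   -- { serverError BAD_ARGUMENTS }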
CREATE DATABASE test_01148_ordinary ENGINE=Ordinary; RENAME TABLE test_01148_atomic.rmt3 to test_01148_ordinary.rmt3; -- { serverError 48 } DROP DATABASE test_01148_ordinary; diff --git a/tests/queries/0_stateless/01153_attach_mv_uuid.reference b/tests/queries/0_stateless/01153_attach_mv_uuid.reference index e37fe28e303..ca0a4b6ddbe 100644 --- a/tests/queries/0_stateless/01153_attach_mv_uuid.reference +++ b/tests/queries/0_stateless/01153_attach_mv_uuid.reference @@ -4,18 +4,18 @@ 2 4 3 9 4 16 -CREATE MATERIALIZED VIEW default.mv UUID \'e15f3ab5-6cae-4df3-b879-f40deafd82c2\'\n(\n `n` Int32,\n `n2` Int64\n)\nENGINE = MergeTree\nPARTITION BY n % 10\nORDER BY n AS\nSELECT\n n,\n n * n AS n2\nFROM default.src +CREATE MATERIALIZED VIEW default.mv UUID \'e15f3ab5-6cae-4df3-b879-f40deafd82c2\'\n(\n `n` Int32,\n `n2` Int64\n)\nENGINE = MergeTree\nPARTITION BY n % 10\nORDER BY n\nAS SELECT\n n,\n n * n AS n2\nFROM default.src 1 1 2 4 -CREATE MATERIALIZED VIEW default.mv UUID \'e15f3ab5-6cae-4df3-b879-f40deafd82c2\'\n(\n `n` Int32,\n `n2` Int64\n)\nENGINE = MergeTree\nPARTITION BY n % 10\nORDER BY n AS\nSELECT\n n,\n n * n AS n2\nFROM default.src +CREATE MATERIALIZED VIEW default.mv UUID \'e15f3ab5-6cae-4df3-b879-f40deafd82c2\'\n(\n `n` Int32,\n `n2` Int64\n)\nENGINE = MergeTree\nPARTITION BY n % 10\nORDER BY n\nAS SELECT\n n,\n n * n AS n2\nFROM default.src 1 1 2 4 3 9 4 16 -CREATE MATERIALIZED VIEW default.mv UUID \'e15f3ab5-6cae-4df3-b879-f40deafd82c2\' TO INNER UUID \'3bd68e3c-2693-4352-ad66-a66eba9e345e\'\n(\n `n` Int32,\n `n2` Int64\n)\nENGINE = MergeTree\nPARTITION BY n % 10\nORDER BY n AS\nSELECT\n n,\n n * n AS n2\nFROM default.src +CREATE MATERIALIZED VIEW default.mv UUID \'e15f3ab5-6cae-4df3-b879-f40deafd82c2\' TO INNER UUID \'3bd68e3c-2693-4352-ad66-a66eba9e345e\'\n(\n `n` Int32,\n `n2` Int64\n)\nENGINE = MergeTree\nPARTITION BY n % 10\nORDER BY n\nAS SELECT\n n,\n n * n AS n2\nFROM default.src 1 1 2 4 -CREATE MATERIALIZED VIEW default.mv UUID \'e15f3ab5-6cae-4df3-b879-f40deafd82c2\' TO INNER UUID \'3bd68e3c-2693-4352-ad66-a66eba9e345e\'\n(\n `n` Int32,\n `n2` Int64\n)\nENGINE = MergeTree\nPARTITION BY n % 10\nORDER BY n AS\nSELECT\n n,\n n * n AS n2\nFROM default.src +CREATE MATERIALIZED VIEW default.mv UUID \'e15f3ab5-6cae-4df3-b879-f40deafd82c2\' TO INNER UUID \'3bd68e3c-2693-4352-ad66-a66eba9e345e\'\n(\n `n` Int32,\n `n2` Int64\n)\nENGINE = MergeTree\nPARTITION BY n % 10\nORDER BY n\nAS SELECT\n n,\n n * n AS n2\nFROM default.src 1 1 2 4 3 9 diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql index 1eff1c0779a..80ed707b695 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql @@ -1,11 +1,13 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; SET prefer_localhost_replica = 1; DROP DATABASE IF EXISTS test_01155_ordinary; DROP DATABASE IF EXISTS test_01155_atomic; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. CREATE DATABASE test_01155_ordinary ENGINE=Ordinary; CREATE DATABASE test_01155_atomic ENGINE=Atomic; @@ -70,7 +72,10 @@ RENAME DATABASE test_01155_ordinary TO test_01155_atomic; SET check_table_dependencies=1; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. 
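Most SQL tests in this change silence the Ordinary-engine deprecation warning for the whole file by putting SET send_logs_level = 'fatal' near the top; the 01155 hunk that follows instead scopes the suppression to the single deprecated statement and then restores the level the test runner expects. The pattern, with a hypothetical database name:

    SET allow_deprecated_database_ordinary = 1;
    SET send_logs_level = 'fatal';                    -- hide the warning for the next statement only
    -- Creation of a database with Ordinary engine emits a warning.
    CREATE DATABASE ordinary_demo ENGINE = Ordinary;  -- hypothetical database name
    SET send_logs_level = 'warning';                  -- back to the level the test harness runs with
    DROP DATABASE ordinary_demo;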
+SET send_logs_level='fatal'; CREATE DATABASE test_01155_ordinary ENGINE=Ordinary; +SET send_logs_level='warning'; SHOW CREATE DATABASE test_01155_atomic; RENAME TABLE test_01155_atomic.mv1 TO test_01155_ordinary.mv1; diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.reference b/tests/queries/0_stateless/01158_zookeeper_log_long.reference index 547acb5b377..17c7ac84d1c 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.reference +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.reference @@ -5,16 +5,18 @@ log ::ffff:127.0.0.1 Request 0 Create /test/01158/default/rmt/log/log- 0 1 \N 0 1 \N \N \N 0 0 0 0 ::ffff:127.0.0.1 Response 0 Create /test/01158/default/rmt/log/log- 0 1 \N 0 1 ZOK \N \N /test/01158/default/rmt/log/log-0000000000 0 0 0 0 parts -Request 0 Multi 0 0 \N 4 0 \N \N \N 0 0 0 0 +Request 0 Multi 0 0 \N 5 0 \N \N \N 0 0 0 0 Request 0 Create /test/01158/default/rmt/log/log- 0 1 \N 0 1 \N \N \N 0 0 0 0 Request 0 Remove /test/01158/default/rmt/block_numbers/all/block-0000000000 0 0 -1 0 2 \N \N \N 0 0 0 0 Request 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 3 \N \N \N 0 0 0 0 Request 0 Create /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 4 \N \N \N 0 0 0 0 -Response 0 Multi 0 0 \N 4 0 ZOK \N \N 0 0 0 0 +Request 0 Check /clickhouse/sessions/zookeeper/ 0 0 1 0 5 \N \N \N 0 0 0 0 +Response 0 Multi 0 0 \N 5 0 ZOK \N \N 0 0 0 0 Response 0 Create /test/01158/default/rmt/log/log- 0 1 \N 0 1 ZOK \N \N /test/01158/default/rmt/log/log-0000000000 0 0 0 0 Response 0 Remove /test/01158/default/rmt/block_numbers/all/block-0000000000 0 0 -1 0 2 ZOK \N \N 0 0 0 0 Response 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 3 ZOK \N \N /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 0 0 Response 0 Create /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 4 ZOK \N \N /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 0 0 +Response 0 Check /clickhouse/sessions/zookeeper/ 0 0 1 0 5 ZOK \N \N 0 0 0 0 Request 0 Exists /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 0 \N \N \N 0 0 0 0 Response 0 Exists /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 0 ZOK \N \N 0 0 96 0 blocks diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.sql b/tests/queries/0_stateless/01158_zookeeper_log_long.sql index 24fd8060548..1dd7578a3e1 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.sql +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.sql @@ -26,7 +26,7 @@ from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || order by xid, type, request_idx; select 'parts'; -select type, has_watch, op_num, path, is_ephemeral, is_sequential, version, requests_size, request_idx, error, watch_type, +select type, has_watch, op_num, replace(path, toString(serverUUID()), ''), is_ephemeral, is_sequential, if(startsWith(path, '/clickhouse/sessions'), 1, version), requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0') diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference index b9a66a1e1a9..2151328d8b7 100644 --- 
a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference +++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference @@ -3,7 +3,7 @@ Received exception from server: Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57. Error: Table default.none already exists. (TABLE_ALREADY_EXISTS) (query: create table none on cluster test_shard_localhost (n int) engine=Memory;) Received exception from server: -Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED) +Code: 159. Error: Received from localhost:9000. Error: Distributed DDL task is not finished on 1 of 2 hosts (0 of them are currently executing the task, 0 are inactive). They are going to execute the query in background. Was waiting for seconds, which is longer than distributed_ddl_task_timeout. (TIMEOUT_EXCEEDED) (query: drop table if exists none on cluster test_unavailable_shard;) throw localhost 9000 0 0 0 @@ -12,7 +12,7 @@ Code: 57. Error: Received from localhost:9000. Error: There was an error on [loc (query: create table throw on cluster test_shard_localhost (n int) engine=Memory format Null;) localhost 9000 0 1 0 Received exception from server: -Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED) +Code: 159. Error: Received from localhost:9000. Error: Distributed DDL task is not finished on 1 of 2 hosts (0 of them are currently executing the task, 0 are inactive). They are going to execute the query in background. Was waiting for seconds, which is longer than distributed_ddl_task_timeout. 
(TIMEOUT_EXCEEDED) (query: drop table if exists throw on cluster test_unavailable_shard;) null_status_on_timeout localhost 9000 0 0 0 diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh index 12e142adda9..f17e85da60a 100755 --- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh +++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh @@ -54,14 +54,14 @@ $CLIENT --distributed_ddl_output_mode=none -q "create table none on cluster test $CLIENT --distributed_ddl_output_mode=none -q "create table none on cluster test_shard_localhost (n int) engine=Memory;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" # Timeout -run_until_out_contains 'There are 1 unfinished hosts' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=none -q "drop table if exists none on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Watching task .* is executing longer/Watching task is executing longer/" +run_until_out_contains 'not finished on 1 ' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=none -q "drop table if exists none on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Distributed DDL task .* is not finished/Distributed DDL task is not finished/" | sed "s/for .* seconds/for seconds/" $CLIENT --distributed_ddl_output_mode=throw -q "select value from system.settings where name='distributed_ddl_output_mode';" $CLIENT --distributed_ddl_output_mode=throw -q "create table throw on cluster test_shard_localhost (n int) engine=Memory;" $CLIENT --distributed_ddl_output_mode=throw -q "create table throw on cluster test_shard_localhost (n int) engine=Memory format Null;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" -run_until_out_contains 'There are 1 unfinished hosts' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=throw -q "drop table if exists throw on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Watching task .* is executing longer/Watching task is executing longer/" +run_until_out_contains 'not finished on 1 ' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=throw -q "drop table if exists throw on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Distributed DDL task .* is not finished/Distributed DDL task is not finished/" | sed "s/for .* seconds/for seconds/" $CLIENT --distributed_ddl_output_mode=null_status_on_timeout -q "select value from system.settings where name='distributed_ddl_output_mode';" diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh index d1a7144e886..1ac01fe6abc 100755 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash # Tags: zookeeper, no-parallel, no-fasttest +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh index c25cdf4e970..69178a93d42 100755 --- a/tests/queries/0_stateless/01193_metadata_loading.sh +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest, no-s3-storage +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest, no-s3-storage, no-sanitize-coverage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -8,16 +8,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Check that attaching a database with a large number of tables is not too slow. # it is the worst way of making performance test, nevertheless it can detect significant slowdown and some other issues, that usually found by stress test -db="test_01193_$RANDOM" +db="test_01193_$RANDOM_$RANDOM_$RANDOM_$RANDOM" tables=1000 threads=10 count_multiplier=1 max_time_ms=1500 -debug_or_sanitizer_build=$($CLICKHOUSE_CLIENT -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%' OR hasThreadFuzzer()") - -if [[ debug_or_sanitizer_build -eq 1 ]]; then tables=100; count_multiplier=10; max_time_ms=1500; fi - create_tables() { $CLICKHOUSE_CLIENT -q "WITH 'CREATE TABLE $db.table_$1_' AS create1, diff --git a/tests/queries/0_stateless/01201_read_single_thread_in_order.sql b/tests/queries/0_stateless/01201_read_single_thread_in_order.sql index 24ed935a125..1c2b3eb3dbf 100644 --- a/tests/queries/0_stateless/01201_read_single_thread_in_order.sql +++ b/tests/queries/0_stateless/01201_read_single_thread_in_order.sql @@ -1,3 +1,5 @@ +-- Tags: long + DROP TABLE IF EXISTS t; CREATE TABLE t @@ -8,7 +10,7 @@ ENGINE = MergeTree ORDER BY number SETTINGS index_granularity = 128, ratio_of_defaults_for_sparse_serialization = 1.0, index_granularity_bytes = '10Mi'; -SET min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; +SET min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_insert_threads = 1; INSERT INTO t SELECT number FROM numbers(10000000); SET max_threads = 1, max_block_size = 12345; diff --git a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.sql b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.sql index 5db92e70650..2a1b202c6dd 100644 --- a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.sql +++ b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.sql @@ -1,8 +1,11 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; + DROP DATABASE IF EXISTS dict_db_01224; DROP DATABASE IF EXISTS dict_db_01224_dictionary; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. 
CREATE DATABASE dict_db_01224 ENGINE=Ordinary; -- Different internal dictionary name with Atomic CREATE DATABASE dict_db_01224_dictionary Engine=Dictionary; diff --git a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql index bc733a0c546..28a5a0d9d55 100644 --- a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql +++ b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql @@ -1,8 +1,11 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; + DROP DATABASE IF EXISTS dict_db_01225; DROP DATABASE IF EXISTS dict_db_01225_dictionary; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. CREATE DATABASE dict_db_01225 ENGINE=Ordinary; -- Different internal dictionary name with Atomic CREATE DATABASE dict_db_01225_dictionary Engine=Dictionary; diff --git a/tests/queries/0_stateless/01232_json_as_string_format.sh b/tests/queries/0_stateless/01232_json_as_string_format.sh index 667aea7ba78..8d2fe193b55 100755 --- a/tests/queries/0_stateless/01232_json_as_string_format.sh +++ b/tests/queries/0_stateless/01232_json_as_string_format.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS json_as_string"; $CLICKHOUSE_CLIENT --query="CREATE TABLE json_as_string (field String) ENGINE = Memory"; -echo ' +cat << 'EOF' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; { "id" : 1, "date" : "01.01.2020", @@ -42,9 +42,10 @@ echo ' "{" : 1, "}}" : 2 } -}' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; +} +EOF -echo ' +cat << 'EOF' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; [ { "id" : 1, @@ -79,7 +80,8 @@ echo ' "}}" : 2 } } -]' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; +] +EOF $CLICKHOUSE_CLIENT --query="SELECT * FROM json_as_string ORDER BY field"; diff --git a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql index 0d24b238d64..e80f41b173c 100644 --- a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql @@ -42,7 +42,7 @@ select 'GROUP BY number ORDER BY number DESC'; select count(), * from dist_01247 group by number order by number desc; select 'GROUP BY toString(number)'; -select count(), * from dist_01247 group by toString(number); +select count(), any(number) from dist_01247 group by toString(number); select 'GROUP BY number%2'; select count(), any(number) from dist_01247 group by number%2; diff --git a/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.sql b/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.sql index d187a2e4d4e..0c9cfafa496 100644 --- a/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.sql +++ b/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.sql @@ -1,7 +1,10 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; + DROP DATABASE IF EXISTS test_01249; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. 
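The 01244 change above replaces a bare `*` with any(number) when grouping by toString(number), presumably because the raw column is not reliably treated as part of the grouping key once the key is its string form, so it has to be wrapped in an aggregate to stay valid and deterministic. A standalone sketch, using numbers() in place of the distributed table from the test:

    SELECT count(), any(number) AS n
    FROM numbers(5)
    GROUP BY toString(number)
    ORDER BY n;    -- one row per distinct key, with a well-defined representative value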
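Several hunks above (01019_Buffer_and_max_memory_usage, 01064_incremental_streaming_from_2_src_with_feedback, 01201_read_single_thread_in_order) pin max_insert_threads = 1, apparently so that INSERT ... SELECT produces the same block boundaries and part layout on every run, keeping memory-usage and read-order expectations stable. A minimal sketch with a hypothetical table:

    CREATE TABLE t_demo (number UInt64) ENGINE = MergeTree ORDER BY number;
    SET min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_insert_threads = 1;
    INSERT INTO t_demo SELECT number FROM numbers(1000000);   -- single-threaded, reproducible layout
    DROP TABLE t_demo;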
CREATE DATABASE test_01249 ENGINE=Ordinary; -- Full ATTACH requires UUID with Atomic USE test_01249; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 6a7e4748130..b18ae8a99be 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -46,9 +46,9 @@ ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING ALTER NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN ALTER TABLE [] \N ALTER ALTER DATABASE [] \N ALTER -ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW ALTER VIEW MODIFY QUERY ['ALTER TABLE MODIFY QUERY'] VIEW ALTER VIEW ALTER VIEW MODIFY REFRESH ['ALTER TABLE MODIFY QUERY'] VIEW ALTER VIEW +ALTER VIEW MODIFY SQL SECURITY ['ALTER TABLE MODIFY SQL SECURITY'] VIEW ALTER VIEW ALTER VIEW [] \N ALTER ALTER [] \N ALL CREATE DATABASE [] DATABASE CREATE @@ -90,6 +90,7 @@ DROP QUOTA [] GLOBAL ACCESS MANAGEMENT CREATE SETTINGS PROFILE ['CREATE PROFILE'] GLOBAL ACCESS MANAGEMENT ALTER SETTINGS PROFILE ['ALTER PROFILE'] GLOBAL ACCESS MANAGEMENT DROP SETTINGS PROFILE ['DROP PROFILE'] GLOBAL ACCESS MANAGEMENT +ALLOW SQL SECURITY NONE ['CREATE SQL SECURITY NONE','ALLOW SQL SECURITY NONE','SQL SECURITY NONE','SECURITY NONE'] GLOBAL ACCESS MANAGEMENT SHOW USERS ['SHOW CREATE USER'] GLOBAL SHOW ACCESS SHOW ROLES ['SHOW CREATE ROLE'] GLOBAL SHOW ACCESS SHOW ROW POLICIES ['SHOW POLICIES','SHOW CREATE ROW POLICY','SHOW CREATE POLICY'] TABLE SHOW ACCESS @@ -101,15 +102,19 @@ SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECT SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION ADMIN NAMED COLLECTION ['NAMED COLLECTION USAGE','USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL +SET DEFINER [] USER_NAME ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP CONNECTIONS CACHE ['SYSTEM DROP CONNECTIONS CACHE','DROP CONNECTIONS CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP UNCOMPRESSED CACHE ['SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MMAP CACHE ['SYSTEM DROP MMAP','DROP MMAP CACHE','DROP MMAP'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP QUERY CACHE ['SYSTEM DROP QUERY','DROP QUERY CACHE','DROP QUERY'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP COMPILED EXPRESSION CACHE ['SYSTEM DROP COMPILED EXPRESSION','DROP COMPILED EXPRESSION CACHE','DROP COMPILED EXPRESSIONS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP FILESYSTEM CACHE ['SYSTEM DROP FILESYSTEM CACHE','DROP FILESYSTEM CACHE'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP DISTRIBUTED CACHE ['SYSTEM DROP DISTRIBUTED CACHE','DROP DISTRIBUTED CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM SYNC FILESYSTEM CACHE ['SYSTEM REPAIR FILESYSTEM CACHE','REPAIR FILESYSTEM CACHE','SYNC FILESYSTEM CACHE'] GLOBAL SYSTEM +SYSTEM DROP PAGE CACHE ['SYSTEM DROP PAGE CACHE','DROP PAGE CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP SCHEMA CACHE ['SYSTEM DROP SCHEMA CACHE','DROP SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP FORMAT SCHEMA CACHE ['SYSTEM DROP FORMAT SCHEMA CACHE','DROP FORMAT SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP S3 
CLIENT CACHE ['SYSTEM DROP S3 CLIENT','DROP S3 CLIENT CACHE'] GLOBAL SYSTEM DROP CACHE @@ -134,6 +139,7 @@ SYSTEM DISTRIBUTED SENDS ['SYSTEM STOP DISTRIBUTED SENDS','SYSTEM START DISTRIBU SYSTEM REPLICATED SENDS ['SYSTEM STOP REPLICATED SENDS','SYSTEM START REPLICATED SENDS','STOP REPLICATED SENDS','START REPLICATED SENDS'] TABLE SYSTEM SENDS SYSTEM SENDS ['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS'] \N SYSTEM SYSTEM REPLICATION QUEUES ['SYSTEM STOP REPLICATION QUEUES','SYSTEM START REPLICATION QUEUES','STOP REPLICATION QUEUES','START REPLICATION QUEUES'] TABLE SYSTEM +SYSTEM VIRTUAL PARTS UPDATE ['SYSTEM STOP VIRTUAL PARTS UPDATE','SYSTEM START VIRTUAL PARTS UPDATE','STOP VIRTUAL PARTS UPDATE','START VIRTUAL PARTS UPDATE'] TABLE SYSTEM SYSTEM DROP REPLICA ['DROP REPLICA'] TABLE SYSTEM SYSTEM SYNC REPLICA ['SYNC REPLICA'] TABLE SYSTEM SYSTEM REPLICA READINESS ['SYSTEM REPLICA READY','SYSTEM REPLICA UNREADY'] GLOBAL SYSTEM diff --git a/tests/queries/0_stateless/01278_format_multiple_queries.reference b/tests/queries/0_stateless/01278_format_multiple_queries.reference index 001b10b0990..9e3ae2250b3 100644 --- a/tests/queries/0_stateless/01278_format_multiple_queries.reference +++ b/tests/queries/0_stateless/01278_format_multiple_queries.reference @@ -1,7 +1,7 @@ SELECT a, b AS x -FROM table AS t +FROM `table` AS t INNER JOIN table2 AS t2 ON t.id = t2.t_id WHERE 1 = 1 ; diff --git a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh index 08cc97c84bf..713d187cd88 100755 --- a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh +++ b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh @@ -76,7 +76,7 @@ insert into data_01278 select reinterpretAsString(number), // s6 reinterpretAsString(number), // s7 reinterpretAsString(number) // s8 -from numbers(100000); -- { serverError 241 }" > /dev/null 2>&1 +from numbers(200000); -- { serverError 241 }" > /dev/null 2>&1 local ret_code=$? 
if [[ $ret_code -eq 0 ]]; then diff --git a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql index c1cec6ea212..808eaf291d5 100644 --- a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql +++ b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql @@ -7,7 +7,7 @@ SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 'bar'); -- {ser SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 4); -- {serverError 43} -- invalid timezone parameter SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError BAD_ARGUMENTS} -- unknown timezone -SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 44} -- non-const precision +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 43, 44} -- non-const precision SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, materialize('UTC')); -- {serverError 44} -- non-const timezone SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184012345678910111213141516171819Z', 3, 'UTC'); -- {serverError 6} diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index b7c30b304bf..d5841a74a2c 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -56,14 +56,14 @@ CREATE USER u1_01292 HOST LOCAL CREATE USER `u2_01292@%.myhost.com` -- settings CREATE USER u1_01292 -CREATE USER u2_01292 SETTINGS PROFILE default +CREATE USER u2_01292 SETTINGS PROFILE `default` CREATE USER u3_01292 SETTINGS max_memory_usage = 5000000 CREATE USER u4_01292 SETTINGS max_memory_usage MIN 5000000 CREATE USER u5_01292 SETTINGS max_memory_usage MAX 5000000 CREATE USER u6_01292 SETTINGS max_memory_usage CONST CREATE USER u7_01292 SETTINGS max_memory_usage WRITABLE CREATE USER u8_01292 SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 CONST -CREATE USER u9_01292 SETTINGS PROFILE default, max_memory_usage = 5000000 WRITABLE +CREATE USER u9_01292 SETTINGS PROFILE `default`, max_memory_usage = 5000000 WRITABLE CREATE USER u1_01292 SETTINGS readonly = 1 CREATE USER u2_01292 SETTINGS readonly = 1 CREATE USER u3_01292 @@ -82,7 +82,7 @@ CREATE USER u5_01292 DEFAULT ROLE ALL EXCEPT r1_01292 CREATE USER u6_01292 DEFAULT ROLE NONE -- complex CREATE USER u1_01292 IDENTIFIED WITH plaintext_password HOST LOCAL SETTINGS readonly = 1 -CREATE USER u1_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE NONE SETTINGS PROFILE default +CREATE USER u1_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE NONE SETTINGS PROFILE `default` -- if not exists CREATE USER u1_01292 GRANT r1_01292 TO u1_01292 diff --git a/tests/queries/0_stateless/01293_create_role.reference b/tests/queries/0_stateless/01293_create_role.reference index 42f091bddd5..f1bd76322d4 100644 --- a/tests/queries/0_stateless/01293_create_role.reference +++ b/tests/queries/0_stateless/01293_create_role.reference @@ -11,14 +11,14 @@ CREATE ROLE `r2_01293@%.myhost.com` CREATE ROLE `r2_01293@%.myhost.com` -- settings CREATE ROLE r1_01293 -CREATE ROLE r2_01293 SETTINGS PROFILE default +CREATE ROLE r2_01293 SETTINGS PROFILE `default` CREATE ROLE r3_01293 SETTINGS max_memory_usage = 5000000 CREATE ROLE r4_01293 SETTINGS max_memory_usage MIN 5000000 CREATE ROLE r5_01293 SETTINGS max_memory_usage MAX 5000000 CREATE ROLE r6_01293 SETTINGS max_memory_usage CONST CREATE ROLE 
r7_01293 SETTINGS max_memory_usage WRITABLE CREATE ROLE r8_01293 SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 CONST -CREATE ROLE r9_01293 SETTINGS PROFILE default, max_memory_usage = 5000000 WRITABLE +CREATE ROLE r9_01293 SETTINGS PROFILE `default`, max_memory_usage = 5000000 WRITABLE CREATE ROLE r1_01293 SETTINGS readonly = 1 CREATE ROLE r2_01293 SETTINGS readonly = 1 CREATE ROLE r3_01293 diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index f053387d1c5..187f55697e4 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -3,6 +3,7 @@ connect_timeout Seconds 10 connect_timeout_with_failover_ms Milliseconds 2000 connect_timeout_with_failover_secure_ms Milliseconds 3000 external_storage_connect_timeout_sec UInt64 10 +s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01294_create_settings_profile.reference b/tests/queries/0_stateless/01294_create_settings_profile.reference index a10d5758752..7e1838c8dae 100644 --- a/tests/queries/0_stateless/01294_create_settings_profile.reference +++ b/tests/queries/0_stateless/01294_create_settings_profile.reference @@ -1,57 +1,57 @@ -- default -CREATE SETTINGS PROFILE s1_01294 +CREATE SETTINGS PROFILE `s1_01294` -- same as default -CREATE SETTINGS PROFILE s2_01294 -CREATE SETTINGS PROFILE s3_01294 +CREATE SETTINGS PROFILE `s2_01294` +CREATE SETTINGS PROFILE `s3_01294` -- rename -CREATE SETTINGS PROFILE s2_01294_renamed +CREATE SETTINGS PROFILE `s2_01294_renamed` -- settings -CREATE SETTINGS PROFILE s1_01294 -CREATE SETTINGS PROFILE s2_01294 SETTINGS INHERIT default -CREATE SETTINGS PROFILE s3_01294 SETTINGS max_memory_usage = 5000000 -CREATE SETTINGS PROFILE s4_01294 SETTINGS max_memory_usage MIN 5000000 -CREATE SETTINGS PROFILE s5_01294 SETTINGS max_memory_usage MAX 5000000 -CREATE SETTINGS PROFILE s6_01294 SETTINGS max_memory_usage CONST -CREATE SETTINGS PROFILE s7_01294 SETTINGS max_memory_usage WRITABLE -CREATE SETTINGS PROFILE s8_01294 SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 CONST -CREATE SETTINGS PROFILE s9_01294 SETTINGS INHERIT default, max_memory_usage = 5000000 WRITABLE -CREATE SETTINGS PROFILE s10_01294 SETTINGS INHERIT s1_01294, INHERIT s3_01294, INHERIT default, readonly = 0, max_memory_usage MAX 6000000 -CREATE SETTINGS PROFILE s1_01294 SETTINGS readonly = 0 -CREATE SETTINGS PROFILE s2_01294 SETTINGS readonly = 1 -CREATE SETTINGS PROFILE s3_01294 +CREATE SETTINGS PROFILE `s1_01294` +CREATE SETTINGS PROFILE `s2_01294` SETTINGS INHERIT `default` +CREATE SETTINGS PROFILE `s3_01294` SETTINGS max_memory_usage = 5000000 +CREATE SETTINGS PROFILE `s4_01294` SETTINGS max_memory_usage MIN 5000000 +CREATE SETTINGS PROFILE `s5_01294` SETTINGS max_memory_usage MAX 5000000 +CREATE SETTINGS PROFILE `s6_01294` SETTINGS max_memory_usage CONST +CREATE SETTINGS PROFILE `s7_01294` SETTINGS max_memory_usage WRITABLE +CREATE SETTINGS PROFILE `s8_01294` SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 CONST +CREATE SETTINGS PROFILE `s9_01294` SETTINGS INHERIT `default`, max_memory_usage = 5000000 WRITABLE +CREATE SETTINGS PROFILE `s10_01294` SETTINGS INHERIT `s1_01294`, INHERIT `s3_01294`, INHERIT `default`, readonly = 0, max_memory_usage MAX 6000000 +CREATE SETTINGS PROFILE `s1_01294` SETTINGS readonly = 0 +CREATE 
SETTINGS PROFILE `s2_01294` SETTINGS readonly = 1 +CREATE SETTINGS PROFILE `s3_01294` -- to roles -CREATE SETTINGS PROFILE s1_01294 -CREATE SETTINGS PROFILE s2_01294 TO ALL -CREATE SETTINGS PROFILE s3_01294 TO r1_01294 -CREATE SETTINGS PROFILE s4_01294 TO u1_01294 -CREATE SETTINGS PROFILE s5_01294 TO r1_01294, u1_01294 -CREATE SETTINGS PROFILE s6_01294 TO ALL EXCEPT r1_01294 -CREATE SETTINGS PROFILE s7_01294 TO ALL EXCEPT r1_01294, u1_01294 -CREATE SETTINGS PROFILE s1_01294 TO u1_01294 -CREATE SETTINGS PROFILE s2_01294 +CREATE SETTINGS PROFILE `s1_01294` +CREATE SETTINGS PROFILE `s2_01294` TO ALL +CREATE SETTINGS PROFILE `s3_01294` TO r1_01294 +CREATE SETTINGS PROFILE `s4_01294` TO u1_01294 +CREATE SETTINGS PROFILE `s5_01294` TO r1_01294, u1_01294 +CREATE SETTINGS PROFILE `s6_01294` TO ALL EXCEPT r1_01294 +CREATE SETTINGS PROFILE `s7_01294` TO ALL EXCEPT r1_01294, u1_01294 +CREATE SETTINGS PROFILE `s1_01294` TO u1_01294 +CREATE SETTINGS PROFILE `s2_01294` -- complex -CREATE SETTINGS PROFILE s1_01294 SETTINGS readonly = 0 TO r1_01294 -CREATE SETTINGS PROFILE s1_01294 SETTINGS INHERIT default +CREATE SETTINGS PROFILE `s1_01294` SETTINGS readonly = 0 TO r1_01294 +CREATE SETTINGS PROFILE `s1_01294` SETTINGS INHERIT `default` -- multiple profiles in one command -CREATE SETTINGS PROFILE s1_01294 SETTINGS max_memory_usage = 5000000 -CREATE SETTINGS PROFILE s2_01294 SETTINGS max_memory_usage = 5000000 -CREATE SETTINGS PROFILE s3_01294 TO ALL -CREATE SETTINGS PROFILE s4_01294 TO ALL -CREATE SETTINGS PROFILE s1_01294 SETTINGS max_memory_usage = 6000000 -CREATE SETTINGS PROFILE s2_01294 SETTINGS max_memory_usage = 6000000 -CREATE SETTINGS PROFILE s3_01294 TO ALL -CREATE SETTINGS PROFILE s4_01294 TO ALL -CREATE SETTINGS PROFILE s1_01294 SETTINGS max_memory_usage = 6000000 -CREATE SETTINGS PROFILE s2_01294 SETTINGS max_memory_usage = 6000000 TO r1_01294 -CREATE SETTINGS PROFILE s3_01294 TO r1_01294 -CREATE SETTINGS PROFILE s4_01294 TO r1_01294 +CREATE SETTINGS PROFILE `s1_01294` SETTINGS max_memory_usage = 5000000 +CREATE SETTINGS PROFILE `s2_01294` SETTINGS max_memory_usage = 5000000 +CREATE SETTINGS PROFILE `s3_01294` TO ALL +CREATE SETTINGS PROFILE `s4_01294` TO ALL +CREATE SETTINGS PROFILE `s1_01294` SETTINGS max_memory_usage = 6000000 +CREATE SETTINGS PROFILE `s2_01294` SETTINGS max_memory_usage = 6000000 +CREATE SETTINGS PROFILE `s3_01294` TO ALL +CREATE SETTINGS PROFILE `s4_01294` TO ALL +CREATE SETTINGS PROFILE `s1_01294` SETTINGS max_memory_usage = 6000000 +CREATE SETTINGS PROFILE `s2_01294` SETTINGS max_memory_usage = 6000000 TO r1_01294 +CREATE SETTINGS PROFILE `s3_01294` TO r1_01294 +CREATE SETTINGS PROFILE `s4_01294` TO r1_01294 -- readonly ambiguity -CREATE SETTINGS PROFILE s1_01294 SETTINGS readonly = 1 -CREATE SETTINGS PROFILE s2_01294 SETTINGS readonly CONST -CREATE SETTINGS PROFILE s3_01294 SETTINGS INHERIT readonly -CREATE SETTINGS PROFILE s4_01294 SETTINGS INHERIT readonly, INHERIT readonly -CREATE SETTINGS PROFILE s5_01294 SETTINGS INHERIT readonly, readonly = 1 -CREATE SETTINGS PROFILE s6_01294 SETTINGS INHERIT readonly, readonly CONST +CREATE SETTINGS PROFILE `s1_01294` SETTINGS readonly = 1 +CREATE SETTINGS PROFILE `s2_01294` SETTINGS readonly CONST +CREATE SETTINGS PROFILE `s3_01294` SETTINGS INHERIT `readonly` +CREATE SETTINGS PROFILE `s4_01294` SETTINGS INHERIT `readonly`, INHERIT `readonly` +CREATE SETTINGS PROFILE `s5_01294` SETTINGS INHERIT `readonly`, readonly = 1 +CREATE SETTINGS PROFILE `s6_01294` SETTINGS INHERIT `readonly`, readonly CONST -- system.settings_profiles 
s1_01294 local_directory 0 0 [] [] s2_01294 local_directory 1 0 ['r1_01294'] [] diff --git a/tests/queries/0_stateless/01295_create_row_policy.reference b/tests/queries/0_stateless/01295_create_row_policy.reference index d73d9752bc1..2c300332a89 100644 --- a/tests/queries/0_stateless/01295_create_row_policy.reference +++ b/tests/queries/0_stateless/01295_create_row_policy.reference @@ -1,35 +1,35 @@ -- default -CREATE ROW POLICY p1_01295 ON db.table +CREATE ROW POLICY p1_01295 ON db.`table` -- same as default -CREATE ROW POLICY p2_01295 ON db.table -CREATE ROW POLICY p3_01295 ON db.table +CREATE ROW POLICY p2_01295 ON db.`table` +CREATE ROW POLICY p3_01295 ON db.`table` -- rename -CREATE ROW POLICY p2_01295_renamed ON db.table +CREATE ROW POLICY p2_01295_renamed ON db.`table` -- filter -CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING (a < b) AND (c > d) -CREATE ROW POLICY p2_01295 ON db.table AS restrictive FOR SELECT USING id = currentUser() -CREATE ROW POLICY p3_01295 ON db.table FOR SELECT USING 1 -CREATE ROW POLICY p1_01295 ON db.table AS restrictive FOR SELECT USING 0 +CREATE ROW POLICY p1_01295 ON db.`table` FOR SELECT USING (a < b) AND (c > d) +CREATE ROW POLICY p2_01295 ON db.`table` AS restrictive FOR SELECT USING id = currentUser() +CREATE ROW POLICY p3_01295 ON db.`table` FOR SELECT USING 1 +CREATE ROW POLICY p1_01295 ON db.`table` AS restrictive FOR SELECT USING 0 -- to roles -CREATE ROW POLICY p1_01295 ON db.table -CREATE ROW POLICY p2_01295 ON db.table TO ALL -CREATE ROW POLICY p3_01295 ON db.table TO r1_01295 -CREATE ROW POLICY p4_01295 ON db.table TO u1_01295 -CREATE ROW POLICY p5_01295 ON db.table TO r1_01295, u1_01295 -CREATE ROW POLICY p6_01295 ON db.table TO ALL EXCEPT r1_01295 -CREATE ROW POLICY p7_01295 ON db.table TO ALL EXCEPT r1_01295, u1_01295 -CREATE ROW POLICY p1_01295 ON db.table TO u1_01295 -CREATE ROW POLICY p2_01295 ON db.table +CREATE ROW POLICY p1_01295 ON db.`table` +CREATE ROW POLICY p2_01295 ON db.`table` TO ALL +CREATE ROW POLICY p3_01295 ON db.`table` TO r1_01295 +CREATE ROW POLICY p4_01295 ON db.`table` TO u1_01295 +CREATE ROW POLICY p5_01295 ON db.`table` TO r1_01295, u1_01295 +CREATE ROW POLICY p6_01295 ON db.`table` TO ALL EXCEPT r1_01295 +CREATE ROW POLICY p7_01295 ON db.`table` TO ALL EXCEPT r1_01295, u1_01295 +CREATE ROW POLICY p1_01295 ON db.`table` TO u1_01295 +CREATE ROW POLICY p2_01295 ON db.`table` -- multiple policies in one command -CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING 1 -CREATE ROW POLICY p2_01295 ON db.table FOR SELECT USING 1 -CREATE ROW POLICY p3_01295 ON db.table TO u1_01295 +CREATE ROW POLICY p1_01295 ON db.`table` FOR SELECT USING 1 +CREATE ROW POLICY p2_01295 ON db.`table` FOR SELECT USING 1 +CREATE ROW POLICY p3_01295 ON db.`table` TO u1_01295 CREATE ROW POLICY p3_01295 ON db2.table2 TO u1_01295 -CREATE ROW POLICY p4_01295 ON db.table FOR SELECT USING a = b +CREATE ROW POLICY p4_01295 ON db.`table` FOR SELECT USING a = b CREATE ROW POLICY p5_01295 ON db2.table2 FOR SELECT USING a = b -CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING 1 TO ALL -CREATE ROW POLICY p2_01295 ON db.table FOR SELECT USING 1 TO ALL +CREATE ROW POLICY p1_01295 ON db.`table` FOR SELECT USING 1 TO ALL +CREATE ROW POLICY p2_01295 ON db.`table` FOR SELECT USING 1 TO ALL -- system.row_policies -p1_01295 ON db.table p1_01295 db table local_directory (a < b) AND (c > d) 0 0 [] [] -p2_01295 ON db.table p2_01295 db table local_directory id = currentUser() 1 0 ['u1_01295'] [] -p3_01295 ON db.table p3_01295 db table local_directory 
1 0 1 [] ['r1_01295'] +p1_01295 ON db.`table` p1_01295 db table local_directory (a < b) AND (c > d) 0 0 [] [] +p2_01295 ON db.`table` p2_01295 db table local_directory id = currentUser() 1 0 ['u1_01295'] [] +p3_01295 ON db.`table` p3_01295 db table local_directory 1 0 1 [] ['r1_01295'] diff --git a/tests/queries/0_stateless/01296_create_row_policy_in_current_database.reference b/tests/queries/0_stateless/01296_create_row_policy_in_current_database.reference index fa9c2f73021..bfca341bd7b 100644 --- a/tests/queries/0_stateless/01296_create_row_policy_in_current_database.reference +++ b/tests/queries/0_stateless/01296_create_row_policy_in_current_database.reference @@ -1,20 +1,20 @@ -- one policy -CREATE ROW POLICY p1_01296 ON db_01296.table -CREATE ROW POLICY p1_01296 ON db_01296.table -CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 -CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 +CREATE ROW POLICY p1_01296 ON db_01296.`table` +CREATE ROW POLICY p1_01296 ON db_01296.`table` +CREATE ROW POLICY p1_01296 ON db_01296.`table` FOR SELECT USING 1 +CREATE ROW POLICY p1_01296 ON db_01296.`table` FOR SELECT USING 1 -- multiple policies -CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 -CREATE ROW POLICY p2_01296 ON db_01296.table FOR SELECT USING 1 -CREATE ROW POLICY p3_01296 ON db_01296.table TO u1_01296 +CREATE ROW POLICY p1_01296 ON db_01296.`table` FOR SELECT USING 1 +CREATE ROW POLICY p2_01296 ON db_01296.`table` FOR SELECT USING 1 +CREATE ROW POLICY p3_01296 ON db_01296.`table` TO u1_01296 CREATE ROW POLICY p3_01296 ON db_01296.table2 TO u1_01296 -CREATE ROW POLICY p4_01296 ON db_01296.table FOR SELECT USING a = b +CREATE ROW POLICY p4_01296 ON db_01296.`table` FOR SELECT USING a = b CREATE ROW POLICY p5_01296 ON db_01296.table2 FOR SELECT USING a = b -CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 -CREATE ROW POLICY p2_01296 ON db_01296.table FOR SELECT USING 1 -CREATE ROW POLICY p3_01296 ON db_01296.table TO u1_01296 +CREATE ROW POLICY p1_01296 ON db_01296.`table` FOR SELECT USING 1 +CREATE ROW POLICY p2_01296 ON db_01296.`table` FOR SELECT USING 1 +CREATE ROW POLICY p3_01296 ON db_01296.`table` TO u1_01296 CREATE ROW POLICY p3_01296 ON db_01296.table2 TO u1_01296 -CREATE ROW POLICY p4_01296 ON db_01296.table FOR SELECT USING a = b +CREATE ROW POLICY p4_01296 ON db_01296.`table` FOR SELECT USING a = b CREATE ROW POLICY p5_01296 ON db_01296.table2 FOR SELECT USING a = b -CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 TO ALL -CREATE ROW POLICY p2_01296 ON db_01296.table FOR SELECT USING 1 TO ALL +CREATE ROW POLICY p1_01296 ON db_01296.`table` FOR SELECT USING 1 TO ALL +CREATE ROW POLICY p2_01296 ON db_01296.`table` FOR SELECT USING 1 TO ALL diff --git a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh index 57409d782ae..1d5f5d54853 100755 --- a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh +++ b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash # Tags: race, zookeeper, no-parallel +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01383_log_broken_table.sh b/tests/queries/0_stateless/01383_log_broken_table.sh index 5cc0f24a87f..4d82fa8547d 100755 --- a/tests/queries/0_stateless/01383_log_broken_table.sh +++ b/tests/queries/0_stateless/01383_log_broken_table.sh @@ -20,6 +20,7 @@ function test_func() MAX_MEM=$((2 * $MAX_MEM)) $CLICKHOUSE_CLIENT --query "INSERT INTO log SELECT number, number, number FROM numbers(1000000)" --max_memory_usage $MAX_MEM > "${CLICKHOUSE_TMP}"/insert_result 2>&1 + RES=$? grep -o -F 'Memory limit' "${CLICKHOUSE_TMP}"/insert_result || cat "${CLICKHOUSE_TMP}"/insert_result @@ -27,7 +28,7 @@ function test_func() cat "${CLICKHOUSE_TMP}"/select_result - [[ $MAX_MEM -gt 200000000 ]] && break; + { [[ $RES -eq 0 ]] || [[ $MAX_MEM -gt 200000000 ]]; } && break; done $CLICKHOUSE_CLIENT --query "DROP TABLE log"; diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql index 2d3d31e9b5c..cd5111faf45 100644 --- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql +++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql @@ -1,3 +1,5 @@ +SET allow_suspicious_low_cardinality_types=1; + DROP TABLE IF EXISTS lc_nullable; CREATE TABLE lc_nullable ( diff --git a/tests/queries/0_stateless/01418_custom_settings.reference b/tests/queries/0_stateless/01418_custom_settings.reference index 8484a5d0e6f..923d43077d8 100644 --- a/tests/queries/0_stateless/01418_custom_settings.reference +++ b/tests/queries/0_stateless/01418_custom_settings.reference @@ -30,10 +30,10 @@ custom_f \'word\' --- compound identifier --- test String custom_compound.identifier.v1 \'test\' -CREATE SETTINGS PROFILE s1_01418 SETTINGS custom_compound.identifier.v2 = 100 +CREATE SETTINGS PROFILE `s1_01418` SETTINGS custom_compound.identifier.v2 = 100 --- null type --- \N Nullable(Nothing) custom_null NULL \N Nullable(Nothing) custom_null NULL -CREATE SETTINGS PROFILE s2_01418 SETTINGS custom_null = NULL +CREATE SETTINGS PROFILE `s2_01418` SETTINGS custom_null = NULL diff --git a/tests/queries/0_stateless/01441_low_cardinality_array_index.sql b/tests/queries/0_stateless/01441_low_cardinality_array_index.sql index 4b31a86edfb..b5e14c957c6 100644 --- a/tests/queries/0_stateless/01441_low_cardinality_array_index.sql +++ b/tests/queries/0_stateless/01441_low_cardinality_array_index.sql @@ -1,3 +1,5 @@ +SET allow_suspicious_low_cardinality_types=1; + DROP TABLE IF EXISTS t_01411; CREATE TABLE t_01411( diff --git a/tests/queries/0_stateless/01460_line_as_string_format.sh b/tests/queries/0_stateless/01460_line_as_string_format.sh index 4ab9cb59858..a8782dd2d32 100755 --- a/tests/queries/0_stateless/01460_line_as_string_format.sh +++ b/tests/queries/0_stateless/01460_line_as_string_format.sh @@ -7,12 +7,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS line_as_string1"; $CLICKHOUSE_CLIENT --query="CREATE TABLE line_as_string1(field String) ENGINE = Memory"; -echo '"id" : 1, +cat <<'EOF' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string1 FORMAT LineAsString"; +"id" : 1, "date" : "01.01.2020", "string" : "123{{{\"\\", "array" : [1, 2, 3], -Finally implement this new feature.' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string1 FORMAT LineAsString"; +Finally implement this new feature. 
+EOF $CLICKHOUSE_CLIENT --query="SELECT * FROM line_as_string1"; $CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string1" @@ -26,7 +28,9 @@ $CLICKHOUSE_CLIENT --query="create table line_as_string2( $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string2(c) values ('ClickHouse')"; -echo 'ClickHouse is a `fast` #open-source# (OLAP) database "management" :system:' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string2(c) FORMAT LineAsString"; +# Shellcheck thinks `fast` is a shell expansion +# shellcheck disable=SC2016 +echo -e 'ClickHouse is a `fast` #open-source# (OLAP) database "management" :system:' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string2(c) FORMAT LineAsString"; $CLICKHOUSE_CLIENT --query="SELECT * FROM line_as_string2 order by c"; $CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string2" diff --git a/tests/queries/0_stateless/01516_create_table_primary_key.sql b/tests/queries/0_stateless/01516_create_table_primary_key.sql index 630c573c2cc..1e5a0b9cddf 100644 --- a/tests/queries/0_stateless/01516_create_table_primary_key.sql +++ b/tests/queries/0_stateless/01516_create_table_primary_key.sql @@ -1,7 +1,10 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; + DROP DATABASE IF EXISTS test_01516; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. CREATE DATABASE test_01516 ENGINE=Ordinary; -- Full ATTACH requires UUID with Atomic USE test_01516; diff --git a/tests/queries/0_stateless/01517_drop_mv_with_inner_table.sql b/tests/queries/0_stateless/01517_drop_mv_with_inner_table.sql index 67a2009b913..167625629a5 100644 --- a/tests/queries/0_stateless/01517_drop_mv_with_inner_table.sql +++ b/tests/queries/0_stateless/01517_drop_mv_with_inner_table.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; + -- -- Atomic no SYNC -- (should go first to check that thread for DROP TABLE does not hang) @@ -33,6 +35,7 @@ show tables from db_01517_atomic_sync; --- drop database if exists db_01517_ordinary; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. create database db_01517_ordinary Engine=Ordinary; create table db_01517_ordinary.source (key Int) engine=Null; diff --git a/tests/queries/0_stateless/01526_max_untracked_memory.sh b/tests/queries/0_stateless/01526_max_untracked_memory.sh index 45fdb314fb2..b2bad637422 100755 --- a/tests/queries/0_stateless/01526_max_untracked_memory.sh +++ b/tests/queries/0_stateless/01526_max_untracked_memory.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64 -# requires TraceCollector, does not available under sanitizers and aarch64 +# Tags: no-tsan, no-asan, no-ubsan, no-msan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh index 538d712ad9c..d3fe6d16559 100755 --- a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh +++ b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh @@ -1,5 +1,8 @@ #!/usr/bin/env bash +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh index e2d0306fee0..9f0699929f8 100755 --- a/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh @@ -45,11 +45,13 @@ query_id=$$-$RANDOM-$SECONDS ${CLICKHOUSE_CLIENT} --user=test_01541 --max_block_size=1 --format Null --query_id $query_id -q 'SELECT sleepEachRow(1) FROM numbers(600)' & # trap sleep_query_pid=$! +# Shellcheck wrongly process "trap" https://www.shellcheck.net/wiki/SC2317 +# shellcheck disable=SC2317 function cleanup() { echo 'KILL sleep' # if the timeout will not be enough, it will trigger "No such process" error/message - kill $sleep_query_pid + kill "$sleep_query_pid" # waiting for a query to finish while ${CLICKHOUSE_CLIENT} -q "SELECT query_id FROM system.processes WHERE query_id = '$query_id'" | grep -xq "$query_id"; do sleep 0.1 diff --git a/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh b/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh index 0d13a1d4eff..48cbd57c1c0 100755 --- a/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh +++ b/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh @@ -47,7 +47,7 @@ function main() { # retries, since there is no guarantee that every time query will take ~0.4 second. local retries=20 i=0 - while [ "$(test_query_duration_ms | xargs)" != '1 1' ] && [[ $i < $retries ]]; do + while [ "$(test_query_duration_ms | xargs)" != '1 1' ] && (( i < retries )); do ((++i)) done } diff --git a/tests/queries/0_stateless/01557_field_infinite_convert_to_number.sql b/tests/queries/0_stateless/01557_field_infinite_convert_to_number.sql index edc4d5cbc91..cc71c8e6f6c 100644 --- a/tests/queries/0_stateless/01557_field_infinite_convert_to_number.sql +++ b/tests/queries/0_stateless/01557_field_infinite_convert_to_number.sql @@ -1 +1 @@ -SET max_threads = nan; -- { serverError 70 } +SET max_threads = nan; -- { serverError CANNOT_CONVERT_TYPE } diff --git a/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh b/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh index e54783e9655..941ab216d0b 100755 --- a/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh +++ b/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -9,4 +9,3 @@ query_id="aggregating_merge_tree_simple_aggregate_function_string_query100_profi ${CLICKHOUSE_CLIENT} --query="select sleep(1)" --query_id="$query_id" --query_profiler_real_time_period_ns=10000000 ${CLICKHOUSE_CLIENT} --query="system flush logs" ${CLICKHOUSE_CLIENT} --query="select count(*) > 1 from system.trace_log where query_id = '$query_id'" - diff --git a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference index 30b5ae9c648..25d8f62f5dd 100644 --- a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference +++ b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference @@ -1,12 +1,8 @@ -SELECT - x, - y -FROM prewhere_move -PREWHERE x > 100 -SELECT - x1, - x2, - x3, - x4 -FROM prewhere_move -PREWHERE 
(x1 > 100) AND (x2 > 100) AND (x3 > 100) AND (x4 > 100) + Prewhere info + Prewhere filter + Prewhere filter column: greater(x, 100) (removed) + Filter + Filter column: and(greater(x1, 100), greater(x2, 100), greater(x3, 100), greater(x4, \'100\')) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(x1, 100) diff --git a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql index bd3e651e0dc..9a4f8d1f734 100644 --- a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql +++ b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-merge-tree-settings + SET optimize_move_to_prewhere = 1; SET convert_query_to_cnf = 0; @@ -5,14 +7,15 @@ DROP TABLE IF EXISTS prewhere_move; CREATE TABLE prewhere_move (x Int, y String) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO prewhere_move SELECT number, toString(number) FROM numbers(1000); -EXPLAIN SYNTAX SELECT * FROM prewhere_move WHERE x > 100; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move WHERE x > 100) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; DROP TABLE prewhere_move; -CREATE TABLE prewhere_move (x1 Int, x2 Int, x3 Int, x4 Int) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO prewhere_move SELECT number, number, number, number FROM numbers(1000); +CREATE TABLE prewhere_move (x1 Int, x2 Int, x3 Int, x4 String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO prewhere_move SELECT number, number, number, repeat('a', 1024) FROM numbers(1000); -- Not all conditions moved -EXPLAIN SYNTAX SELECT * FROM prewhere_move WHERE x1 > 100 AND x2 > 100 AND x3 > 100 AND x4 > 100; +SET move_all_conditions_to_prewhere = 0; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move WHERE x1 > 100 AND x2 > 100 AND x3 > 100 AND x4 > '100') WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; DROP TABLE prewhere_move; diff --git a/tests/queries/0_stateless/01600_detach_permanently.sh b/tests/queries/0_stateless/01600_detach_permanently.sh index 0d1815a75e8..036706d2fe8 100755 --- a/tests/queries/0_stateless/01600_detach_permanently.sh +++ b/tests/queries/0_stateless/01600_detach_permanently.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash # Tags: no-parallel +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql index c25f308eda8..b3739af93f8 100644 --- a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql +++ b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql @@ -2,7 +2,7 @@ -- Check remerge_sort_lowered_memory_bytes_ratio setting -set max_memory_usage='300Mi'; +set max_memory_usage='200Mi'; -- enter remerge once limit*2 is reached set max_bytes_before_remerge_sort='10Mi'; -- more blocks diff --git a/tests/queries/0_stateless/01601_detach_permanently.sql b/tests/queries/0_stateless/01601_detach_permanently.sql index 95c80e77213..6ab3a7f9b21 100644 --- a/tests/queries/0_stateless/01601_detach_permanently.sql +++ b/tests/queries/0_stateless/01601_detach_permanently.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; + SELECT 'database atomic tests'; DROP DATABASE IF EXISTS test1601_detach_permanently_atomic; @@ -73,6 +75,7 @@ SELECT 'database ordinary tests'; DROP DATABASE IF EXISTS test1601_detach_permanently_ordinary; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. CREATE DATABASE test1601_detach_permanently_ordinary Engine=Ordinary; create table test1601_detach_permanently_ordinary.test_name_reuse (number UInt64) engine=MergeTree order by tuple(); diff --git a/tests/queries/0_stateless/01602_show_create_view.reference b/tests/queries/0_stateless/01602_show_create_view.reference index 5fe11a38db3..b3a345f2742 100644 --- a/tests/queries/0_stateless/01602_show_create_view.reference +++ b/tests/queries/0_stateless/01602_show_create_view.reference @@ -1,6 +1,6 @@ -CREATE VIEW test_1602.v\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl -CREATE MATERIALIZED VIEW test_1602.vv\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n)\nENGINE = MergeTree\nPARTITION BY toYYYYMM(EventDate)\nORDER BY (CounterID, EventDate, intHash32(UserID))\nSETTINGS index_granularity = 8192 AS\nSELECT *\nFROM test_1602.tbl -CREATE VIEW test_1602.VIEW\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl -CREATE VIEW test_1602.DATABASE\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl -CREATE VIEW test_1602.DICTIONARY\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl -CREATE VIEW test_1602.TABLE\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl +CREATE VIEW test_1602.v\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n)\nAS SELECT *\nFROM test_1602.tbl +CREATE MATERIALIZED VIEW test_1602.vv\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n)\nENGINE = MergeTree\nPARTITION BY toYYYYMM(EventDate)\nORDER BY (CounterID, EventDate, intHash32(UserID))\nSETTINGS index_granularity = 8192\nAS SELECT *\nFROM test_1602.tbl +CREATE VIEW test_1602.VIEW\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n)\nAS SELECT *\nFROM test_1602.tbl +CREATE VIEW test_1602.DATABASE\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n)\nAS SELECT *\nFROM test_1602.tbl +CREATE VIEW test_1602.DICTIONARY\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n)\nAS SELECT *\nFROM 
test_1602.tbl +CREATE VIEW test_1602.`TABLE`\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n)\nAS SELECT *\nFROM test_1602.tbl diff --git a/tests/queries/0_stateless/01603_insert_select_too_many_parts.sql b/tests/queries/0_stateless/01603_insert_select_too_many_parts.sql index a56b680e212..d0832cdcc8e 100644 --- a/tests/queries/0_stateless/01603_insert_select_too_many_parts.sql +++ b/tests/queries/0_stateless/01603_insert_select_too_many_parts.sql @@ -3,9 +3,12 @@ CREATE TABLE too_many_parts (x UInt64) ENGINE = MergeTree ORDER BY tuple() SETTI SYSTEM STOP MERGES too_many_parts; SET max_block_size = 1, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; +-- Avoid the concurrent parts check to prevent flakiness +SET max_threads=1, max_insert_threads=1; -- exception is not thrown if threshold is exceeded when multi-block INSERT is already started. -INSERT INTO too_many_parts SELECT * FROM numbers(10); +-- A single thread is used because different threads check the threshold separately https://github.com/ClickHouse/ClickHouse/issues/61158 +INSERT INTO too_many_parts SELECT * FROM numbers(10) SETTINGS max_insert_threads=1; SELECT count() FROM too_many_parts; -- exception is thrown if threshold is exceeded on new INSERT. diff --git a/tests/queries/0_stateless/01603_rename_overwrite_bug.sql b/tests/queries/0_stateless/01603_rename_overwrite_bug.sql index acf9f520709..cc283ab4292 100644 --- a/tests/queries/0_stateless/01603_rename_overwrite_bug.sql +++ b/tests/queries/0_stateless/01603_rename_overwrite_bug.sql @@ -1,7 +1,10 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; + DROP database IF EXISTS test_1603_rename_bug_ordinary; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning. create database test_1603_rename_bug_ordinary engine=Ordinary; create table test_1603_rename_bug_ordinary.foo engine=Memory as select * from numbers(100); create table test_1603_rename_bug_ordinary.bar engine=Log as select * from numbers(200); diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference index c3df2314112..327a4694aa8 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference @@ -5,7 +5,13 @@ SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FO 1,10 EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); SELECT - identity(_CAST(0, \'Nullable(UInt64)\')) AS n, + __scalarSubqueryResult(_CAST(0, \'Nullable(UInt64)\')) AS n, toUInt64(10 / n) SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); 0 +SELECT * FROM (SELECT (SELECT '\d[a-z]') AS n, extractAll('5abc', assumeNotNull(n))) FORMAT CSV; +"\d[a-z]","['5a']" +EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); +SELECT + __scalarSubqueryResult(_CAST(0, \'Nullable(UInt64)\')) AS n, + toUInt64(10 / n) diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql index 59f057d1ec5..b30fb43f621 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql @@ -3,3 +3,6 @@ SELECT * FROM (SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUI
SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FORMAT CSV; EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); + +SELECT * FROM (SELECT (SELECT '\d[a-z]') AS n, extractAll('5abc', assumeNotNull(n))) FORMAT CSV; +EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); diff --git a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql index a4d0035c590..acde02e2c67 100644 --- a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql +++ b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql @@ -1,6 +1,9 @@ DROP TABLE IF EXISTS constraint_test_assumption; DROP TABLE IF EXISTS constraint_test_transitivity; DROP TABLE IF EXISTS constraint_test_transitivity2; +DROP TABLE IF EXISTS constraint_test_transitivity3; +DROP TABLE IF EXISTS constraint_test_constants_repl; +DROP TABLE IF EXISTS constraint_test_constants; SET convert_query_to_cnf = 1; SET optimize_using_constraints = 1; @@ -37,7 +40,6 @@ SELECT count() FROM constraint_test_transitivity WHERE a = d; ---> assumption -> DROP TABLE constraint_test_transitivity; - CREATE TABLE constraint_test_strong_connectivity (a String, b String, c String, d String, CONSTRAINT c1 ASSUME a <= b AND b <= c AND c <= d AND d <= a) ENGINE = TinyLog; INSERT INTO constraint_test_strong_connectivity (a, b, c, d) VALUES ('1', '2', '3', '4'); @@ -71,7 +73,6 @@ SELECT count() FROM constraint_test_transitivity3 WHERE b >= a; ---> assumption DROP TABLE constraint_test_transitivity3; - CREATE TABLE constraint_test_constants_repl (a Int64, b Int64, c Int64, d Int64, CONSTRAINT c1 ASSUME a - b = 10 AND c + d = 20) ENGINE = TinyLog; INSERT INTO constraint_test_constants_repl (a, b, c, d) VALUES (1, 2, 3, 4); diff --git a/tests/queries/0_stateless/01625_constraints_index_append.reference b/tests/queries/0_stateless/01625_constraints_index_append.reference index 591d8a85897..1b1a48c0e29 100644 --- a/tests/queries/0_stateless/01625_constraints_index_append.reference +++ b/tests/queries/0_stateless/01625_constraints_index_append.reference @@ -1,23 +1,15 @@ -SELECT i AS i -FROM index_append_test_test -PREWHERE a = 0 -WHERE (a = 0) AND indexHint((i + 40) > 0) -SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, optimize_move_to_prewhere = 1, optimize_substitute_columns = 1, optimize_append_index = 1 -1 -SELECT i AS i -FROM index_append_test_test -PREWHERE a < 0 -SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, optimize_move_to_prewhere = 1, optimize_substitute_columns = 1, optimize_append_index = 1 -0 -SELECT i AS i -FROM index_append_test_test -PREWHERE a >= 0 -WHERE (a >= 0) AND indexHint((i + 40) > 0) -SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, optimize_move_to_prewhere = 1, optimize_substitute_columns = 1, optimize_append_index = 1 -1 -SELECT i AS i -FROM index_append_test_test -PREWHERE (2 * b) < 100 -WHERE ((2 * b) < 100) AND indexHint(i < 100) -SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, optimize_move_to_prewhere = 1, optimize_substitute_columns = 1, optimize_append_index = 1 -1 + Filter column: and(equals(a, 0), indexHint(greater(plus(i, 40), 0))) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: equals(a, 0) + Prewhere 
info + Prewhere filter + Prewhere filter column: less(a, 0) (removed) + Filter column: and(greaterOrEquals(a, 0), indexHint(greater(plus(i, 40), 0))) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greaterOrEquals(a, 0) + Filter column: and(less(multiply(2, b), 100), indexHint(less(i, 100))) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: less(multiply(2, b), 100) diff --git a/tests/queries/0_stateless/01625_constraints_index_append.sh b/tests/queries/0_stateless/01625_constraints_index_append.sh deleted file mode 100755 index acceedbb1d1..00000000000 --- a/tests/queries/0_stateless/01625_constraints_index_append.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# We should have correct env vars from shell_config.sh to run this test - -$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS index_append_test_test;" - -$CLICKHOUSE_CLIENT --query "CREATE TABLE index_append_test_test (i Int64, a UInt32, b UInt64, CONSTRAINT c1 ASSUME i <= 2 * b AND i + 40 > a) ENGINE = MergeTree() ORDER BY i;" -$CLICKHOUSE_CLIENT --query "INSERT INTO index_append_test_test VALUES (1, 10, 1), (2, 20, 2);" - -function run_with_settings() -{ - query="$1 SETTINGS convert_query_to_cnf = 1\ - , optimize_using_constraints = 1\ - , optimize_move_to_prewhere = 1\ - , optimize_substitute_columns = 1\ - , optimize_append_index = 1" - - if [[ $query =~ "EXPLAIN QUERY TREE" ]]; then query="${query}, allow_experimental_analyzer = 1"; fi - - $CLICKHOUSE_CLIENT --query="$query" - -} - -run_with_settings "EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a = 0" -run_with_settings "EXPLAIN QUERY TREE SELECT i FROM index_append_test_test WHERE a = 0" | grep -Fac "indexHint" -run_with_settings "EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a < 0" -run_with_settings "EXPLAIN QUERY TREE SELECT i FROM index_append_test_test WHERE a < 0" | grep -Fac "indexHint" -run_with_settings "EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a >= 0" -run_with_settings "EXPLAIN QUERY TREE SELECT i FROM index_append_test_test WHERE a >= 0" | grep -Fac "indexHint" -run_with_settings "EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE 2 * b < 100" -run_with_settings "EXPLAIN QUERY TREE SELECT i FROM index_append_test_test WHERE 2 * b < 100" | grep -Fac "indexHint" - -$CLICKHOUSE_CLIENT --query "DROP TABLE index_append_test_test;" diff --git a/tests/queries/0_stateless/01625_constraints_index_append.sql b/tests/queries/0_stateless/01625_constraints_index_append.sql new file mode 100644 index 00000000000..482cd325bb7 --- /dev/null +++ b/tests/queries/0_stateless/01625_constraints_index_append.sql @@ -0,0 +1,26 @@ +-- Tags: no-parallel + +-- CNF optimization uses QueryNodeHash to order conditions. 
We need fixed database.table.column identifier name to stabilize result +DROP DATABASE IF EXISTS db_memory_01625; +CREATE DATABASE db_memory_01625 ENGINE = Memory; +USE db_memory_01625; + +DROP TABLE IF EXISTS index_append_test_test; + +CREATE TABLE index_append_test_test (i Int64, a UInt32, b UInt64, CONSTRAINT c1 ASSUME i <= 2 * b AND i + 40 > a) ENGINE = MergeTree() ORDER BY i; + +INSERT INTO index_append_test_test VALUES (1, 10, 1), (2, 20, 2); + +SET convert_query_to_cnf = 1; +SET optimize_using_constraints = 1; +SET optimize_move_to_prewhere = 1; +SET optimize_substitute_columns = 1; +SET optimize_append_index = 1; + +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT i FROM index_append_test_test WHERE a = 0) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT i FROM index_append_test_test WHERE a < 0) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT i FROM index_append_test_test WHERE a >= 0) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT i FROM index_append_test_test WHERE 2 * b < 100) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +DROP TABLE index_append_test_test; +DROP DATABASE db_memory_01625; diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql index 22532529812..d405bb01fd9 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -1,3 +1,4 @@ +set allow_suspicious_low_cardinality_types=1; drop table if exists perf_lc_num; CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = TinyLog; diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql index 020d7cc5e72..8a6b604b053 100644 --- a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql @@ -41,7 +41,9 @@ FROM system.query_log WHERE current_database = currentDatabase() AND type = 'Que ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames; SELECT ''; -SELECT arraySort(used_functions) +-- 1. analyzer includes arrayJoin into functions list +-- 2. for crc32 (CaseInsensitive function) we use lower case now +SELECT arraySort(arrayMap(x -> x == 'crc32' ? 
'CRC32' : x, arrayFilter(x-> x != 'arrayJoin', used_functions))) as `arraySort(used_functions)` FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames; SELECT ''; diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql index c4f26a079f0..dc1e5b37050 100644 --- a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql +++ b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql @@ -1,2 +1,2 @@ -SELECT intDiv(9223372036854775807, 0.9998999834060669); -- { serverError 153 } -SELECT intDiv(9223372036854775807, 1.); -- { serverError 153 } +SELECT intDiv(18446744073709551615, 0.9998999834060669); -- { serverError 153 } +SELECT intDiv(18446744073709551615, 1.); -- { serverError 153 } diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 3d6a25fe799..74bafe6e4cd 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -12,7 +12,7 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. -select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi', max_block_size=1e12; -- { serverError 241 } +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError 241 } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, -- since they don't need to wait until the aggregation will be finished, diff --git a/tests/queries/0_stateless/01732_explain_syntax_union_query.reference b/tests/queries/0_stateless/01732_explain_syntax_union_query.reference index ccafa916b9f..5246cfec7aa 100644 --- a/tests/queries/0_stateless/01732_explain_syntax_union_query.reference +++ b/tests/queries/0_stateless/01732_explain_syntax_union_query.reference @@ -54,7 +54,6 @@ SELECT 1 - SELECT 1 - - ( SELECT 1 UNION DISTINCT diff --git a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh index adab3906e5b..cd8abb57a80 100755 --- a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh +++ b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh @@ -18,10 +18,12 @@ CLICKHOUSE_WATCHDOG_ENABLE=0 $CLICKHOUSE_SERVER_BINARY "${server_opts[@]}" >& cl server_pid=$! trap cleanup EXIT +# Shellcheck wrongly processes "trap" https://www.shellcheck.net/wiki/SC2317 +# shellcheck disable=SC2317 function cleanup() { - kill -9 $server_pid - kill -9 $client_pid + kill -9 "$server_pid" + kill -9 "$client_pid" echo "Test failed.
Server log:" cat clickhouse-server.log diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference index 98c76cc2a50..26a0e97729c 100644 --- a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference +++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference @@ -1,114 +1,38 @@ optimize_move_to_prewhere_if_final = 1 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -PREWHERE x > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE x > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -PREWHERE y > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE y > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -PREWHERE (x + y) > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE (x + y) > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -WHERE z > 400 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE y > 100 -WHERE (y > 100) AND (z > 400) - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE x > 50 -WHERE (x > 50) AND (z > 400) - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE (x + y) > 50 -WHERE ((x + y) > 50) AND (z > 400) - + Prewhere info + Prewhere filter + Prewhere filter column: greater(x, 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(x, 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(y, 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(y, 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(plus(x, y), 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(plus(x, y), 100) (removed) + Filter + Filter column: and(greater(y, 100), greater(z, 400)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(y, 100) + Filter + Filter column: and(greater(x, 50), greater(z, 400)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(x, 50) + Filter + Filter column: and(greater(plus(x, y), 50), greater(z, 400)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(plus(x, y), 50) optimize_move_to_prewhere_if_final = 0 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -PREWHERE y > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -WHERE y > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -WHERE z > 400 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -WHERE (y > 100) AND (z > 400) + Prewhere info + Prewhere filter + Prewhere filter column: greater(y, 100) (removed) diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql index ede15738c5b..d4830e9e357 100644 --- a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql +++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql @@ -10,42 +10,27 @@ select 'optimize_move_to_prewhere_if_final = 1'; SET optimize_move_to_prewhere_if_final = 1; -- order key can be pushed down with final -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE x > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x > 100; 
-select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE x + y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 100; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final WHERE x > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE x > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final WHERE y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final WHERE x + y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 100) WHERE explain LIKE '%Prewhere%'; -- can not be pushed down -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400) WHERE explain LIKE '%Prewhere filter'; -- only condition with x/y can be pushed down -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x > 50 and z > 400; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 50 and z > 400; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE x > 50 and z > 400) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 50 and z > 400) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; -select ''; select 'optimize_move_to_prewhere_if_final = 0'; SET optimize_move_to_prewhere_if_final = 0; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final WHERE y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100) WHERE 
explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400) WHERE explain LIKE '%Prewhere%'; DROP TABLE prewhere_move_select_final; diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference index 80bd7dfd8c0..19018a610b7 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference @@ -9,7 +9,7 @@ String1_0 String2_0 String3_0 String4_0 1 Expression ((Projection + Before ORDER BY)) Filter (WHERE) Join (JOIN FillRightFirst) - Filter (( + Before JOIN)) + Expression ReadFromMergeTree (default.t1) Indexes: PrimaryKey diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index fd1bc713b08..3a015d32539 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -1,77 +1,79 @@ - ReadFromMergeTree (default.test_index) - Indexes: - MinMax - Keys: - y - Parts: 4/5 - Granules: 11/12 - Partition - Keys: - y - bitAnd(z, 3) - Parts: 3/4 - Granules: 10/11 - PrimaryKey - Keys: - x - y - Parts: 2/3 - Granules: 6/10 - Skip - Name: t_minmax - Description: minmax GRANULARITY 2 - Parts: 1/2 - Granules: 3/6 - Skip - Name: t_set - Description: set GRANULARITY 2 - Parts: 1/1 - Granules: 2/3 + ReadFromMergeTree (default.test_index) + Indexes: + MinMax + Keys: + y + Parts: 4/5 + Granules: 11/12 + Partition + Keys: + y + bitAnd(z, 3) + Parts: 3/4 + Granules: 10/11 + PrimaryKey + Keys: + x + y + Parts: 2/3 + Granules: 6/10 + Skip + Name: t_minmax + Description: minmax GRANULARITY 2 + Parts: 1/2 + Granules: 3/6 + Skip + Name: t_set + Description: set GRANULARITY 2 + Parts: 1/1 + Granules: 2/3 ----------------- - "Node Type": "ReadFromMergeTree", - "Description": "default.test_index", - "Indexes": [ - { - "Type": "MinMax", - "Keys": ["y"], - "Initial Parts": 5, - "Selected Parts": 4, - "Initial Granules": 12, - "Selected Granules": 11 - }, - { - "Type": "Partition", - "Keys": ["y", "bitAnd(z, 3)"], - "Initial Parts": 4, - "Selected Parts": 3, - "Initial Granules": 11, - "Selected Granules": 10 - }, - { - "Type": "PrimaryKey", - "Keys": ["x", "y"], - "Initial Parts": 3, - "Selected Parts": 2, - "Initial Granules": 10, - "Selected Granules": 6 - }, - { - "Type": "Skip", - "Name": "t_minmax", - "Description": "minmax GRANULARITY 2", - "Initial Parts": 2, - "Selected Parts": 1, - "Initial Granules": 6, - "Selected Granules": 3 - }, - { - "Type": "Skip", - "Name": "t_set", - "Description": "set GRANULARITY 2", - "Initial Parts": 1, - "Selected Parts": 1, - "Initial Granules": 3, - "Selected Granules": 2 + "Node Type": "ReadFromMergeTree", + "Description": "default.test_index", + "Indexes": [ + { + "Type": "MinMax", + "Keys": ["y"], + "Initial Parts": 5, + "Selected Parts": 4, + "Initial Granules": 12, + "Selected Granules": 11 + }, + { + "Type": "Partition", + "Keys": ["y", "bitAnd(z, 3)"], + "Initial Parts": 4, + "Selected Parts": 3, + "Initial Granules": 11, + "Selected Granules": 10 + }, + { + "Type": "PrimaryKey", + "Keys": ["x", "y"], + "Initial Parts": 3, + "Selected Parts": 
2, + "Initial Granules": 10, + "Selected Granules": 6 + }, + { + "Type": "Skip", + "Name": "t_minmax", + "Description": "minmax GRANULARITY 2", + "Initial Parts": 2, + "Selected Parts": 1, + "Initial Granules": 6, + "Selected Granules": 3 + }, + { + "Type": "Skip", + "Name": "t_set", + "Description": "set GRANULARITY 2", + "Initial Parts": 1, + "Selected Parts": 1, + "Initial Granules": 3, + "Selected Granules": 2 + } + ] } ] } @@ -89,15 +91,15 @@ ReadType: InReverseOrder Parts: 1 Granules: 3 - ReadFromMergeTree (default.idx) - Indexes: - PrimaryKey - Keys: - x - plus(x, y) - Condition: or((x in 2-element set), (plus(plus(x, y), 1) in (-Inf, 2])) - Parts: 1/1 - Granules: 1/1 + ReadFromMergeTree (default.idx) + Indexes: + PrimaryKey + Keys: + x + plus(x, y) + Condition: or((x in 2-element set), (plus(plus(x, y), 1) in (-Inf, 2])) + Parts: 1/1 + Granules: 1/1 ReadFromMergeTree (default.test_index) Indexes: MinMax diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index 87153a4bd58..3782a7d3ad6 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -7,6 +7,9 @@ # and we can do it compatible with parallel run only in .sh # (via $CLICKHOUSE_DATABASE) +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference index 686a864f222..ccd51cba776 100644 --- a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference +++ b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference @@ -1,12 +1,19 @@ 1 Wide 2 Compact 35 -SELECT count() -FROM t_move_to_prewhere -PREWHERE a AND b AND c AND (NOT ignore(fat_string)) + Filter + Filter column: and(a, b, c, not(ignore(fat_string))) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(a, b, c) (removed) 1 Compact 2 Compact 35 SELECT count() FROM t_move_to_prewhere -PREWHERE a AND b AND c AND (NOT ignore(fat_string)) +WHERE a AND b AND c AND (NOT ignore(fat_string)) + Filter + Filter column: and(a, b, c, not(ignore(fat_string))) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: a diff --git a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql index 2987c541aef..6ad804ac1b3 100644 --- a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql +++ b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql @@ -2,6 +2,7 @@ SET optimize_move_to_prewhere = 1; SET convert_query_to_cnf = 0; +SET move_all_conditions_to_prewhere = 0; DROP TABLE IF EXISTS t_move_to_prewhere; @@ -17,7 +18,7 @@ WHERE table = 't_move_to_prewhere' AND database = currentDatabase() ORDER BY partition; SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string); -EXPLAIN SYNTAX SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string); +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; DROP TABLE 
IF EXISTS t_move_to_prewhere; @@ -38,5 +39,6 @@ ORDER BY partition; SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string); EXPLAIN SYNTAX SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string); +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; DROP TABLE IF EXISTS t_move_to_prewhere; diff --git a/tests/queries/0_stateless/01848_partition_value_column.sql b/tests/queries/0_stateless/01848_partition_value_column.sql index 28d842af3e9..de5e766c92c 100644 --- a/tests/queries/0_stateless/01848_partition_value_column.sql +++ b/tests/queries/0_stateless/01848_partition_value_column.sql @@ -14,8 +14,8 @@ select count() from tbl where _partition_value.3 = 4 settings max_rows_to_read = create table tbl2(i int) engine MergeTree order by i; insert into tbl2 values (1); -select _partition_value from tbl2; -- { serverError 16 } -select _partition_value from tbl2 group by 1; -- { serverError 16 } +select _partition_value from tbl2; -- { serverError UNKNOWN_IDENTIFIER } +select _partition_value from tbl2 group by 1; -- { serverError UNKNOWN_IDENTIFIER } drop table tbl; drop table tbl2; diff --git a/tests/queries/0_stateless/01874_select_from_trailing_whitespaces.reference b/tests/queries/0_stateless/01874_select_from_trailing_whitespaces.reference index a52505659d1..4dc5ccac840 100644 --- a/tests/queries/0_stateless/01874_select_from_trailing_whitespaces.reference +++ b/tests/queries/0_stateless/01874_select_from_trailing_whitespaces.reference @@ -46,10 +46,12 @@ SELECT * FROM system.one UNION ALL SELECT * FROM system.one SELECT * FROM system.one UNION ALL -SELECT * -FROM system.one +( + SELECT * + FROM system.one +) # /* oneline */ select * from system.one union all (select * from system.one) -SELECT * FROM system.one UNION ALL SELECT * FROM system.one +SELECT * FROM system.one UNION ALL (SELECT * FROM system.one) # select 1 union all (select 1 union distinct select 1) SELECT 1 UNION ALL diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.reference b/tests/queries/0_stateless/01889_sqlite_read_write.reference index 9f2b382e41e..e605693d95d 100644 --- a/tests/queries/0_stateless/01889_sqlite_read_write.reference +++ b/tests/queries/0_stateless/01889_sqlite_read_write.reference @@ -29,7 +29,7 @@ CREATE TABLE default.sqlite_table3\n(\n `col1` String,\n `col2` Int32\n)\n not a null 2 3 4 -line6 6 +line\'6 6 7 test table function line1 1 diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.sh b/tests/queries/0_stateless/01889_sqlite_read_write.sh index 02b9a649e94..fd0a1df20ac 100755 --- a/tests/queries/0_stateless/01889_sqlite_read_write.sh +++ b/tests/queries/0_stateless/01889_sqlite_read_write.sh @@ -76,7 +76,7 @@ ${CLICKHOUSE_CLIENT} --query='DROP TABLE IF EXISTS sqlite_table3' ${CLICKHOUSE_CLIENT} --query="CREATE TABLE sqlite_table3 (col1 String, col2 Int32) ENGINE = SQLite('${DB_PATH}', 'table3')" ${CLICKHOUSE_CLIENT} --query='SHOW CREATE TABLE sqlite_table3;' | sed -r 's/(.*SQLite)(.*)/\1/' -${CLICKHOUSE_CLIENT} --query="INSERT INTO sqlite_table3 VALUES ('line6', 6);" +${CLICKHOUSE_CLIENT} --query="INSERT INTO sqlite_table3 VALUES ('line\'6', 6);" ${CLICKHOUSE_CLIENT} --query="INSERT INTO sqlite_table3 VALUES (NULL, 7);" ${CLICKHOUSE_CLIENT} --query='SELECT * FROM sqlite_table3 ORDER BY col2' diff --git 
a/tests/queries/0_stateless/01890_materialized_distributed_join.sh b/tests/queries/0_stateless/01890_materialized_distributed_join.sh index 0d761f2defa..88f7dcf9a69 100755 --- a/tests/queries/0_stateless/01890_materialized_distributed_join.sh +++ b/tests/queries/0_stateless/01890_materialized_distributed_join.sh @@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT -nm -q " insert into test_shard values (1, 1); insert into test_local values (1, 2); - create materialized view test_distributed engine Distributed('test_cluster_two_shards', $CLICKHOUSE_DATABASE, 'test_shard', k) as select k, v from test_source; + create materialized view $CLICKHOUSE_DATABASE.test_distributed engine Distributed('test_cluster_two_shards', $CLICKHOUSE_DATABASE, 'test_shard', k) as select k, v from test_source; select * from test_distributed td asof join $CLICKHOUSE_DATABASE.test_local tl on td.k = tl.k and td.v < tl.v; select td.v, td.k, td.v, tl.v, tl.k, td.v from test_distributed td asof join $CLICKHOUSE_DATABASE.test_local tl on td.k = tl.k and td.v < tl.v FORMAT TSVWithNamesAndTypes; diff --git a/tests/queries/0_stateless/01902_table_function_merge_db_params.reference b/tests/queries/0_stateless/01902_table_function_merge_db_params.reference new file mode 100644 index 00000000000..28edbbdfd38 --- /dev/null +++ b/tests/queries/0_stateless/01902_table_function_merge_db_params.reference @@ -0,0 +1,6 @@ +01902_db_params t 0 +01902_db_params t 1 +01902_db_params t 2 +01902_db_params t 0 +01902_db_params t 1 +01902_db_params t 2 diff --git a/tests/queries/0_stateless/01902_table_function_merge_db_params.sql b/tests/queries/0_stateless/01902_table_function_merge_db_params.sql new file mode 100644 index 00000000000..3d97cf2b0c6 --- /dev/null +++ b/tests/queries/0_stateless/01902_table_function_merge_db_params.sql @@ -0,0 +1,13 @@ +DROP DATABASE IF EXISTS 01902_db_params; +CREATE DATABASE 01902_db_params; +CREATE TABLE 01902_db_params.t(n Int8) ENGINE=MergeTree ORDER BY n; +INSERT INTO 01902_db_params.t SELECT * FROM numbers(3); +SELECT _database, _table, n FROM merge(REGEXP('^01902_db_params'), '^t') ORDER BY _database, _table, n; + +SELECT _database, _table, n FROM merge() ORDER BY _database, _table, n; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT _database, _table, n FROM merge('^t') ORDER BY _database, _table, n; -- {serverError BAD_ARGUMENTS} + +USE 01902_db_params; +SELECT _database, _table, n FROM merge('^t') ORDER BY _database, _table, n; + +DROP DATABASE 01902_db_params; diff --git a/tests/queries/0_stateless/01913_fix_column_transformer_replace_format.reference b/tests/queries/0_stateless/01913_fix_column_transformer_replace_format.reference index c2ebb7fa4f4..33be11c07d5 100644 --- a/tests/queries/0_stateless/01913_fix_column_transformer_replace_format.reference +++ b/tests/queries/0_stateless/01913_fix_column_transformer_replace_format.reference @@ -1 +1 @@ -CREATE VIEW default.my_view\n(\n `Id` UInt32,\n `Object.Key` Array(UInt16),\n `Object.Value` Array(String)\n) AS\nSELECT * REPLACE arrayMap(x -> (x + 1), `Object.Key`) AS `Object.Key`\nFROM default.my_table +CREATE VIEW default.my_view\n(\n `Id` UInt32,\n `Object.Key` Array(UInt16),\n `Object.Value` Array(String)\n)\nAS SELECT * REPLACE arrayMap(x -> (x + 1), `Object.Key`) AS `Object.Key`\nFROM default.my_table diff --git a/tests/queries/0_stateless/01920_not_chain_format.reference b/tests/queries/0_stateless/01920_not_chain_format.reference index 22abfd17dc7..bb58a0ff146 100644 --- a/tests/queries/0_stateless/01920_not_chain_format.reference +++ 
b/tests/queries/0_stateless/01920_not_chain_format.reference @@ -1,5 +1,5 @@ -- { echo } EXPLAIN SYNTAX SELECT NOT NOT (NOT (NOT (NULL))); -SELECT NOT (NOT (NOT NOT NULL)) +SELECT NOT (NOT (NOT (NOT NULL))) EXPLAIN SYNTAX SELECT NOT (NOT (NOT NOT NULL)); -SELECT NOT (NOT (NOT NOT NULL)) +SELECT NOT (NOT (NOT (NOT NULL))) diff --git a/tests/queries/0_stateless/01921_not_chain.reference b/tests/queries/0_stateless/01921_not_chain.reference index c29c66f1274..ebd18f4b342 100644 --- a/tests/queries/0_stateless/01921_not_chain.reference +++ b/tests/queries/0_stateless/01921_not_chain.reference @@ -4,6 +4,6 @@ SELECT 1 != (NOT 1); SELECT 1 != NOT 1; 1 EXPLAIN SYNTAX SELECT 1 != (NOT 1); -SELECT 1 != NOT 1 +SELECT 1 != (NOT 1) EXPLAIN SYNTAX SELECT 1 != NOT 1; -SELECT 1 != NOT 1 +SELECT 1 != (NOT 1) diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference index ddfa6929d69..212dd348edb 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference @@ -71,3 +71,81 @@ Expression (Projection) Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) +set allow_experimental_analyzer = 1; +explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized +Expression (Project names) + Distinct (DISTINCT) + Union + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized +Union + Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized +Expression (Project names) + LimitBy + Union + Expression (Before LIMIT BY) + LimitBy + Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + ReadFromSystemNumbers + Expression + ReadFromRemote (Read from remote replica) +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized +Union + Expression (Project names) + LimitBy + Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + 
(Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized +Expression (Project names) + Distinct (DISTINCT) + Sorting (Merge sorted streams for ORDER BY, without aggregation) + Union + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized +Expression (Project names) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) + Union + Distinct (DISTINCT) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized +Expression (Project names) + LimitBy + Expression (Before LIMIT BY) + Sorting (Merge sorted streams for ORDER BY, without aggregation) + Union + LimitBy + Expression ((Before LIMIT BY + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) [lifted up part])) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized +Expression (Project names) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) + Union + LimitBy + Expression ((Before LIMIT BY + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) [lifted up part])) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) diff --git 
a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql index 74b55b95315..adf55a9dd7f 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql @@ -4,6 +4,8 @@ set optimize_skip_unused_shards=1; set optimize_distributed_group_by_sharding_key=1; set prefer_localhost_replica=1; +set allow_experimental_analyzer = 0; + -- { echo } explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized @@ -14,3 +16,15 @@ explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized + +set allow_experimental_analyzer = 1; + +explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized +explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized +explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized + +explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized +explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized +explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized diff --git a/tests/queries/0_stateless/01999_grant_with_replace.reference b/tests/queries/0_stateless/01999_grant_with_replace.reference index 740c55d5325..dc2047ab73c 100644 --- a/tests/queries/0_stateless/01999_grant_with_replace.reference +++ b/tests/queries/0_stateless/01999_grant_with_replace.reference @@ -4,12 +4,12 @@ B GRANT SELECT ON db1.* TO test_user_01999 GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES ON db2.tb2 TO test_user_01999 C -GRANT SELECT(col1) ON db3.table TO test_user_01999 +GRANT SELECT(col1) ON db3.`table` TO test_user_01999 D GRANT SELECT(col3) ON db3.table3 TO test_user_01999 GRANT SELECT(col1, col2) ON db4.table4 TO test_user_01999 E -GRANT SELECT(cola) ON db5.table TO test_user_01999 +GRANT SELECT(cola) ON db5.`table` TO test_user_01999 GRANT INSERT(colb) ON db6.tb61 TO test_user_01999 GRANT SHOW ON db7.* TO test_user_01999 F diff --git a/tests/queries/0_stateless/02008_materialize_column.sql 
b/tests/queries/0_stateless/02008_materialize_column.sql index a78920d2525..cc7d3096402 100644 --- a/tests/queries/0_stateless/02008_materialize_column.sql +++ b/tests/queries/0_stateless/02008_materialize_column.sql @@ -17,6 +17,7 @@ ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2); SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; +ALTER TABLE tmp CLEAR COLUMN s; -- Need to clear the column because MATERIALIZE COLUMN won't overwrite existing values ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3); SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql index 19c58bb28a7..42a6556fc77 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.sql +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -1,2 +1,3 @@ -- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). +SET allow_suspicious_low_cardinality_types=1; SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError 44 } diff --git a/tests/queries/0_stateless/02021_create_database_with_comment.sh b/tests/queries/0_stateless/02021_create_database_with_comment.sh index 11e62e790b7..8432963e059 100755 --- a/tests/queries/0_stateless/02021_create_database_with_comment.sh +++ b/tests/queries/0_stateless/02021_create_database_with_comment.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02096_rename_atomic_hang.sql b/tests/queries/0_stateless/02096_rename_atomic_hang.sql index dec5f3f9506..32d7efec6c2 100644 --- a/tests/queries/0_stateless/02096_rename_atomic_hang.sql +++ b/tests/queries/0_stateless/02096_rename_atomic_hang.sql @@ -1,8 +1,9 @@ -- Tags: no-parallel - +SET send_logs_level = 'fatal'; drop database if exists db_hang; drop database if exists db_hang_temp; set allow_deprecated_database_ordinary=1; +-- Creation of a database with Ordinary engine emits a warning.
create database db_hang engine=Ordinary; use db_hang; create table db_hang.test(A Int64) Engine=MergeTree order by A; diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index 97f6c049705..64d9b9db331 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -19,7 +19,7 @@ SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } SELECT t2.1 FROM t_tuple_element; EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; @@ -31,7 +31,7 @@ SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } DROP TABLE t_tuple_element; diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 1b758f4132b..5081527ceef 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -195,6 +195,8 @@ CREATE TABLE system.disks `unreserved_space` UInt64, `keep_free_space` UInt64, `type` String, + `object_storage_type` String, + `metadata_type` String, `is_encrypted` UInt8, `is_read_only` UInt8, `is_write_once` UInt8, @@ -686,9 +688,6 @@ CREATE TABLE system.projection_parts `rows_where_ttl_info.expression` Array(String), `rows_where_ttl_info.min` Array(DateTime), `rows_where_ttl_info.max` Array(DateTime), - `is_broken` UInt8, - `exception_code` Int32, - `exception` String, `bytes` UInt64 ALIAS bytes_on_disk, `marks_size` UInt64 ALIAS marks_bytes, `part_name` String ALIAS name @@ -1083,6 +1082,7 @@ CREATE TABLE system.tables `data_paths` Array(String), `metadata_path` String, `metadata_modification_time` DateTime, + `metadata_version` Int32, `dependencies_database` Array(String), `dependencies_table` Array(String), `create_table_query` String, diff --git a/tests/queries/0_stateless/02117_show_create_table_system.sql b/tests/queries/0_stateless/02117_show_create_table_system.sql index 32465abbed7..438f26dcca7 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.sql +++ b/tests/queries/0_stateless/02117_show_create_table_system.sql @@ -1,6 +1,6 @@ /* we will `use system` to bypass style check, because `show create table` statement -cannot fit the requirement in check-sytle, which is as +cannot fit the requirement in 
check-style, which is as "# Queries to: tables_with_database_column=( diff --git a/tests/queries/0_stateless/02131_row_policies_combination.reference b/tests/queries/0_stateless/02131_row_policies_combination.reference index b76028d5077..5015cb14456 100644 --- a/tests/queries/0_stateless/02131_row_policies_combination.reference +++ b/tests/queries/0_stateless/02131_row_policies_combination.reference @@ -12,6 +12,15 @@ R1, R2, R3: (x == 1) OR (x == 2) OR (x == 3) 1 2 3 +R1, R2, R3 + additional_table_filters and PREWHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1) +2 +3 +R1, R2, R3 + additional_result_filter and PREWHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1) +2 +3 +R1, R2, R3 + additional_table_filters and WHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1) +2 +3 R1, R2, R3, R4: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2) 1 2 diff --git a/tests/queries/0_stateless/02131_row_policies_combination.sql b/tests/queries/0_stateless/02131_row_policies_combination.sql index b5be672bb1b..02f2365eed8 100644 --- a/tests/queries/0_stateless/02131_row_policies_combination.sql +++ b/tests/queries/0_stateless/02131_row_policies_combination.sql @@ -23,6 +23,24 @@ CREATE ROW POLICY 02131_filter_3 ON 02131_rptable USING x=3 AS permissive TO ALL SELECT 'R1, R2, R3: (x == 1) OR (x == 2) OR (x == 3)'; SELECT * FROM 02131_rptable; +SELECT 'R1, R2, R3 + additional_table_filters and PREWHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1)'; +SELECT * FROM 02131_rptable +PREWHERE x >= 2 +SETTINGS additional_table_filters = {'02131_rptable': 'x > 1'} +; + +SELECT 'R1, R2, R3 + additional_result_filter and PREWHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1)'; +SELECT * FROM 02131_rptable +PREWHERE x >= 2 +SETTINGS additional_result_filter = 'x > 1' +; + +SELECT 'R1, R2, R3 + additional_table_filters and WHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1)'; +SELECT * FROM 02131_rptable +WHERE x >= 2 +SETTINGS additional_table_filters = {'02131_rptable': 'x > 1'} +; + CREATE ROW POLICY 02131_filter_4 ON 02131_rptable USING x<=2 AS restrictive TO ALL; SELECT 'R1, R2, R3, R4: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2)'; SELECT * FROM 02131_rptable; diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference index beda9e36223..0bb8966cbe4 100644 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.table\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') -CREATE TABLE foo.table\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') +CREATE TABLE default.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') +CREATE TABLE foo.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference index f3415a34823..d608364e01b 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference @@ -64,8 +64,10 @@ ExpressionTransform (Sorting) (Expression) ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: 
InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform (Limit) @@ -91,8 +93,10 @@ ExpressionTransform PartialSortingTransform (Expression) ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform (Limit) @@ -115,7 +119,7 @@ SELECT date, i FROM t_read_in_order -PREWHERE date = \'2020-10-12\' +WHERE date = \'2020-10-12\' ORDER BY i DESC LIMIT 5 (Expression) @@ -125,9 +129,11 @@ ExpressionTransform (Sorting) (Expression) ExpressionTransform - (ReadFromMergeTree) - ReverseTransform - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + ReverseTransform + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 (Expression) ExpressionTransform (Limit) diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference deleted file mode 100644 index 1fc09c8d154..00000000000 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference +++ /dev/null @@ -1,16 +0,0 @@ -Checking input_format_parallel_parsing=false& -1 -Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1 -1 -Checking input_format_parallel_parsing=false&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=true& -1 -Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1 -1 -Checking input_format_parallel_parsing=true&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true -1 diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh deleted file mode 100755 index 5f9eb460e44..00000000000 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-tsan, no-cpu-aarch64, no-parallel -# TSan does not supports tracing. -# trace_log doesn't work on aarch64 - -# Regression for proper release of Context, -# via tracking memory of external tables. - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -tmp_file=$(mktemp "$CURDIR/clickhouse.XXXXXX.csv") -trap 'rm $tmp_file' EXIT - -$CLICKHOUSE_CLIENT -q "SELECT toString(number) FROM numbers(1e6) FORMAT TSV" > "$tmp_file" - -function run_and_check() -{ - local query_id - query_id="$(${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SELECT generateUUIDv4()')" - - echo "Checking $*" - - # Run query with external table (implicit StorageMemory user) - $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&max_untracked_memory=0&query_id=$query_id&$*" -o /dev/null - - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SYSTEM FLUSH LOGS' - - # Check that temporary table had been destroyed. - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&allow_introspection_functions=1" --data-binary @- <<<" - WITH arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS sym - SELECT count()>0 FROM system.trace_log - WHERE - sym LIKE '%DB::StorageMemory::drop%\n%TemporaryTableHolder::~TemporaryTableHolder%' AND - query_id = '$query_id' - " -} - -for input_format_parallel_parsing in false true; do - query_args_variants=( - "" - "cancel_http_readonly_queries_on_client_close=1&readonly=1" - "send_progress_in_http_headers=true" - # nested progress callback - "cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true" - ) - for query_args in "${query_args_variants[@]}"; do - run_and_check "input_format_parallel_parsing=$input_format_parallel_parsing&$query_args" - done -done diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference index 74ba452d783..86a36a9392c 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference @@ -1,12 +1,15 @@ -SELECT count() -FROM t_02156_merge1 -PREWHERE notEmpty(v) AND (k = 3) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) 2 -SELECT count() -FROM t_02156_merge2 -WHERE (k = 3) AND notEmpty(v) + Filter column: and(equals(k, 3), notEmpty(v)) (removed) 2 -SELECT count() -FROM t_02156_merge3 -WHERE (k = 3) AND notEmpty(v) + Filter column: and(equals(k, 3), notEmpty(v)) (removed) + Filter column: and(equals(k, 3), notEmpty(v)) (removed) 2 diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index 83d88a68d9b..ca61a8f2d57 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -1,5 +1,6 @@ SET optimize_move_to_prewhere = 1; SET enable_multiple_prewhere_read_steps = 1; +SET prefer_localhost_replica = 1; -- Make sure plan is reliable DROP TABLE IF EXISTS t_02156_mt1; DROP TABLE IF EXISTS t_02156_mt2; @@ -23,13 +24,13 @@ INSERT INTO t_02156_mt1 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_mt2 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_log SELECT number, toString(number) FROM numbers(10000); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); +SELECT replaceRegexpAll(explain, 
'__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v); +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v); +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v); DROP TABLE IF EXISTS t_02156_mt1; diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index b6b497b4b55..78b414378f1 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug SET allow_introspection_functions = 0; SELECT addressToLineWithInlines(1); -- { serverError 446 } diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference index 8ec3608317f..dcfab092b5c 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference @@ -19,6 +19,48 @@ 94 94 94 94 5 99 99 99 99 5 02177_MV 7 80 22 +4 4 4 4 5 +9 9 9 9 5 +14 14 14 14 5 +19 19 19 19 5 +24 24 24 24 5 +29 29 29 29 5 +34 34 34 34 5 +39 39 39 39 5 +44 44 44 44 5 +49 49 49 49 5 +54 54 54 54 5 +59 59 59 59 5 +64 64 64 64 5 +69 69 69 69 5 +74 74 74 74 5 +79 79 79 79 5 +84 84 84 84 5 +89 89 89 89 5 +94 94 94 94 5 +99 99 99 99 5 +02177_MV 0 0 22 +10 +40 +70 +100 +130 +160 +190 +220 +250 +280 +310 +340 +370 +400 +430 +460 +490 +520 +550 +580 +02177_MV_2 0 0 21 10 40 70 @@ -61,3 +103,24 @@ 188 198 02177_MV_3 20 0 1 +8 +18 +28 +38 +48 +58 +68 +78 +88 +98 +108 +118 +128 +138 +148 +158 +168 +178 +188 +198 +02177_MV_3 19 0 2 diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql index 742d72fe2b2..ca54b9e1400 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql @@ -14,6 +14,8 @@ CREATE MATERIALIZED VIEW mv1 TO t2 AS FROM t1 LIMIT 5; +set allow_experimental_analyzer = 0; + -- FIRST INSERT INSERT INTO t1 WITH @@ -58,8 +60,48 @@ WHERE AND query LIKE '-- FIRST INSERT\nINSERT INTO t1\n%' AND event_date >= yesterday() AND event_time > now() - interval 10 minute; +truncate table t2; +set allow_experimental_analyzer = 1; + +-- FIRST INSERT ANALYZER +INSERT INTO t1 +WITH + (SELECT max(i) FROM t1) AS t1 +SELECT + number as i, + t1 + t1 + t1 AS j -- Using global cache +FROM system.numbers +LIMIT 100 +SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; + +SELECT k, l, m, n, count() +FROM t2 +GROUP BY k, l, 
m, n +ORDER BY k, l, m, n; + +SYSTEM FLUSH LOGS; + +SELECT + '02177_MV', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- FIRST INSERT ANALYZER\nINSERT INTO t1\n%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + DROP TABLE mv1; +set allow_experimental_analyzer = 0; + CREATE TABLE t3 (z Int64) ENGINE = Memory; CREATE MATERIALIZED VIEW mv2 TO t3 AS SELECT @@ -91,8 +133,36 @@ WHERE AND query LIKE '-- SECOND INSERT\nINSERT INTO t1%' AND event_date >= yesterday() AND event_time > now() - interval 10 minute; +truncate table t3; +set allow_experimental_analyzer = 1; + +-- SECOND INSERT ANALYZER +INSERT INTO t1 +SELECT 0 as i, number as j from numbers(100) +SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; + +SELECT * FROM t3 ORDER BY z ASC; +SYSTEM FLUSH LOGS; +SELECT + '02177_MV_2', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- SECOND INSERT ANALYZER\nINSERT INTO t1%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + DROP TABLE mv2; +set allow_experimental_analyzer = 0; CREATE TABLE t4 (z Int64) ENGINE = Memory; CREATE MATERIALIZED VIEW mv3 TO t4 AS @@ -126,6 +196,35 @@ WHERE AND query LIKE '-- THIRD INSERT\nINSERT INTO t1%' AND event_date >= yesterday() AND event_time > now() - interval 10 minute; +truncate table t4; +set allow_experimental_analyzer = 1; + +-- THIRD INSERT ANALYZER +INSERT INTO t1 +SELECT number as i, number as j from numbers(100) + SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; +SYSTEM FLUSH LOGS; + +SELECT * FROM t4 ORDER BY z ASC; + +SELECT + '02177_MV_3', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- THIRD INSERT ANALYZER\nINSERT INTO t1%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + + DROP TABLE mv3; DROP TABLE t1; DROP TABLE t2; diff --git a/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference new file mode 100644 index 00000000000..7b36cc96f5e --- /dev/null +++ b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference @@ -0,0 +1,20 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh new file mode 100755 index 00000000000..ec1edd710a1 --- /dev/null +++ 
b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_LOCAL -q "select * from numbers(10)" > $CLICKHOUSE_TMP/data.parquet +$CLICKHOUSE_LOCAL -q "select * from table" < $CLICKHOUSE_TMP/data.parquet + +$CLICKHOUSE_CLIENT -q "select * from numbers(10)" > $CLICKHOUSE_TMP/data.parquet +$CLICKHOUSE_LOCAL -q "select * from table" < $CLICKHOUSE_TMP/data.parquet diff --git a/tests/queries/0_stateless/02181_format_describe_query.reference b/tests/queries/0_stateless/02181_format_describe_query.reference index 328ea19bd37..238a83df50a 100644 --- a/tests/queries/0_stateless/02181_format_describe_query.reference +++ b/tests/queries/0_stateless/02181_format_describe_query.reference @@ -1,3 +1,3 @@ DESCRIBE TABLE file('data.csv') -DESCRIBE TABLE table +DESCRIBE TABLE `table` DESCRIBE TABLE file('data.csv') diff --git a/tests/queries/0_stateless/02184_default_table_engine.reference b/tests/queries/0_stateless/02184_default_table_engine.reference index 495b9627acb..83760a178bd 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.reference +++ b/tests/queries/0_stateless/02184_default_table_engine.reference @@ -9,7 +9,7 @@ CREATE TABLE default.numbers1\n(\n `number` UInt64\n)\nENGINE = Memory CREATE TABLE default.numbers2\n(\n `number` UInt64\n)\nENGINE = MergeTree\nORDER BY intHash32(number)\nSAMPLE BY intHash32(number)\nSETTINGS index_granularity = 8192 45 CREATE TABLE default.numbers3\n(\n `number` UInt64\n)\nENGINE = Log -CREATE MATERIALIZED VIEW default.test_view_filtered\n(\n `EventDate` Date,\n `CounterID` UInt32\n)\nENGINE = Memory AS\nSELECT\n CounterID,\n EventDate\nFROM default.test_table\nWHERE EventDate < \'2013-01-01\' +CREATE MATERIALIZED VIEW default.test_view_filtered\n(\n `EventDate` Date,\n `CounterID` UInt32\n)\nENGINE = Memory\nAS SELECT\n CounterID,\n EventDate\nFROM default.test_table\nWHERE EventDate < \'2013-01-01\' 2014-01-02 0 0 1969-12-31 16:00:00 2014-01-02 03:04:06 1 2014-01-01 19:04:06 CREATE TABLE default.t1\n(\n `Rows` UInt64,\n `MaxHitTime` DateTime(\'UTC\')\n)\nENGINE = MergeTree\nORDER BY Rows\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02184_nested_tuple.sql b/tests/queries/0_stateless/02184_nested_tuple.sql index 67a20e3dce1..09ed8eb7200 100644 --- a/tests/queries/0_stateless/02184_nested_tuple.sql +++ b/tests/queries/0_stateless/02184_nested_tuple.sql @@ -1,3 +1,4 @@ +SET allow_suspicious_low_cardinality_types=1; DROP TABLE IF EXISTS t_nested_tuple; CREATE TABLE t_nested_tuple diff --git a/tests/queries/0_stateless/02185_orc_corrupted_file.sh b/tests/queries/0_stateless/02185_orc_corrupted_file.sh index 1987f094faa..12510ae3836 100755 --- a/tests/queries/0_stateless/02185_orc_corrupted_file.sh +++ b/tests/queries/0_stateless/02185_orc_corrupted_file.sh @@ -8,4 +8,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') cp $CUR_DIR/data_orc/corrupted.orc $USER_FILES_PATH/ -${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' +${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 
'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.python b/tests/queries/0_stateless/02205_HTTP_user_agent.python index d8f8a32b6db..83089741bf2 100644 --- a/tests/queries/0_stateless/02205_HTTP_user_agent.python +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.python @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from http.server import SimpleHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn import socket import sys import threading @@ -116,11 +117,19 @@ class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 +class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + pass + + +class ThreadedHTTPServerV6(ThreadingMixIn, HTTPServerV6): + pass + + def start_server(requests_amount): if IS_IPV6: - httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) + httpd = ThreadedHTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) else: - httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) + httpd = ThreadedHTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) def real_func(): for i in range(requests_amount): diff --git a/tests/queries/0_stateless/02206_information_schema_show_database.reference b/tests/queries/0_stateless/02206_information_schema_show_database.reference index fcc41e771b3..2cc93c56e82 100644 --- a/tests/queries/0_stateless/02206_information_schema_show_database.reference +++ b/tests/queries/0_stateless/02206_information_schema_show_database.reference @@ -1,6 +1,6 @@ CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory -CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n `is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n `numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `extra` Nullable(String),\n `column_comment` String,\n `column_type` String,\n `TABLE_CATALOG` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `COLUMN_NAME` String,\n `ORDINAL_POSITION` UInt64,\n `COLUMN_DEFAULT` String,\n `IS_NULLABLE` String,\n `DATA_TYPE` String,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64),\n `NUMERIC_PRECISION` Nullable(UInt64),\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64),\n `NUMERIC_SCALE` Nullable(UInt64),\n `DATETIME_PRECISION` Nullable(UInt64),\n `CHARACTER_SET_CATALOG` Nullable(String),\n `CHARACTER_SET_SCHEMA` Nullable(String),\n `CHARACTER_SET_NAME` Nullable(String),\n `COLLATION_CATALOG` Nullable(String),\n `COLLATION_SCHEMA` Nullable(String),\n `COLLATION_NAME` Nullable(String),\n `DOMAIN_CATALOG` Nullable(String),\n `DOMAIN_SCHEMA` Nullable(String),\n `DOMAIN_NAME` Nullable(String),\n `EXTRA` Nullable(String),\n `COLUMN_COMMENT` String,\n `COLUMN_TYPE` String\n) AS\nSELECT\n database AS table_catalog,\n database AS table_schema,\n table AS table_name,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE 
\'Nullable(%)\' AS is_nullable,\n type AS data_type,\n character_octet_length AS character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n NULL AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n NULL AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n multiIf(default_kind = \'DEFAULT\', \'DEFAULT_GENERATED\', default_kind = \'MATERIALIZED\', \'STORED GENERATED\', default_kind = \'ALIAS\', \'VIRTUAL GENERATED\', \'\') AS extra,\n comment AS column_comment,\n type AS column_type,\n table_catalog AS TABLE_CATALOG,\n table_schema AS TABLE_SCHEMA,\n table_name AS TABLE_NAME,\n column_name AS COLUMN_NAME,\n ordinal_position AS ORDINAL_POSITION,\n column_default AS COLUMN_DEFAULT,\n is_nullable AS IS_NULLABLE,\n data_type AS DATA_TYPE,\n character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,\n character_octet_length AS CHARACTER_OCTET_LENGTH,\n numeric_precision AS NUMERIC_PRECISION,\n numeric_precision_radix AS NUMERIC_PRECISION_RADIX,\n numeric_scale AS NUMERIC_SCALE,\n datetime_precision AS DATETIME_PRECISION,\n character_set_catalog AS CHARACTER_SET_CATALOG,\n character_set_schema AS CHARACTER_SET_SCHEMA,\n character_set_name AS CHARACTER_SET_NAME,\n collation_catalog AS COLLATION_CATALOG,\n collation_schema AS COLLATION_SCHEMA,\n collation_name AS COLLATION_NAME,\n domain_catalog AS DOMAIN_CATALOG,\n domain_schema AS DOMAIN_SCHEMA,\n domain_name AS DOMAIN_NAME,\n extra AS EXTRA,\n column_comment AS COLUMN_COMMENT,\n column_type AS COLUMN_TYPE\nFROM system.columns -CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_rows` Nullable(UInt64), `data_length` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_ROWS` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, total_rows AS table_rows, total_bytes AS data_length, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_rows AS TABLE_ROWS, data_length AS DATA_LENGTH, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables -CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_rows` Nullable(UInt64), `data_length` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_ROWS` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN 
TABLE\', \'BASE TABLE\') AS table_type, total_rows AS table_rows, total_bytes AS data_length, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_rows AS TABLE_ROWS, data_length AS DATA_LENGTH, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables -CREATE VIEW information_schema.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_rows` Nullable(UInt64), `data_length` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_ROWS` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, total_rows AS table_rows, total_bytes AS data_length, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_rows AS TABLE_ROWS, data_length AS DATA_LENGTH, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables -CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_rows` Nullable(UInt64), `data_length` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_ROWS` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, total_rows AS table_rows, total_bytes AS data_length, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_rows AS TABLE_ROWS, data_length AS DATA_LENGTH, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables +CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n `is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n `numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `extra` 
Nullable(String),\n `column_comment` String,\n `column_type` String,\n `TABLE_CATALOG` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `COLUMN_NAME` String,\n `ORDINAL_POSITION` UInt64,\n `COLUMN_DEFAULT` String,\n `IS_NULLABLE` String,\n `DATA_TYPE` String,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64),\n `NUMERIC_PRECISION` Nullable(UInt64),\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64),\n `NUMERIC_SCALE` Nullable(UInt64),\n `DATETIME_PRECISION` Nullable(UInt64),\n `CHARACTER_SET_CATALOG` Nullable(String),\n `CHARACTER_SET_SCHEMA` Nullable(String),\n `CHARACTER_SET_NAME` Nullable(String),\n `COLLATION_CATALOG` Nullable(String),\n `COLLATION_SCHEMA` Nullable(String),\n `COLLATION_NAME` Nullable(String),\n `DOMAIN_CATALOG` Nullable(String),\n `DOMAIN_SCHEMA` Nullable(String),\n `DOMAIN_NAME` Nullable(String),\n `EXTRA` Nullable(String),\n `COLUMN_COMMENT` String,\n `COLUMN_TYPE` String\n)\nSQL SECURITY INVOKER\nAS SELECT\n database AS table_catalog,\n database AS table_schema,\n `table` AS table_name,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE \'Nullable(%)\' AS is_nullable,\n type AS data_type,\n character_octet_length AS character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n NULL AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n NULL AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n multiIf(default_kind = \'DEFAULT\', \'DEFAULT_GENERATED\', default_kind = \'MATERIALIZED\', \'STORED GENERATED\', default_kind = \'ALIAS\', \'VIRTUAL GENERATED\', \'\') AS extra,\n comment AS column_comment,\n type AS column_type,\n table_catalog AS TABLE_CATALOG,\n table_schema AS TABLE_SCHEMA,\n table_name AS TABLE_NAME,\n column_name AS COLUMN_NAME,\n ordinal_position AS ORDINAL_POSITION,\n column_default AS COLUMN_DEFAULT,\n is_nullable AS IS_NULLABLE,\n data_type AS DATA_TYPE,\n character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,\n character_octet_length AS CHARACTER_OCTET_LENGTH,\n numeric_precision AS NUMERIC_PRECISION,\n numeric_precision_radix AS NUMERIC_PRECISION_RADIX,\n numeric_scale AS NUMERIC_SCALE,\n datetime_precision AS DATETIME_PRECISION,\n character_set_catalog AS CHARACTER_SET_CATALOG,\n character_set_schema AS CHARACTER_SET_SCHEMA,\n character_set_name AS CHARACTER_SET_NAME,\n collation_catalog AS COLLATION_CATALOG,\n collation_schema AS COLLATION_SCHEMA,\n collation_name AS COLLATION_NAME,\n domain_catalog AS DOMAIN_CATALOG,\n domain_schema AS DOMAIN_SCHEMA,\n domain_name AS DOMAIN_NAME,\n extra AS EXTRA,\n column_comment AS COLUMN_COMMENT,\n column_type AS COLUMN_TYPE\nFROM system.columns +CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_rows` Nullable(UInt64), `data_length` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_ROWS` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) SQL SECURITY INVOKER AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', 
engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, total_rows AS table_rows, total_bytes AS data_length, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_rows AS TABLE_ROWS, data_length AS DATA_LENGTH, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables +CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_rows` Nullable(UInt64), `data_length` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_ROWS` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) SQL SECURITY INVOKER AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, total_rows AS table_rows, total_bytes AS data_length, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_rows AS TABLE_ROWS, data_length AS DATA_LENGTH, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables +CREATE VIEW information_schema.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_rows` Nullable(UInt64), `data_length` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_ROWS` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) SQL SECURITY INVOKER AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, total_rows AS table_rows, total_bytes AS data_length, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_rows AS TABLE_ROWS, data_length AS DATA_LENGTH, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables +CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_rows` Nullable(UInt64), `data_length` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_ROWS` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) SQL SECURITY INVOKER AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS 
table_type, total_rows AS table_rows, total_bytes AS data_length, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_rows AS TABLE_ROWS, data_length AS DATA_LENGTH, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh index 152d8344764..c62ec14b340 100755 --- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CLIENT} -n --query="CREATE TABLE sample_table ( ) ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like', '1') ORDER BY tuple(); -DROP TABLE IF EXISTS sample_table;" +DROP TABLE IF EXISTS sample_table SYNC;" ${CLICKHOUSE_CLIENT} -n --query "CREATE TABLE sample_table_2 ( diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference index c28035fab49..d726ae86de7 100644 --- a/tests/queries/0_stateless/02227_union_match_by_name.reference +++ b/tests/queries/0_stateless/02227_union_match_by_name.reference @@ -36,7 +36,7 @@ Header: avgWeighted(x, y) Nullable(Float64) Header: x Nullable(Nothing) y UInt8 Expression (Projection) - Header: NULL_Nullable(Nothing) Nullable(Nothing) + Header: _CAST(NULL_Nullable(Nothing), \'Nullable(Nothing)\'_String) Nullable(Nothing) 1_UInt8 UInt8 Expression (Change column names to column identifiers) Header: __table5.dummy UInt8 diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.python b/tests/queries/0_stateless/02233_HTTP_ranged.python index 66ef3304098..5d06e4824b1 100644 --- a/tests/queries/0_stateless/02233_HTTP_ranged.python +++ b/tests/queries/0_stateless/02233_HTTP_ranged.python @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn import socket import sys import re @@ -206,13 +207,22 @@ class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 +class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + pass + + +class ThreadedHTTPServerV6(ThreadingMixIn, HTTPServerV6): + pass + + def start_server(): if IS_IPV6: - httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) + httpd = ThreadedHTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) else: - httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) + httpd = ThreadedHTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) t = threading.Thread(target=httpd.serve_forever) + t.start() return t, httpd @@ -235,8 +245,6 @@ def run_test(allow_range, settings, check_retries=False): HttpProcessor.responses_to_get = ["500", "200", "206"] retries_num = len(HttpProcessor.responses_to_get) - t, httpd = start_server() - t.start() test_select(settings) download_buffer_size = settings["max_download_buffer_size"] @@ -261,12 +269,12 @@ def run_test(allow_range, settings, check_retries=False): if HttpProcessor.range_used: raise Exception("HTTP Range used while not supported") - httpd.shutdown() - t.join() print("PASSED") def main(): + t, httpd = start_server() + settings = {"max_download_buffer_size": 20} # Test Accept-Ranges=False @@ -285,10 +293,15 @@ def main(): settings["max_download_threads"] = 2 run_test(allow_range=True, settings=settings, 
check_retries=True) + httpd.shutdown() + t.join() + if __name__ == "__main__": try: main() + sys.stdout.flush() + os._exit(0) except Exception as ex: exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_tb(exc_traceback, file=sys.stderr) diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference index 2455f50b7f2..e88abb35ab4 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference @@ -50,6 +50,8 @@ SOME GRANULES FILTERED OUT 100002 foo PREWHERE 301408 164953047376 164953047376 +335872 166463369216 166463369216 +301407 164952947376 164952947376 42 10042 20042 diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index 73ae6eb499f..5af6565c03d 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -52,7 +52,7 @@ SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000 SELECT 'PREWHERE'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000; -SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10, 16 } -SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10, 16 } +SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; +SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index bc1a4cbfdd1..0b6b9f461b0 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
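# NOTE (illustration only, not part of the diff above): 02233_HTTP_ranged.python now builds its mock
# server from socketserver.ThreadingMixIn and starts it once in main(), so every HTTP request is
# served in its own thread and a stalled ranged request cannot block the rest of the test.
# A minimal standalone sketch of that pattern follows; the handler, host and port are invented here
# and are not the test's HttpProcessor or HTTP_SERVER_ADDRESS.
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn

class EchoHandler(BaseHTTPRequestHandler):
    # Trivial stand-in for the test's request handler.
    def do_GET(self):
        body = b"hello\n"
        self.send_response(200)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    daemon_threads = True  # handler threads do not keep the process alive

if __name__ == "__main__":
    httpd = ThreadedHTTPServer(("127.0.0.1", 0), EchoHandler)  # port 0 = any free port
    server_thread = threading.Thread(target=httpd.serve_forever)
    server_thread.start()
    # ... issue requests against httpd.server_address here ...
    httpd.shutdown()
    server_thread.join()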
"$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +${CLICKHOUSE_CLIENT} --allow_suspicious_low_cardinality_types=1 --multiquery --multiline --query=""" DROP TABLE IF EXISTS t_01411; DROP TABLE IF EXISTS t_01411_num; diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 157837983f7..186dcc1eeb2 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -1,62 +1,220 @@ Using storage policy: s3_cache +DROP TABLE IF EXISTS test_02241 +CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1 +SYSTEM STOP MERGES test_02241 +SYSTEM DROP FILESYSTEM CACHE +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 0 +SELECT count(), sum(size) FROM system.filesystem_cache 0 0 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 1100 +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 0 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT count(), sum(size) size FROM system.filesystem_cache 8 1100 +SYSTEM DROP FILESYSTEM CACHE +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + 
WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) +SELECT count(), sum(size) FROM system.filesystem_cache 24 84045 +SYSTEM START MERGES test_02241 +OPTIMIZE TABLE test_02241 FINAL +SELECT count(), sum(size) FROM system.filesystem_cache 32 167243 +ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 +SELECT count(), sum(size) FROM system.filesystem_cache 41 250541 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) +SYSTEM FLUSH LOGS INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 +SELECT count() FROM test_02241 5010500 +SELECT count() FROM test_02241 WHERE value LIKE '%010%' 18816 Using storage policy: local_cache +DROP TABLE IF EXISTS test_02241 +CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1 +SYSTEM STOP MERGES test_02241 +SYSTEM DROP FILESYSTEM CACHE +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 0 +SELECT count(), sum(size) FROM system.filesystem_cache 0 0 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 1100 +SELECT count() FROM system.filesystem_cache 
WHERE cache_hits > 0 0 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT count(), sum(size) size FROM system.filesystem_cache 8 1100 +SYSTEM DROP FILESYSTEM CACHE +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) +SELECT count(), sum(size) FROM system.filesystem_cache 24 84045 +SYSTEM START MERGES test_02241 +OPTIMIZE TABLE test_02241 FINAL +SELECT count(), sum(size) FROM system.filesystem_cache 32 167243 +ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 +SELECT count(), sum(size) FROM system.filesystem_cache 41 250541 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) +SYSTEM FLUSH LOGS INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 +SELECT count() FROM test_02241 5010500 +SELECT count() FROM test_02241 WHERE value LIKE '%010%' 18816 diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 96f61cf61e8..ee1d942a421 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -10,13 +10,13 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) for STORAGE_POLICY in 's3_cache' 'local_cache'; do echo "Using storage policy: $STORAGE_POLICY" - $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02241" - $CLICKHOUSE_CLIENT --query "CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1" - $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_02241" + $CLICKHOUSE_CLIENT --echo --query "DROP TABLE IF EXISTS test_02241" + $CLICKHOUSE_CLIENT --echo --query "CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, 
ratio_of_defaults_for_sparse_serialization = 1" + $CLICKHOUSE_CLIENT --echo --query "SYSTEM STOP MERGES test_02241" - $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + $CLICKHOUSE_CLIENT --echo --query "SYSTEM DROP FILESYSTEM CACHE" - $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + $CLICKHOUSE_CLIENT --echo -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state FROM ( SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path @@ -32,12 +32,12 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do WHERE endsWith(local_path, 'data.bin') FORMAT Vertical" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" - $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + $CLICKHOUSE_CLIENT --echo -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state FROM ( SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path @@ -53,24 +53,24 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do WHERE endsWith(local_path, 'data.bin') FORMAT Vertical" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" - $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02241 FORMAT Null" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + $CLICKHOUSE_CLIENT --echo --query "SELECT * FROM test_02241 FORMAT Null" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" - $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02241 FORMAT Null" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + $CLICKHOUSE_CLIENT --echo --query "SELECT 
* FROM test_02241 FORMAT Null" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) size FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) size FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + $CLICKHOUSE_CLIENT --echo --query "SYSTEM DROP FILESYSTEM CACHE" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200)" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200)" - $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + $CLICKHOUSE_CLIENT --echo -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state FROM ( SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path @@ -86,27 +86,28 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do WHERE endsWith(local_path, 'data.bin') FORMAT Vertical;" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000)" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000)" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SYSTEM START MERGES test_02241" 
+ $CLICKHOUSE_CLIENT --echo --query "SYSTEM START MERGES test_02241" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "OPTIMIZE TABLE test_02241 FINAL" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "OPTIMIZE TABLE test_02241 FINAL" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --mutations_sync=2 --query "ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000)" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --mutations_sync=2 --query "ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000)" + + $CLICKHOUSE_CLIENT --echo --query "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -n --query "SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read @@ -121,6 +122,6 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do DESC LIMIT 1" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM test_02241" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM test_02241 WHERE value LIKE '%010%'" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM test_02241" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM test_02241 WHERE value LIKE '%010%'" done diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh index 954e2e83f27..8ff6e28b123 100755 --- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh @@ -12,6 +12,6 @@ DATA_FILE=$USER_FILES_PATH/$FILE_NAME cp $CUR_DIR/data_parquet_bad_column/metadata_0.parquet $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "Cannot extract table structure" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" $CLICKHOUSE_CLIENT -q "select count(*) from file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" diff --git a/tests/queries/0_stateless/02252_jit_profile_events.sql b/tests/queries/0_stateless/02252_jit_profile_events.sql index fb7f806c46b..487f43737e8 100644 --- a/tests/queries/0_stateless/02252_jit_profile_events.sql +++ b/tests/queries/0_stateless/02252_jit_profile_events.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-parallel, no-cpu-aarch64, no-msan +-- Tags: no-fasttest, no-parallel, no-msan SET compile_expressions = 1; SET min_count_to_compile_expression = 0; diff --git a/tests/queries/0_stateless/02265_rename_join_ordinary_to_atomic.sql b/tests/queries/0_stateless/02265_rename_join_ordinary_to_atomic.sql index 
041cb887647..235fc86f828 100644 --- a/tests/queries/0_stateless/02265_rename_join_ordinary_to_atomic.sql +++ b/tests/queries/0_stateless/02265_rename_join_ordinary_to_atomic.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel +SET send_logs_level = 'fatal'; + set allow_deprecated_database_ordinary=1; DROP DATABASE IF EXISTS 02265_atomic_db; DROP DATABASE IF EXISTS 02265_ordinary_db; diff --git a/tests/queries/0_stateless/02265_test_dns_profile_events.sh b/tests/queries/0_stateless/02265_test_dns_profile_events.sh index 756a761a0ae..50fa6ba2cda 100755 --- a/tests/queries/0_stateless/02265_test_dns_profile_events.sh +++ b/tests/queries/0_stateless/02265_test_dns_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel +# Tags: no-parallel, no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql index 5462d38f1a3..98bf29c32f5 100644 --- a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql +++ b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql @@ -1,7 +1,7 @@ -- Tags: no-fasttest insert into function file('02268_data.jsonl', 'TSV') select 1; -select * from file('02268_data.jsonl'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} +select * from file('02268_data.jsonl'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into function file('02268_data.jsonCompactEachRow', 'TSV') select 1; -select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} +select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh index a3711497ae8..2f6167c3ddf 100755 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mys $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" -$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32)') settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'EMPTY_DATA_PASSED' && echo 'OK' || echo 'FAIL' echo "dump2" diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index ce35c4bd878..4eae5a1abb4 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -88,4 +88,4 @@ echo ' } ' > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) 
settings input_format_max_rows_to_read_for_schema_inference=3, input_format_json_infer_incomplete_types_as_strings=0" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3, input_format_json_infer_incomplete_types_as_strings=0" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02294_system_certificates.reference b/tests/queries/0_stateless/02294_system_certificates.reference index 4655f996c40..432df9110a2 100644 --- a/tests/queries/0_stateless/02294_system_certificates.reference +++ b/tests/queries/0_stateless/02294_system_certificates.reference @@ -1,10 +1,10 @@ -version Int32 -serial_number Nullable(String) -signature_algo Nullable(String) -issuer Nullable(String) -not_before Nullable(String) -not_after Nullable(String) -subject Nullable(String) -pkey_algo Nullable(String) -path String -default UInt8 +version Int32 Version of the certificate. Values are 0 for v1, 1 for v2, 2 for v3. +serial_number Nullable(String) Serial Number of the certificate assigned by the issuer. +signature_algo Nullable(String) Signature Algorithm - an algorithm used by the issuer to sign this certificate. +issuer Nullable(String) Issuer - an unique identifier for the Certificate Authority issuing this certificate. +not_before Nullable(String) The beginning of the time window when this certificate is valid. +not_after Nullable(String) The end of the time window when this certificate is valid. +subject Nullable(String) Subject - identifies the owner of the public key. +pkey_algo Nullable(String) Public Key Algorithm defines the algorithm the public key can be used with. +path String Path to the file or directory containing this certificate. +default UInt8 Certificate is in the default certificate location. diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference index 69571551c2b..85e8a802bdc 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference @@ -56,6 +56,7 @@ algorithm: Thread Sorting (Stream): a ASC, b ASC Sorting (Stream): a ASC, b ASC Sorting (Stream): a ASC, b ASC +Sorting (Stream): a ASC, b ASC -- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query -- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns Sorting (Stream): a ASC diff --git a/tests/queries/0_stateless/02326_settings_changes_system_table.reference b/tests/queries/0_stateless/02326_settings_changes_system_table.reference index 1c8c4fa1880..946b2727d30 100644 --- a/tests/queries/0_stateless/02326_settings_changes_system_table.reference +++ b/tests/queries/0_stateless/02326_settings_changes_system_table.reference @@ -1,3 +1,3 @@ -version String -changes Array(Tuple(\n name String,\n previous_value String,\n new_value String,\n reason String)) +version String The ClickHouse server version. +changes Array(Tuple(\n name String,\n previous_value String,\n new_value String,\n reason String)) The list of changes in settings which changed the behaviour of ClickHouse. 
22.5 [('memory_overcommit_ratio_denominator','0','1073741824','Enable memory overcommit feature by default'),('memory_overcommit_ratio_denominator_for_user','0','1073741824','Enable memory overcommit feature by default')] diff --git a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh index dfc0dedeaf1..650faf6985e 100755 --- a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh +++ b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh @@ -15,11 +15,11 @@ mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/02327_* $SCHEMADIR/$SERVER_SCHEMADIR/ -$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="create table test_protobuf engine=File(Protobuf) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="create table test_capnp engine=File(CapnProto) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="create table test_protobuf engine=File(Protobuf) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="create table test_capnp engine=File(CapnProto) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty'" 2>&1 | grep -F -q 'The table structure cannot be extracted' && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="desc file(data.pb) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty', input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference=1"; $CLICKHOUSE_CLIENT --query="desc file(data.capnp) settings format_schema='$SERVER_SCHEMADIR/02327_schema:MessageWithEmpty', input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference=1"; diff --git a/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.sql b/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.sql index 0ceed178865..a4a69f4fa40 100644 --- a/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.sql +++ b/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.sql @@ -1,6 +1,7 @@ -- Tags: no-fasttest set input_format_try_infer_integers=1; +set input_format_try_infer_exponent_floats=1; select 'JSONEachRow'; desc format(JSONEachRow, '{"x" : 123}'); diff --git a/tests/queries/0_stateless/02343_create_empty_as_select.reference b/tests/queries/0_stateless/02343_create_empty_as_select.reference 
index 3b0d34c5863..8a21a716bd1 100644 --- a/tests/queries/0_stateless/02343_create_empty_as_select.reference +++ b/tests/queries/0_stateless/02343_create_empty_as_select.reference @@ -1,4 +1,4 @@ CREATE TABLE default.t\n(\n `1` UInt8\n)\nENGINE = Memory 0 -CREATE MATERIALIZED VIEW default.mv\n(\n `1` UInt8\n)\nENGINE = Memory AS\nSELECT 1 +CREATE MATERIALIZED VIEW default.mv\n(\n `1` UInt8\n)\nENGINE = Memory\nAS SELECT 1 0 diff --git a/tests/queries/0_stateless/02346_inverted_index_match_predicate.reference b/tests/queries/0_stateless/02346_inverted_index_match_predicate.reference index 9dc8d5b76d9..84fc422379c 100644 --- a/tests/queries/0_stateless/02346_inverted_index_match_predicate.reference +++ b/tests/queries/0_stateless/02346_inverted_index_match_predicate.reference @@ -1,20 +1,20 @@ 1 Hello ClickHouse 2 Hello World - Granules: 6/6 - Granules: 2/6 + Granules: 6/6 + Granules: 2/6 Granules: 6/6 Granules: 2/6 --- 1 Hello ClickHouse 2 Hello World 6 World Champion - Granules: 6/6 - Granules: 3/6 + Granules: 6/6 + Granules: 3/6 Granules: 6/6 Granules: 3/6 --- 5 OLAP Database - Granules: 6/6 - Granules: 1/6 + Granules: 6/6 + Granules: 1/6 Granules: 6/6 Granules: 1/6 diff --git a/tests/queries/0_stateless/02346_inverted_index_search.sql b/tests/queries/0_stateless/02346_inverted_index_search.sql index be56f24d5da..d225d3463d1 100644 --- a/tests/queries/0_stateless/02346_inverted_index_search.sql +++ b/tests/queries/0_stateless/02346_inverted_index_search.sql @@ -243,40 +243,6 @@ CREATE TABLE tab (row_id UInt32, str String, INDEX idx str TYPE inverted) ENGINE INSERT INTO tab VALUES (0, 'a'); SELECT * FROM tab WHERE str == 'b' AND 1.0; - --- Tests with parameter max_digestion_size_per_segment are flaky in CI, not clear why --> comment out for the time being: - --- ---------------------------------------------------- --- SELECT 'Test max_digestion_size_per_segment'; --- --- DROP TABLE IF EXISTS tab; --- --- CREATE TABLE tab(k UInt64, s String, INDEX af(s) TYPE inverted(0)) --- Engine=MergeTree --- ORDER BY (k) --- SETTINGS max_digestion_size_per_segment = 1024, index_granularity = 256 --- AS --- SELECT --- number, --- format('{},{},{},{}', hex(12345678), hex(87654321), hex(number/17 + 5), hex(13579012)) as s --- FROM numbers(10240); --- --- -- check inverted index was created --- SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab' AND database = currentDatabase() LIMIT 1; --- --- -- search inverted index --- SELECT s FROM tab WHERE hasToken(s, '6969696969898240'); --- --- -- check the query only read 1 granule (1 row total; each granule has 256 rows) --- SYSTEM FLUSH LOGS; --- SELECT read_rows==256 from system.query_log --- WHERE query_kind ='Select' --- AND current_database = currentDatabase() --- AND endsWith(trimRight(query), 'SELECT s FROM tab WHERE hasToken(s, \'6969696969898240\');') --- AND type='QueryFinish' --- AND result_rows==1 --- LIMIT 1; --- SELECT 'Test max_rows_per_postings_list'; DROP TABLE IF EXISTS tab; -- create table 'tab' with inverted index parameter (ngrams, max_rows_per_most_list) which is (0, 10240) diff --git a/tests/queries/0_stateless/02346_non_negative_derivative.reference b/tests/queries/0_stateless/02346_non_negative_derivative.reference index b81af45962e..22e5f609ad7 100644 --- a/tests/queries/0_stateless/02346_non_negative_derivative.reference +++ b/tests/queries/0_stateless/02346_non_negative_derivative.reference @@ -1,63 +1,63 @@ 1 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 
21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 340.00000000000006 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 0.20550000000000002 1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 0.0000010200000000000004 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 6.165000000000001e-10 1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 0.00136 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 8.22e-7 1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 1.7000000000000004 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 0.0010275000000000002 1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 2040.0000000000005 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 1.233 1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 142800.00000000003 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 86.31 1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 9792000.000000002 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 5918.400000000001 1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 264384000.00000003 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 159796.80000000002 1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.127 3.7 0 -2299-12-31 23:37:36.788 1.1 0 -2299-12-31 23:37:36.789 2.34 0 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.123 2.34 0 +1979-12-12 21:21:21.127 3.7 2056320000.0000002 1979-12-12 21:21:21.129 2.1 0 1979-12-12 21:21:22.000 1.3345 0 1979-12-12 21:21:23.000 1.54 1242864 diff --git a/tests/queries/0_stateless/02346_non_negative_derivative.sql b/tests/queries/0_stateless/02346_non_negative_derivative.sql index 265a8afb2cb..704241da16c 100644 --- a/tests/queries/0_stateless/02346_non_negative_derivative.sql +++ b/tests/queries/0_stateless/02346_non_negative_derivative.sql @@ -18,7 +18,7 @@ SELECT ( SELECT ts, metric, - nonNegativeDerivative(metric, ts) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv + nonNegativeDerivative(metric, ts) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING 
AND UNBOUNDED FOLLOWING) AS deriv FROM nnd LIMIT 5, 1 ) = ( @@ -29,37 +29,37 @@ SELECT ( FROM nnd LIMIT 5, 1 ); -SELECT ts, metric, nonNegativeDerivative(metric, ts) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS deriv FROM nnd; -- Nanosecond -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS deriv FROM nnd; -- Microsecond -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 4 MICROSECOND) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 4 MICROSECOND) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS deriv FROM nnd; -- Millisecond -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 5 MILLISECOND) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 5 MILLISECOND) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS deriv FROM nnd; -- Second -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 6 SECOND) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 6 SECOND) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- Minute -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 7 MINUTE) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 7 MINUTE) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) AS deriv FROM nnd; -- Hour -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 8 HOUR) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 8 HOUR) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- Day -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 9 DAY) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN 3 PRECEDING AND 3 FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 9 DAY) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN 3 PRECEDING AND 3 FOLLOWING) AS deriv FROM nnd; -- Week -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 10 WEEK) OVER (PARTITION BY id>3 ORDER BY ts ASC Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 10 WEEK) OVER (PARTITION BY id>3 ORDER BY ts, metric ASC Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- shall not work for month, quarter, year (intervals with floating number of seconds) -- 
Month -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 11 MONTH) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 11 MONTH) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -- Quarter -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 12 QUARTER) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 12 QUARTER) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -- Year -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 13 YEAR) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 13 YEAR) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -- test against wrong arguments/types -SELECT ts, metric, nonNegativeDerivative(metric, 1, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -SELECT ts, metric, nonNegativeDerivative('string not datetime', ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND, id) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -SELECT ts, metric, nonNegativeDerivative(metric) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } +SELECT ts, metric, nonNegativeDerivative(metric, 1, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } +SELECT ts, metric, nonNegativeDerivative('string not datetime', ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND, id) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } +SELECT ts, metric, nonNegativeDerivative(metric) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -- cleanup DROP TABLE IF EXISTS nnd; diff --git a/tests/queries/0_stateless/02352_rwlock.sh b/tests/queries/0_stateless/02352_rwlock.sh index 08551794c2e..b4a77e0b08a 100755 --- 
a/tests/queries/0_stateless/02352_rwlock.sh +++ b/tests/queries/0_stateless/02352_rwlock.sh @@ -6,6 +6,9 @@ # In other words to ensure that after WRITE lock failure (DROP), # READ lock (SELECT) available instantly. +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02354_vector_search_queries.reference b/tests/queries/0_stateless/02354_vector_search_queries.reference index befa6af5a08..41c1915ecc3 100644 --- a/tests/queries/0_stateless/02354_vector_search_queries.reference +++ b/tests/queries/0_stateless/02354_vector_search_queries.reference @@ -18,17 +18,18 @@ ARRAY, 10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, - Annoy: WHERE-type, EXPLAIN Expression ((Projection + Before ORDER BY)) Limit (preliminary LIMIT (without OFFSET)) - ReadFromMergeTree (default.tab_annoy) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: idx - Description: annoy GRANULARITY 100000000 - Parts: 1/1 - Granules: 1/1 + Expression + ReadFromMergeTree (default.tab_annoy) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: idx + Description: annoy GRANULARITY 100000000 + Parts: 1/1 + Granules: 1/1 - Annoy: ORDER-BY-type, EXPLAIN Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) @@ -48,17 +49,18 @@ Expression (Projection) - Usearch: WHERE-type, EXPLAIN Expression ((Projection + Before ORDER BY)) Limit (preliminary LIMIT (without OFFSET)) - ReadFromMergeTree (default.tab_usearch) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: idx - Description: usearch GRANULARITY 100000000 - Parts: 1/1 - Granules: 1/1 + Expression + ReadFromMergeTree (default.tab_usearch) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: idx + Description: usearch GRANULARITY 100000000 + Parts: 1/1 + Granules: 1/1 - Usearch: ORDER-BY-type, EXPLAIN Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) @@ -95,17 +97,18 @@ ARRAY vectors, 12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 - Annoy: WHERE-type, EXPLAIN Expression ((Projection + Before ORDER BY)) Limit (preliminary LIMIT (without OFFSET)) - ReadFromMergeTree (default.tab_annoy) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 4/4 - Skip - Name: idx - Description: annoy GRANULARITY 2 - Parts: 1/1 - Granules: 1/4 + Expression + ReadFromMergeTree (default.tab_annoy) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: idx + Description: annoy GRANULARITY 2 + Parts: 1/1 + Granules: 1/4 - Annoy: ORDER-BY-type, EXPLAIN Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) @@ -125,17 +128,18 @@ Expression (Projection) - Usearch: WHERE-type, EXPLAIN Expression ((Projection + Before ORDER BY)) Limit (preliminary LIMIT (without OFFSET)) - ReadFromMergeTree (default.tab_usearch) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 4/4 - Skip - Name: idx - Description: usearch GRANULARITY 2 - Parts: 1/1 - Granules: 1/4 + Expression + ReadFromMergeTree (default.tab_usearch) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: idx + Description: usearch GRANULARITY 2 + Parts: 1/1 + Granules: 1/4 - Usearch: ORDER-BY-type, EXPLAIN Expression (Projection) Limit 
(preliminary LIMIT (without OFFSET)) diff --git a/tests/queries/0_stateless/02366_kql_create_table.reference b/tests/queries/0_stateless/02366_kql_create_table.reference new file mode 100644 index 00000000000..35136b5ff42 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.reference @@ -0,0 +1,4 @@ +-- test create table -- +Theodore +Diaz +Theodore Diaz 28 diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql new file mode 100644 index 00000000000..b266679b06a --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); +Select '-- test create table --' ; +Select * from kql(Customers|project FirstName) limit 1;; +DROP TABLE IF EXISTS kql_table1; +CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); +select LastName from kql_table1 limit 1; +DROP TABLE IF EXISTS kql_table2; +CREATE TABLE kql_table2 +( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); +select * from kql_table2 limit 1; +-- select * from kql(Customers | where FirstName !in ("test", "test2")); +DROP TABLE IF EXISTS Customers; +DROP TABLE IF EXISTS kql_table1; +DROP TABLE IF EXISTS kql_table2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_datatype.reference b/tests/queries/0_stateless/02366_kql_datatype.reference new file mode 100644 index 00000000000..fe666f3734c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.reference @@ -0,0 +1,105 @@ +-- bool +true +\N +-- int +123 +\N +-- long +123 +255 +-1 +\N +456 +-- real +0.01 +\N +nan +inf +-inf +-- datetime +2015-12-31 23:59:59.900000000 +2015-12-31 00:00:00.000000000 +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.000000000 +2014-11-08 15:55:00.000000000 +2014-11-08 00:00:00.000000000 +\N +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.123456000 +-- time +1216984.12345 +45055.123 +86400 +-86400 +6.000000000000001e-9 +6e-7 +172800 +259200 +-- guid +\N +-- timespan (time) +172800 +1800 +10 +0.1 +0.00001 +1e-7 +1120343 +-- null +1 +\N \N \N \N \N +-- decimal +\N +123.345 +100000 +-- dynamic +\N +1 +86400 +[1,2,3] +[[1],[2],[3]] +['a','b','c'] +-- cast functions +true +1 +-- tobool("false") +false +1 +-- tobool(1) +true +1 +-- tobool(123) +true +1 +-- tobool("abc") +\N +\N +-- todouble() +123.4 +\N +-- toreal() +123.4 +\N +-- toint() +1 +\N +-- tostring() +123 +1 +-- todatetime() +1 +\N +-- make_timespan() +01:12:00 01:12:30 1.12:30:55 +-- totimespan() +1e-7 +60 +\N +1120343 +-- tolong() +123 +\N +-- todecimal() +123.345 +\N +\N diff --git a/tests/queries/0_stateless/02366_kql_datatype.sql b/tests/queries/0_stateless/02366_kql_datatype.sql new file mode 100644 index 00000000000..ecd29504298 --- /dev/null +++ 
b/tests/queries/0_stateless/02366_kql_datatype.sql @@ -0,0 +1,117 @@ +set dialect = 'kusto'; + +print '-- bool' +print bool(true); +print bool(true); +print bool(null); +print '-- int'; +print int(123); +print int(null); +print int('4'); -- { clientError BAD_ARGUMENTS } +print '-- long'; +print long(123); +print long(0xff); +print long(-1); +print long(null); +print 456; +print '-- real'; +print real(0.01); +print real(null); +print real(nan); +print real(+inf); +print real(-inf); +print double('4.2'); -- { clientError BAD_ARGUMENTS } +print '-- datetime'; +print datetime(2015-12-31 23:59:59.9); +print datetime(2015-12-31); +print datetime('2014-05-25T08:20:03.123456'); +print datetime('2014-11-08 15:55:55'); +print datetime('2014-11-08 15:55'); +print datetime('2014-11-08'); +print datetime(null); +print datetime('2014-05-25T08:20:03.123456Z'); +print datetime('2014-11-08 15:55:55.123456Z'); +print '-- time'; +print time('14.02:03:04.12345'); +print time('12:30:55.123'); +print time(1d); +print time(-1d); +print time(6nanoseconds); +print time(6tick); +print time(2); +print time(2) + 1d; +print '-- guid' +print guid(74be27de-1e4e-49d9-b579-fe0b331d3642); +print guid(null); +print '-- timespan (time)'; +print timespan(2d); -- 2 days +--print timespan(1.5h); -- 1.5 hour +print timespan(30m); -- 30 minutes +print timespan(10s); -- 10 seconds +--print timespan(0.1s); -- 0.1 second +print timespan(100ms); -- 100 millisecond +print timespan(10microsecond); -- 10 microseconds +print timespan(1tick); -- 100 nanoseconds +--print timespan(1.5h) / timespan(30m); +print timespan('12.23:12:23') / timespan(1s); +print '-- null'; +print isnull(null); +print bool(null), int(null), long(null), real(null), double(null); +print '-- decimal'; +print decimal(null); +print decimal(123.345); +print decimal(1e5); +print '-- dynamic'; -- no support for mixed types and bags for now +print dynamic(null); +print dynamic(1); +print dynamic(timespan(1d)); +print dynamic([1,2,3]); +print dynamic([[1], [2], [3]]); +print dynamic(['a', "b", 'c']); +print '-- cast functions' +print '--tobool("true")'; -- == true +print tobool('true'); -- == true +print tobool('true') == toboolean('true'); -- == true +print '-- tobool("false")'; -- == false +print tobool('false'); -- == false +print tobool('false') == toboolean('false'); -- == false +print '-- tobool(1)'; -- == true +print tobool(1); -- == true +print tobool(1) == toboolean(1); -- == true +print '-- tobool(123)'; -- == true +print tobool(123); -- == true +print tobool(123) == toboolean(123); -- == true +print '-- tobool("abc")'; -- == null +print tobool('abc'); -- == null +print tobool('abc') == toboolean('abc'); -- == null +print '-- todouble()'; +print todouble('123.4'); +print todouble('abc') == null; +print '-- toreal()'; +print toreal("123.4"); +print toreal('abc') == null; +print '-- toint()'; +print toint("123") == int(123); +print toint('abc'); +print '-- tostring()'; +print tostring(123); +print tostring(null) == ''; +print '-- todatetime()'; +print todatetime("2015-12-24") == datetime(2015-12-24); +print todatetime('abc') == null; +print '-- make_timespan()'; +print v1=make_timespan(1,12), v2=make_timespan(1,12,30), v3=make_timespan(1,12,30,55.123); +print '-- totimespan()'; +print totimespan(1tick); +print totimespan('0.00:01:00'); +print totimespan('abc'); +print totimespan('12.23:12:23') / totimespan(1s); +-- print totimespan(strcat('12.', '23', ':12:', '23')) / timespan(1s); -> 1120343 +print '-- tolong()'; +print tolong('123'); +print tolong('abc'); +print 
'-- todecimal()'; +print todecimal(123.345); +print todecimal(null); +print todecimal('abc'); +-- print todecimal(4 * 2 + 3); -> 11 diff --git a/tests/queries/0_stateless/02366_kql_distinct.reference b/tests/queries/0_stateless/02366_kql_distinct.reference new file mode 100644 index 00000000000..2100f44f18c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_distinct.reference @@ -0,0 +1,27 @@ +-- distinct * -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +\N why Professional Partial College 38 +-- distinct one column -- +Skilled Manual +Management abcd defg +Professional +-- distinct two column -- +Skilled Manual Bachelors +Management abcd defg Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree +Professional Partial College +-- distinct with where -- +Skilled Manual Bachelors +Management abcd defg Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree +Professional Partial College +-- distinct with where, order -- +Skilled Manual Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree diff --git a/tests/queries/0_stateless/02366_kql_distinct.sql b/tests/queries/0_stateless/02366_kql_distinct.sql new file mode 100644 index 00000000000..3c997eb4865 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_distinct.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; + +print '-- distinct * --'; +Customers | distinct *; + +print '-- distinct one column --'; +Customers | distinct Occupation; + +print '-- distinct two column --'; +Customers | distinct Occupation, Education; + +print '-- distinct with where --'; +Customers where Age <30 | distinct Occupation, Education; + +print '-- distinct with where, order --'; +Customers |where Age <30 | order by Age| distinct Occupation, Education; diff --git a/tests/queries/0_stateless/02366_kql_extend.reference b/tests/queries/0_stateless/02366_kql_extend.reference new file mode 100644 index 00000000000..2936c9ea19c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_extend.reference @@ -0,0 +1,32 @@ +-- extend #1 -- +Aldi Apple 4 2016-09-10 400 +Costco Apple 2 2016-09-11 200 +-- extend #2 -- +Apple 200 +Apple 400 +-- extend #3 -- +Apple cost 480 on average based on 5 samples. +Snargaluff cost 28080 on average based on 5 samples. 
+-- extend #4 -- +1 +-- extend #5 -- +Aldi Apple 4 2016-09-10 Apple was purchased from Aldi for $4 on 2016-09-10 400 +Costco Apple 2 2016-09-11 Apple was purchased from Costco for $2 on 2016-09-11 200 +-- extend #6 -- +Aldi Apple 2016-09-10 400 +Costco Apple 2016-09-11 200 +Aldi Apple 2016-09-10 600 +Costco Snargaluff 2016-09-12 10000 +Aldi Apple 2016-09-12 700 +Aldi Snargaluff 2016-09-11 40000 +Costco Snargaluff 2016-09-12 10400 +Aldi Apple 2016-09-12 500 +Aldi Snargaluff 2016-09-11 60000 +Costco Snargaluff 2016-09-10 20000 +-- extend #7 -- +5 +-- extend #8 -- +-- extend #9 -- +-- extend #10 -- +-- extend #11 -- +5 [2,1] diff --git a/tests/queries/0_stateless/02366_kql_extend.sql b/tests/queries/0_stateless/02366_kql_extend.sql new file mode 100644 index 00000000000..51fb16c1ad8 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_extend.sql @@ -0,0 +1,58 @@ +-- datatable(Supplier:string, Fruit:string, Price: real, Purchase:datetime) +-- [ +-- 'Aldi','Apple',4,'2016-09-10', +-- 'Costco','Apple',2,'2016-09-11', +-- 'Aldi','Apple',6,'2016-09-10', +-- 'Costco','Snargaluff',100,'2016-09-12', +-- 'Aldi','Apple',7,'2016-09-12', +-- 'Aldi','Snargaluff',400,'2016-09-11', +-- 'Costco','Snargaluff',104,'2016-09-12', +-- 'Aldi','Apple',5,'2016-09-12', +-- 'Aldi','Snargaluff',600,'2016-09-11', +-- 'Costco','Snargaluff',200,'2016-09-10', +-- ] + +DROP TABLE IF EXISTS Ledger; +CREATE TABLE Ledger +( + Supplier Nullable(String), + Fruit String , + Price Float64, + Purchase Date +) ENGINE = Memory; +INSERT INTO Ledger VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); + +set dialect = 'kusto'; + +print '-- extend #1 --'; +Ledger | extend PriceInCents = 100 * Price | take 2; + +print '-- extend #2 --'; +Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | take 2; + +print '-- extend #3 --'; +Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence | sort by Sentence asc; + +print '-- extend #4 --'; +Ledger | extend a = Price | extend b = a | extend c = a, d = b + 500 | extend Pass = bool(b == a and c == a and d == b + 500) | summarize binary_all_and(Pass); + +print '-- extend #5 --'; +Ledger | take 2 | extend strcat(Fruit, ' was purchased from ', Supplier, ' for $', tostring(Price), ' on ', tostring(Purchase)) | extend PriceInCents = 100 * Price; + +print '-- extend #6 --'; +Ledger | extend Price = 100 * Price; + +print '-- extend #7 --'; +print a = 4 | extend a = 5; + +print '-- extend #8 --'; +-- print x = 5 | extend array_sort_desc(range(0, x), range(1, x + 1)) + +print '-- extend #9 --'; +print x = 19 | extend = 4 + ; -- { clientError SYNTAX_ERROR } + +print '-- extend #10 --'; +Ledger | extend PriceInCents = * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | 
project Sentence; -- { clientError SYNTAX_ERROR } + +print '-- extend #11 --'; -- should ideally return this in the future: 5 [2,1] because of the alias ex +print x = 5 | extend ex = array_sort_desc(dynamic([1, 2]), dynamic([3, 4])); diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference new file mode 100644 index 00000000000..6276cd6d867 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -0,0 +1,7 @@ + -- binary functions +4 7 +1 +1 +1 +7 3 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql new file mode 100644 index 00000000000..824022b564c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -0,0 +1,8 @@ +set dialect='kusto'; +print ' -- binary functions'; +print binary_and(4,7), binary_or(4,7); +print binary_shift_left(1, 1) == binary_shift_left(1, 65); +print binary_shift_right(2, 1) == binary_shift_right(2, 65); +print binary_shift_right(binary_shift_left(1, 65), 65) == 1; +print binary_xor(2, 5), bitset_count_ones(42); +print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.reference b/tests/queries/0_stateless/02366_kql_func_datetime.reference new file mode 100644 index 00000000000..40d8d7e19ac --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.reference @@ -0,0 +1,76 @@ +-- dayofmonth() +31 +-- dayofweek() +4.00:00:00 +-- dayofyear() +365 +-- getmonth() +10 +-- getyear() +2015 +-- hoursofday() +23 +-- startofday() +2017-01-01 00:00:00.000000000 +2016-12-31 00:00:00.000000000 +2017-01-02 00:00:00.000000000 +-- endofday() +2017-01-01 23:59:59.999999000 +2016-12-31 23:59:59.999999000 +2017-01-02 23:59:59.999999000 +-- endofmonth() +2017-01-31 23:59:59.999999000 +2016-12-31 23:59:59.999999000 +2017-02-28 23:59:59.999999000 +2022-09-30 23:59:59.999999000 +-- startofweek() +2017-01-01 00:00:00.000000000 +2016-12-25 00:00:00.000000000 +2017-01-08 00:00:00.000000000 +-- endofweek() +2017-01-07 23:59:59.999999000 +2016-12-31 23:59:59.999999000 +2017-01-14 23:59:59.999999000 +-- startofyear() +2017-01-01 00:00:00.000000000 +2016-01-01 00:00:00.000000000 +2018-01-01 00:00:00.000000000 +-- endofyear() +2017-12-31 23:59:59.999999000 +2016-12-31 23:59:59.999999000 +2018-12-31 23:59:59.999999000 +-- unixtime_seconds_todatetime() +2019-01-01 00:00:00.000000000 +1970-01-02 00:00:00.000000000 +1969-12-31 00:00:00.000000000 +-- unixtime_microseconds_todatetime +2019-01-01 00:00:00.000000 +-- unixtime_milliseconds_todatetime() +2019-01-01 00:00:00.000 +-- unixtime_nanoseconds_todatetime() +2019-01-01 00:00:00.000000000 +-- weekofyear() +52 +-- monthofyear() +12 +-- weekofyear() +52 +-- now() +1 +-- make_datetime() +1 +2017-10-01 12:10:00.0000000 +2017-10-01 12:11:00.0000000 +-- format_datetime +15-12-14 02:03:04.1234500 +17-01-29 [09:00:05] 2017-01-29 [09:00:05] 17-01-29 [09:00:05 AM] +-- format_timespan() +02:03:04.1234500 +29.09:00:05:12 +-- ago() +-- datetime_diff() +17 2 13 4 29 2 5 10 +-- datetime_part() +2017 4 10 44 30 303 01 02 03 +-- datetime_add() +2018-01-01 00:00:00.0000000 2017-04-01 00:00:00.0000000 2017-02-01 00:00:00.0000000 2017-01-08 00:00:00.0000000 2017-01-02 00:00:00.0000000 2017-01-01 01:00:00.0000000 2017-01-01 00:01:00.0000000 2017-01-01 00:00:01.0000000 diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.sql b/tests/queries/0_stateless/02366_kql_func_datetime.sql new file mode 
100644 index 00000000000..b1fba4166a9 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.sql @@ -0,0 +1,86 @@ +set dialect = 'kusto'; + +print '-- dayofmonth()'; +print dayofmonth(datetime(2015-12-31)); +print '-- dayofweek()'; +print dayofweek(datetime(2015-12-31)); +print '-- dayofyear()'; +print dayofyear(datetime(2015-12-31)); +print '-- getmonth()'; +print getmonth(datetime(2015-10-12)); +print '-- getyear()'; +print getyear(datetime(2015-10-12)); +print '-- hoursofday()'; +print hourofday(datetime(2015-12-31 23:59:59.9)); +print '-- startofday()'; +print startofday(datetime(2017-01-01 10:10:17)); +print startofday(datetime(2017-01-01 10:10:17), -1); +print startofday(datetime(2017-01-01 10:10:17), 1); +print '-- endofday()'; +print endofday(datetime(2017-01-01 10:10:17)); +print endofday(datetime(2017-01-01 10:10:17), -1); +print endofday(datetime(2017-01-01 10:10:17), 1); +print '-- endofmonth()'; +print endofmonth(datetime(2017-01-01 10:10:17)); +print endofmonth(datetime(2017-01-01 10:10:17), -1); +print endofmonth(datetime(2017-01-01 10:10:17), 1); +print endofmonth(datetime(2022-09-23)); +print '-- startofweek()'; +print startofweek(datetime(2017-01-01 10:10:17)); +print startofweek(datetime(2017-01-01 10:10:17), -1); +print startofweek(datetime(2017-01-01 10:10:17), 1); +print '-- endofweek()'; +print endofweek(datetime(2017-01-01 10:10:17)); +print endofweek(datetime(2017-01-01 10:10:17), -1); +print endofweek(datetime(2017-01-01 10:10:17), 1); +print '-- startofyear()'; +print startofyear(datetime(2017-01-01 10:10:17)); +print startofyear(datetime(2017-01-01 10:10:17), -1); +print startofyear(datetime(2017-01-01 10:10:17), 1); +print '-- endofyear()'; +print endofyear(datetime(2017-01-01 10:10:17)); +print endofyear(datetime(2017-01-01 10:10:17), -1); +print endofyear(datetime(2017-01-01 10:10:17), 1); +print '-- unixtime_seconds_todatetime()'; +print unixtime_seconds_todatetime(1546300800); +print unixtime_seconds_todatetime(1d); +print unixtime_seconds_todatetime(-1d); +print '-- unixtime_microseconds_todatetime'; +print unixtime_microseconds_todatetime(1546300800000000); +print '-- unixtime_milliseconds_todatetime()'; +print unixtime_milliseconds_todatetime(1546300800000); +print '-- unixtime_nanoseconds_todatetime()'; +print unixtime_nanoseconds_todatetime(1546300800000000000); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- monthofyear()'; +print monthofyear(datetime(2015-12-31)); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- now()'; +print getyear(now(-2d))>1900; +print '-- make_datetime()'; +print make_datetime(2017,10,01,12,10) == datetime(2017-10-01 12:10:00); +print year_month_day_hour_minute = make_datetime(2017,10,01,12,10); +print year_month_day_hour_minute_second = make_datetime(2017,10,01,12,11,0.1234567); +print '-- format_datetime'; +print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s.fffffff'); +print v1=format_datetime(datetime(2017-01-29 09:00:05),'yy-MM-dd [HH:mm:ss]'), v2=format_datetime(datetime(2017-01-29 09:00:05), 'yyyy-M-dd [H:mm:ss]'), v3=format_datetime(datetime(2017-01-29 09:00:05), 'yy-MM-dd [hh:mm:ss tt]'); +print '-- format_timespan()'; +print format_timespan(time('14.02:03:04.12345'), 'h:m:s.fffffff'); +print v1=format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF'); +-- print v2=format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]'); == '029.9:00:05 [1234500]' +print '-- ago()'; +-- print ago(1d) - now(); +print '-- 
datetime_diff()'; +print year = datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31)), quarter = datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30)), month = datetime_diff('month',datetime(2017-01-01),datetime(2015-12-30)), week = datetime_diff('week',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), day = datetime_diff('day',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), hour = datetime_diff('hour',datetime(2017-10-31 01:00),datetime(2017-10-30 23:59)), minute = datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59)), second = datetime_diff('second',datetime(2017-10-30 23:00:10.100),datetime(2017-10-30 23:00:00.900)); +-- millisecond = datetime_diff('millisecond',datetime(2017-10-30 23:00:00.200100),datetime(2017-10-30 23:00:00.100900)), +-- microsecond = datetime_diff('microsecond',datetime(2017-10-30 23:00:00.1009001),datetime(2017-10-30 23:00:00.1008009)), +-- nanosecond = datetime_diff('nanosecond',datetime(2017-10-30 23:00:00.0000000),datetime(2017-10-30 23:00:00.0000007)) +print '-- datetime_part()'; +print year = datetime_part("year", datetime(2017-10-30 01:02:03.7654321)),quarter = datetime_part("quarter", datetime(2017-10-30 01:02:03.7654321)),month = datetime_part("month", datetime(2017-10-30 01:02:03.7654321)),weekOfYear = datetime_part("week_of_year", datetime(2017-10-30 01:02:03.7654321)),day = datetime_part("day", datetime(2017-10-30 01:02:03.7654321)),dayOfYear = datetime_part("dayOfYear", datetime(2017-10-30 01:02:03.7654321)),hour = datetime_part("hour", datetime(2017-10-30 01:02:03.7654321)),minute = datetime_part("minute", datetime(2017-10-30 01:02:03.7654321)),second = datetime_part("second", datetime(2017-10-30 01:02:03.7654321)); +-- millisecond = datetime_part("millisecond", dt), +-- microsecond = datetime_part("microsecond", dt), +-- nanosecond = datetime_part("nanosecond", dt) +print '-- datetime_add()'; +print year = datetime_add('year',1,make_datetime(2017,1,1)),quarter = datetime_add('quarter',1,make_datetime(2017,1,1)),month = datetime_add('month',1,make_datetime(2017,1,1)),week = datetime_add('week',1,make_datetime(2017,1,1)),day = datetime_add('day',1,make_datetime(2017,1,1)),hour = datetime_add('hour',1,make_datetime(2017,1,1)),minute = datetime_add('minute',1,make_datetime(2017,1,1)),second = datetime_add('second',1,make_datetime(2017,1,1)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.reference b/tests/queries/0_stateless/02366_kql_func_dynamic.reference new file mode 100644 index 00000000000..564f1eebc4b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_dynamic.reference @@ -0,0 +1,152 @@ +-- constant index value +1 c ['A',NULL,'C'] +-- array_length() +1 +1 +-- array_sum() +1 +1 +-- array_index_of() +3 +1 +-- array_iif() +[1,5,3] +[1,5,3] +[1,5,NULL] +[NULL,NULL,NULL] +-- array_concat() +[1,2,3,4,5,6] +-- array_reverse() +[] +[1] +[4,3,2,1] +['example','an','is','this'] +-- array_rotate_left() +[] +[] +[] +[3,4,5,1,2] +[1,2,3,4,5] +[3,4,5,1,2] +[4,5,1,2,3] +[1,2,3,4,5] +[4,5,1,2,3] +-- array_rotate_right() +[] +[] +[] +[4,5,1,2,3] +[1,2,3,4,5] +[4,5,1,2,3] +[3,4,5,1,2] +[1,2,3,4,5] +[3,4,5,1,2] +-- array_shift_left() +[] +[] +[] +[3,4,5,NULL,NULL] +[NULL,NULL,1,2,3] +[3,4,5,-1,-1] +['c','',''] +-- array_shift_right() +[] +[] +[] +[3,4,5,NULL,NULL] +[NULL,NULL,1,2,3] +[3,4,5,-1,-1] +['c','',''] +-- array_slice() +[3,4] +-- array_split() +[[1],[2,3],[4,5]] +[[1,2],[3,4,5]] +[[1],[2,3],[4,5]] +[[1,2,3,4],[],[4,5]] +-- array_sort_asc() 
+(['a','c','c','d',NULL]) +([1,2,3,4]) +['a','b','c'] +(['p','q','r'],['hello','clickhouse','world']) +([NULL,'a','c','c','d']) +([NULL,'a','c','c','d']) +([NULL,NULL,NULL]) +[1,2,3,NULL,NULL] +['a','e','b','c','d'] +(['George','John','Paul','Ringo']) +(['blue','green','yellow',NULL,NULL]) +([NULL,NULL,'blue','green','yellow']) +-- array_sort_desc() +(['d','c','c','a',NULL]) +([4,3,2,1]) +['c','b','a'] +(['r','q','p'],['world','clickhouse','hello']) +([NULL,'d','c','c','a']) +([NULL,'d','c','c','a']) +([NULL,NULL,NULL]) +[3,2,1,NULL,NULL] +['d','c','b','e','a'] +(['Ringo','Paul','John','George']) +(['yellow','green','blue',NULL,NULL]) +([NULL,NULL,'yellow','green','blue']) +-- jaccard_index() +0.75 +0 +0 +nan +0 +0.75 +0.25 +-- pack_array() +1 2 4 [1,2,4] +['ab','0.0.0.42','4.2'] +-- repeat() +[] +[1,1,1] +['asd','asd','asd'] +[86400,86400,86400] +[true,true,true] +[NULL] +[NULL] +-- set_difference() +[] +[] +[] +[] +[4,5,6] +[4] +[1,3] +[1,2,3] +['d','s'] +['Chewbacca','Han Solo'] +-- set_has_element() +0 +1 +0 +1 +0 +-- set_intersect() +[] +[1,2,3] +[1,2,3] +[] +[5] +[] +['a'] +['Darth Vader'] +-- set_union() +[] +[1,2,3] +[1,2,3,4,5,6] +[1,2,3,4] +[1,2,3,4,5] +[1,2,3] +['a','d','f','s'] +['Chewbacca','Darth Sidious','Darth Vader','Han Solo'] +-- zip() +[] +[[1,2],[3,4],[5,6]] +[['Darth','Vader','has a suit'],['Master','Yoda','doesn\'t have a suit']] +[[1,10],[2,20],[3,NULL]] +[[NULL,1],[NULL,2],[NULL,3]] diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.sql b/tests/queries/0_stateless/02366_kql_func_dynamic.sql new file mode 100644 index 00000000000..b0956f032d0 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_dynamic.sql @@ -0,0 +1,161 @@ +DROP TABLE IF EXISTS array_test; +CREATE TABLE array_test (floats Array(Float64), + strings Array(String), + nullable_strings Array(Nullable(String)) + ) ENGINE=Memory; +INSERT INTO array_test VALUES([1.0, 2.5], ['a', 'c'], ['A', NULL, 'C']); +set dialect = 'kusto'; +print '-- constant index value'; +array_test | project floats[0], strings[1], nullable_strings; +print '-- array_length()'; +print array_length(dynamic(['John', 'Denver', 'Bob', 'Marley'])) == 4; +print array_length(dynamic([1, 2, 3])) == 3; +print '-- array_sum()'; +print array_sum(dynamic([2, 5, 3])) == 10; +print array_sum(dynamic([2.5, 5.5, 3])) == 11; +print '-- array_index_of()'; +print array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley'); +print array_index_of(dynamic([1, 2, 3]), 2); +print '-- array_iif()'; +print array_iif(dynamic([true,false,true]), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([1,0,1]), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([true,false,true]), dynamic([1,2]), dynamic([4,5,6])); +print array_iif(dynamic(['a','b','c']), dynamic([1,2,3]), dynamic([4,5,6])); +print '-- array_concat()'; +print array_concat(dynamic([1,2,3]),dynamic([4,5,6])); +print '-- array_reverse()'; +print array_reverse(dynamic([])); +print array_reverse(dynamic([1])); +print array_reverse(dynamic([1,2,3,4])); +print array_reverse(dynamic(["this", "is", "an", "example"])); +print '-- array_rotate_left()'; +print array_rotate_left(dynamic([]), 0); +print array_rotate_left(dynamic([]), 500); +print array_rotate_left(dynamic([]), -500); +print array_rotate_left(dynamic([1,2,3,4,5]), 2); +print array_rotate_left(dynamic([1,2,3,4,5]), 5); +print array_rotate_left(dynamic([1,2,3,4,5]), 7); +print array_rotate_left(dynamic([1,2,3,4,5]), -2); +print array_rotate_left(dynamic([1,2,3,4,5]), -5); +print 
array_rotate_left(dynamic([1,2,3,4,5]), -7); +print '-- array_rotate_right()'; +print array_rotate_right(dynamic([]), 0); +print array_rotate_right(dynamic([]), 500); +print array_rotate_right(dynamic([]), -500); +print array_rotate_right(dynamic([1,2,3,4,5]), 2); +print array_rotate_right(dynamic([1,2,3,4,5]), 5); +print array_rotate_right(dynamic([1,2,3,4,5]), 7); +print array_rotate_right(dynamic([1,2,3,4,5]), -2); +print array_rotate_right(dynamic([1,2,3,4,5]), -5); +print array_rotate_right(dynamic([1,2,3,4,5]), -7); +print '-- array_shift_left()'; +print array_shift_left(dynamic([]), 0); +print array_shift_left(dynamic([]), 555); +print array_shift_left(dynamic([]), -555); +print array_shift_left(dynamic([1,2,3,4,5]), 2); +print array_shift_left(dynamic([1,2,3,4,5]), -2); +print array_shift_left(dynamic([1,2,3,4,5]), 2, -1); +print array_shift_left(dynamic(['a', 'b', 'c']), 2); +print '-- array_shift_right()'; +print array_shift_left(dynamic([]), 0); +print array_shift_left(dynamic([]), 555); +print array_shift_left(dynamic([]), -555); +print array_shift_right(dynamic([1,2,3,4,5]), -2); +print array_shift_right(dynamic([1,2,3,4,5]), 2); +print array_shift_right(dynamic([1,2,3,4,5]), -2, -1); +print array_shift_right(dynamic(['a', 'b', 'c']), -2); +print '-- array_slice()'; +--print array_slice(dynamic([1,2,3]), 1, 2); -- will enable when the analyzer is fixed +print array_slice(dynamic([1,2,3,4,5]), -3, -2); +print '-- array_split()'; +print array_split(dynamic([1,2,3,4,5]), dynamic([1,-2])); +print array_split(dynamic([1,2,3,4,5]), 2); +print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])); +print array_split(dynamic([1,2,3,4,5]), dynamic([-1,-2])); +print '-- array_sort_asc()'; +print array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c'])); +print array_sort_asc(dynamic([4, 1, 3, 2])); +print array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; +print array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); +print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false); +print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); +print array_sort_asc( dynamic([null, null, null]) , false); +print array_sort_asc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; +print array_sort_asc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; +print array_sort_asc(split("John,Paul,George,Ringo", ",")); +print array_sort_asc(dynamic([null,"blue","yellow","green",null])); +print array_sort_asc(dynamic([null,"blue","yellow","green",null]), false); +print '-- array_sort_desc()'; +print array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c'])); +print array_sort_desc(dynamic([4, 1, 3, 2])); +print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; +print array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); +print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false); +print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); +print array_sort_desc( dynamic([null, null, null]) , false); +print array_sort_desc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; +print array_sort_desc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; +print array_sort_desc(split("John,Paul,George,Ringo", ",")); +print array_sort_desc(dynamic([null,"blue","yellow","green",null])); +print
array_sort_desc(dynamic([null,"blue","yellow","green",null]), false); +print '-- jaccard_index()'; +print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])); +print jaccard_index(dynamic([1, 2, 3]), dynamic([])); +print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])); +print jaccard_index(dynamic([]), dynamic([])); +print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])); +print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])); +print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); +print '-- pack_array()'; +print pack_array(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print x = 1 | extend y = x * 2 | extend z = y * 2 | extend pack_array(x,y,z); +print pack_array(strcat('a', 'b'), format_ipv4(42), tostring(4.2)); +print '-- repeat()'; +print repeat(1, 0); +print repeat(1, 3); +print repeat("asd", 3); +print repeat(timespan(1d), 3); +print repeat(true, 3); +print repeat(1, -3); +print repeat(6.7,-4); +print '-- set_difference()'; +print set_difference(dynamic([]), dynamic([])); +print set_difference(dynamic([]), dynamic([9])); +print set_difference(dynamic([]), dynamic(["asd"])); +print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])); +print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print set_difference(dynamic([4]), dynamic([1, 2, 3])); +print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[0]; +print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[0]; +print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; +print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; +print '-- set_has_element()'; +print set_has_element(dynamic([]), 9); +print set_has_element(dynamic(["this", "is", "an", "example"]), "example"); +print set_has_element(dynamic(["this", "is", "an", "example"]), "examplee"); +print set_has_element(dynamic([1, 2, 3]), 2); +print set_has_element(dynamic([1, 2, 3, 4.2]), 4); +print '-- set_intersect()'; +print set_intersect(dynamic([]), dynamic([])); +print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print set_intersect(dynamic([4]), dynamic([1, 2, 3])); +print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])); +print set_intersect(dynamic([1, 2, 3]), dynamic([])); +print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])); +print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); +print '-- set_union()'; +print set_union(dynamic([]), dynamic([])); +print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[0]; +print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[0]; +print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; +print array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; +print '-- zip()'; +print zip(dynamic([]), dynamic([])); 
+print zip(dynamic([1,3,5]), dynamic([2,4,6])); +print zip(dynamic(['Darth','Master']), dynamic(['Vader','Yoda']), dynamic(['has a suit','doesn\'t have a suit'])); +print zip(dynamic([1,2,3]), dynamic([10,20])); +print zip(dynamic([]), dynamic([1,2,3])); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference new file mode 100644 index 00000000000..2a0bbf53fff --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -0,0 +1,123 @@ +-- ipv4_is_private(\'127.0.0.1\') +0 +-- ipv4_is_private(\'10.1.2.3\') +1 +-- ipv4_is_private(\'192.168.1.1/24\') +1 +ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) +1 +-- ipv4_is_private(\'abc\') +\N +-- ipv4_netmask_suffix(\'192.168.1.1/24\') +24 +-- ipv4_netmask_suffix(\'192.168.1.1\') +32 +-- ipv4_netmask_suffix(\'127.0.0.1/16\') +16 +-- ipv4_netmask_suffix(\'abc\') +\N +ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) +16 +-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\') +1 +-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') +0 +-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') +0 +-- ipv4_is_in_range(\'abc\', \'127.0.0.1\') +\N +-- parse_ipv6(127.0.0.1) +0000:0000:0000:0000:0000:ffff:7f00:0001 +-- parse_ipv6(fe80::85d:e82c:9446:7994) +fe80:0000:0000:0000:085d:e82c:9446:7994 +-- parse_ipv4(\'127.0.0.1\') +2130706433 +-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\') +1 +-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\')) +-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432 +2130706432 +-- parse_ipv4_mask(\'abc\', 31) +\N +\N +-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31) +3221334018 +3221334018 +-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\') +0 +-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24) +1 +-- ipv4_is_match(\'abc\', \'def\', 24) +\N +-- ipv4_compare() +0 +-1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +-- format_ipv4() +192.168.1.0 +192.168.1.1 +192.168.1.0 +192.168.1.0 +1 +1 +127.0.0.0 +-- format_ipv4_mask() +192.168.1.0/24 +192.168.1.0/24 +192.168.1.0/24 +192.168.1.1/32 +192.168.1.0/24 +1 +1 +127.0.0.0/24 +-- parse_ipv6_mask() +0000:0000:0000:0000:0000:0000:0000:0000 +fe80:0000:0000:0000:085d:e82c:9446:7900 +0000:0000:0000:0000:0000:ffff:c0a8:ff00 +0000:0000:0000:0000:0000:ffff:c0a8:ff00 +0000:0000:0000:0000:0000:ffff:ffff:ffff +fe80:0000:0000:0000:085d:e82c:9446:7994 +fe80:0000:0000:0000:085d:e82c:9446:7900 +0000:0000:0000:0000:0000:ffff:c0a8:ffff +0000:0000:0000:0000:0000:ffff:c0a8:ff00 +-- ipv6_is_match() +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql new file mode 100644 index 00000000000..c9b335f203a --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -0,0 +1,131 @@ +set dialect='kusto'; +print '-- ipv4_is_private(\'127.0.0.1\')'; +print ipv4_is_private('127.0.0.1'); +print '-- ipv4_is_private(\'10.1.2.3\')'; +print ipv4_is_private('10.1.2.3'); +print '-- ipv4_is_private(\'192.168.1.1/24\')'; +print ipv4_is_private('192.168.1.1/24'); +print 'ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))'; +print ipv4_is_private(strcat('192.','168.','1.','1','/24')); +print '-- 
ipv4_is_private(\'abc\')'; +print ipv4_is_private('abc'); -- == null + +print '-- ipv4_netmask_suffix(\'192.168.1.1/24\')'; +print ipv4_netmask_suffix('192.168.1.1/24'); -- == 24 +print '-- ipv4_netmask_suffix(\'192.168.1.1\')'; +print ipv4_netmask_suffix('192.168.1.1'); -- == 32 +print '-- ipv4_netmask_suffix(\'127.0.0.1/16\')'; +print ipv4_netmask_suffix('127.0.0.1/16'); -- == 16 +print '-- ipv4_netmask_suffix(\'abc\')'; +print ipv4_netmask_suffix('abc'); -- == null +print 'ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))'; +print ipv4_netmask_suffix(strcat('127.', '0.', '0.1/16')); -- == 16 + +print '-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_in_range('127.0.0.1', '127.0.0.1'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; +print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; +print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false +print '-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; +print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false +print '-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')'; -- == null +print ipv4_is_in_range('abc', '127.0.0.1'); + +print '-- parse_ipv6(127.0.0.1)'; +print parse_ipv6('127.0.0.1'); +print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; +print parse_ipv6('fe80::85d:e82c:9446:7994'); +print '-- parse_ipv4(\'127.0.0.1\')'; +print parse_ipv4('127.0.0.1'); +print '-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')'; +print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); +print '-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\'))'; +print parse_ipv4(arrayStringConcat(['127', '0', '0', '1'], '.')); -- { clientError UNKNOWN_FUNCTION } + +print '-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432'; +print parse_ipv4_mask('127.0.0.1', 24); +print '-- parse_ipv4_mask(\'abc\', 31)'; +print parse_ipv4_mask('abc', 31) +print '-- parse_ipv4_mask(\'192.1.168.2\', 1000)'; +print parse_ipv4_mask('192.1.168.2', 1000); +print '-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)'; +--print parse_ipv4_mask('192.1.168.2', 31) == parse_ipv4_mask('192.1.168.3', 31); // this equality check failed with the analyzer; expected 3221334018 +print parse_ipv4_mask('192.1.168.2', 31); +print parse_ipv4_mask('192.1.168.3', 31); +print '-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_match('127.0.0.1', '127.0.0.1'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')'; +print ipv4_is_match('192.168.1.1', '192.168.1.255'); +print '-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')'; +print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)'; +print ipv4_is_match('192.168.1.1', '192.168.1.255', 24); +print '-- ipv4_is_match(\'abc\', \'def\', 24)'; +print ipv4_is_match('abc', 'def', 24); +print '-- ipv4_compare()'; +print ipv4_compare('127.0.0.1', '127.0.0.1'); +print ipv4_compare('192.168.1.1', '192.168.1.255'); +print ipv4_compare('192.168.1.255', '192.168.1.1'); +print ipv4_compare('192.168.1.1/24', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print ipv4_compare('192.168.1.1/24', '192.168.1.255'); +print ipv4_compare('192.168.1.1', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1/30', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.0', 31); +print ipv4_compare('192.168.1.1/24', 
'192.168.1.255', 31); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print '-- format_ipv4()'; +print format_ipv4('192.168.1.255', 24); +print format_ipv4('192.168.1.1', 32); +print format_ipv4('192.168.1.1/24', 32); +print format_ipv4(3232236031, 24); +print format_ipv4('192.168.1.1/24', -1) == ''; +print format_ipv4('abc', 24) == ''; +print format_ipv4(strcat('127.0', '.0.', '1', '/32'), 12 + 12); +print '-- format_ipv4_mask()'; +print format_ipv4_mask('192.168.1.255', 24); +print format_ipv4_mask(3232236031, 24); +print format_ipv4_mask('192.168.1.1', 24); +print format_ipv4_mask('192.168.1.1', 32); +print format_ipv4_mask('192.168.1.1/24', 32); +print format_ipv4_mask('192.168.1.1/24', -1) == ''; +print format_ipv4_mask('abc', 24) == ''; +print format_ipv4_mask(strcat('127.0', '.0.', '1', '/32'), 12 + 12); +print '-- parse_ipv6_mask()'; +print parse_ipv6_mask("127.0.0.1", 24); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 120); +print parse_ipv6_mask("192.168.255.255", 120); +print parse_ipv6_mask("192.168.255.255/24", 124); +print parse_ipv6_mask("255.255.255.255", 128); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 128); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994/120", 124); +print parse_ipv6_mask("::192.168.255.255", 128); +print parse_ipv6_mask("::192.168.255.255/24", 128); +print '-- ipv6_is_match()'; +print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false; +print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true; +print ipv6_is_match('192.168.1.1', '192.168.1.1'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '192.168.1.255'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7994'); -- // Equal IPs +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '::ffff:c0a8:0101'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.0', 31); -- // 31 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '192.168.1.255', 31); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255', 24); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7998', 
120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998', 127); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255', 120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24', 127); -- // 120 bit IP6-prefix is used for comparison \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_math.reference b/tests/queries/0_stateless/02366_kql_func_math.reference new file mode 100644 index 00000000000..92f283abcb6 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_math.reference @@ -0,0 +1,4 @@ +-- isnan -- +1 +0 +0 diff --git a/tests/queries/0_stateless/02366_kql_func_math.sql b/tests/queries/0_stateless/02366_kql_func_math.sql new file mode 100644 index 00000000000..4e83622eb6b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_math.sql @@ -0,0 +1,7 @@ +set dialect = 'kusto'; +print '-- isnan --'; +print isnan(double(nan)); +print isnan(4.2); +print isnan(4); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print isnan(real(+inf)); +print isnan(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.reference b/tests/queries/0_stateless/02366_kql_func_scalar.reference new file mode 100644 index 00000000000..b7fa62c5d43 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_scalar.reference @@ -0,0 +1,16 @@ +-- bin_at() +4.5 +-12:0:0 +2017-05-14 12:00:00.000000000 +2017-05-14 00:00:00.000000000 +2018-02-25 15:14:00.000000000 5 +2018-02-24 15:14:00.000000000 3 +2018-02-23 15:14:00.000000000 4 +-- bin() +4 +1970-05-11 00:00:00.000000000 +336:0:0 +1970-05-11 13:45:07.345000000 +1970-05-11 13:45:07.345623000 +2022-09-26 10:13:23.987232000 +1970-05-11 13:45:07.456336000 diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.sql b/tests/queries/0_stateless/02366_kql_func_scalar.sql new file mode 100644 index 00000000000..d7e94cfd9d1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_scalar.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS Bin_at_test; +CREATE TABLE Bin_at_test +( + `Date` DateTime('UTC'), + Num Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO Bin_at_test VALUES ('2018-02-24T15:14:01',3), ('2018-02-23T16:14:01',4), ('2018-02-26T15:14:01',5); + +set dialect = 'kusto'; +print '-- bin_at()'; +print bin_at(6.5, 2.5, 7); +print bin_at(1h, 1d, 12h); +print bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0)); +print bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0)); +Bin_at_test | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; +print '-- bin()'; +print bin(4.5, 1); +print bin(datetime(1970-05-11 13:45:07), 1d); +print bin(16d, 7d); +print bin(datetime(1970-05-11 13:45:07.345623), 1ms); +-- print bin(datetime(2022-09-26 10:13:23.987234), 6ms); -> 2022-09-26 10:13:23.982000000 +print bin(datetime(1970-05-11 13:45:07.345623), 1microsecond); +print bin(datetime(2022-09-26 10:13:23.987234), 6microseconds); +print bin(datetime(1970-05-11 13:45:07.456345672), 16microseconds); +-- print bin(datetime(2022-09-26 10:13:23.987234128), 1tick); -> 2022-09-26 10:13:23.987234100 +-- print bin(datetime(2022-09-26 10:13:23.987234128), 99nanosecond); -> null diff --git 
a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference new file mode 100644 index 00000000000..9bdd38ca5db --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -0,0 +1,360 @@ +-- test String Functions -- +-- Customers |where Education contains \'degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers |where Education contains \'Degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'Degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName endswith \'RE\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where ! FirstName endswith \'RE\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +--Customers | where FirstName endswith_cs \'re\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where FirstName !endswith_cs \'re\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation == \'Skilled Manual\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation != \'Skilled Manual\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'Skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'Skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hasprefix_cs \'Ab\' + +-- Customers | where Occupation !hasprefix_cs \'Ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hasprefix_cs \'ab\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hasprefix_cs \'ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'Ent\' 
+Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hassuffix \'Ent\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers |where Education in (\'Bachelors\',\'High School\') +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Education !in (\'Bachelors\',\'High School\') +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName matches regex \'P.*r\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName startswith \'pet\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName !startswith \'pet\' +Latoya Shen Professional Graduate Degree 25 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName startswith_cs \'pet\' + +-- Customers | where FirstName !startswith_cs \'pet\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isempty(LastName) +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnotempty(LastName) +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +\N why Professional Partial College 38 + +-- Customers | where isnotnull(FirstName) +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnull(FirstName) +\N why Professional Partial College 38 + +-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1 +https://www.test.com/hello word + +-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1 +https%3A%2F%2Fwww.test.com%2Fhello%20word + +-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2)) +\N +Lat en +Pet ra +The az +Ste x +App + +-- Customers | project name = strcat(FirstName, \' \', LastName) +\N +Latoya Shen +Peter Nara +Theodore Diaz +Stephanie Cox +Apple + +-- Customers | project FirstName, strlen(FirstName) +\N \N +Latoya 6 +Peter 5 +Theodore 8 +Stephanie 9 +Apple 5 + +-- Customers | project strrep(FirstName,2,\'_\') +\N +Latoya_Latoya +Peter_Peter +Theodore_Theodore +Stephanie_Stephanie +Apple_Apple + +-- Customers | project toupper(FirstName) +\N +LATOYA +PETER +THEODORE +STEPHANIE +APPLE + +-- Customers | project tolower(FirstName) +\N +latoya +peter +theodore +stephanie +apple + +-- support subquery for in operator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery needs to be wrapped with bracket inside bracket); TODO: case-insensitive not supported yet +Latoya 
Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 + +-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) +3 +3 +1 + +-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +PINEAPPLE ice cream is 20 +PINEAPPLE +20 + +20 +\N +\N +\N +\N +\N +45.6 +45.6 + +-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet +[['T','h','e'],['p','ric','e'],['P','INEAPPL','E'],['i','c','e'],['c','rea','m']] + +-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) + + +John +iPhone +\N +26 +26 +26 +26 +\N + +-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) +['aa','bb'] +['bbb'] +[''] +['a','','b'] +['aa','cc'] +['aabbcc'] +['aaa','bbb','ccc'] +[NULL] + +-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now. +1-2-Ab + +-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet +2 +2 +-1 +-- base64_encode_fromguid() +8jMxriJurkmwahbmqbIS6w== +-- base64_decode_toarray() +[] +[75,117,115,116,111] +-- base64_decode_toguid() +10e99626-bc2b-4c75-bb3e-fe606de25700 +1 +-- base64_encode_tostring + +S3VzdG8x +-- base64_decode_tostring + +Kusto1 +-- parse_url() +{"Scheme":"scheme","Host":"","Port":"0","Path":"/this/is/a/path","Username":"username","Password":"password","Query Parameters":{"k1":"v1","k2":"v2"},"Fragment":"fragment"} +-- parse_urlquery() +{"Query Parameters":{"k1":"v1","k2":"v2","k3":"v3"}} +-- strcmp() +0 1 -1 1 +-- substring() +CD +-- translate() +kusto xxx +-- trim() +https://www.ibm.com +Te st1 + asd +asd +sd +-- trim_start() +www.ibm.com +Te st1// $ +asdw + +asd +-- trim_end() +https +- Te st1 +wasd + +asd +-- trim, trim_start, trim_end all at once +--https://bing.com-- -- https://bing.com-- --https://bing.com https://bing.com +-- replace_regex +Number was: 1 +-- has_any_index() +0 1 -1 -1 +-- parse_version() +1000000020000000300000040 +1000000020000000000000000 +1000000020000000000000000 +\N +\N +\N +\N +1000000020000000300000004 +1000000020000000000000000 +1000000020000000300000000 +1000000000000000000000000 +-- parse_json() +[1,2,3] +[{"a":123.5,"b":"{\\"c\\":456}"}] +-- parse_command_line() +[NULL] +[NULL] +-- reverse() +321 +43.321 + +dsa +][ +]3,2,1[ +]\'redaV\',\'htraD\'[ +000000000.00:00:21 51-01-7102 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +-- parse_csv() +[''] +['aaa'] +['aa','b','cc'] +['record1','a','b','c'] diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql new file mode 100644 index 
00000000000..d251b04e08b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -0,0 +1,313 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +-- datatable (Version:string) [ +-- '1.2.3.4', +-- '1.2', +-- '1.2.3', +-- '1' +-- ] + +DROP TABLE IF EXISTS Versions; +CREATE TABLE Versions +( + Version String +) ENGINE = Memory; +INSERT INTO Versions VALUES ('1.2.3.4'),('1.2'),('1.2.3'),('1'); + + +set dialect='kusto'; +print '-- test String Functions --'; + +print '-- Customers |where Education contains \'degree\''; +Customers |where Education contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'degree\''; +Customers |where Education !contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education contains \'Degree\''; +Customers |where Education contains 'Degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'Degree\''; +Customers |where Education !contains 'Degree' | order by LastName; +print ''; +print '-- Customers | where FirstName endswith \'RE\''; +Customers | where FirstName endswith 'RE' | order by LastName; +print ''; +print '-- Customers | where ! FirstName endswith \'RE\''; +Customers | where FirstName ! endswith 'RE' | order by LastName; +print ''; +print '--Customers | where FirstName endswith_cs \'re\''; +Customers | where FirstName endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where FirstName !endswith_cs \'re\''; +Customers | where FirstName !endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where Occupation == \'Skilled Manual\''; +Customers | where Occupation == 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation != \'Skilled Manual\''; +Customers | where Occupation != 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'skilled\''; +Customers | where Occupation has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation !has \'skilled\''; +Customers | where Occupation !has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'Skilled\''; +Customers | where Occupation has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation !has \'Skilled\''; +Customers | where Occupation !has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'Ab\''; +Customers | where Occupation hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'Ab\''; +Customers | where Occupation !hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'ab\''; +Customers | where Occupation hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'ab\''; +Customers | where Occupation !hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'Ent\''; 
+Customers | where Occupation hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation !hassuffix \'Ent\''; +Customers | where Occupation !hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers |where Education in (\'Bachelors\',\'High School\')'; +Customers |where Education in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where Education !in (\'Bachelors\',\'High School\')'; +Customers | where Education !in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where FirstName matches regex \'P.*r\''; +Customers | where FirstName matches regex 'P.*r'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith \'pet\''; +Customers | where FirstName startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith \'pet\''; +Customers | where FirstName !startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith_cs \'pet\''; +Customers | where FirstName startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith_cs \'pet\''; +Customers | where FirstName !startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where isempty(LastName)'; +Customers | where isempty(LastName); +print ''; +print '-- Customers | where isnotempty(LastName)'; +Customers | where isnotempty(LastName); +print ''; +print '-- Customers | where isnotnull(FirstName)'; +Customers | where isnotnull(FirstName)| order by LastName; +print ''; +print '-- Customers | where isnull(FirstName)'; +Customers | where isnull(FirstName)| order by LastName; +print ''; +print '-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1'; +Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1; +print ''; +print '-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1'; +Customers | project url_encode('https://www.test.com/hello word') | take 1; +print ''; +print '-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2))'; +Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName; +print ''; +print '-- Customers | project name = strcat(FirstName, \' \', LastName)'; +Customers | project name = strcat(FirstName, ' ', LastName)| order by LastName; +print ''; +print '-- Customers | project FirstName, strlen(FirstName)'; +Customers | project FirstName, strlen(FirstName)| order by LastName; +print ''; +print '-- Customers | project strrep(FirstName,2,\'_\')'; +Customers | project strrep(FirstName,2,'_')| order by LastName; +print ''; +print '-- Customers | project toupper(FirstName)'; +Customers | project toupper(FirstName)| order by LastName; +print ''; +print '-- Customers | project tolower(FirstName)'; +Customers | project tolower(FirstName)| order by LastName; +print ''; +print '-- support subquery for in operator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery needs to be wrapped with brackets inside brackets); TODO: case-insensitive not supported yet'; +Customers | where Age in ((Customers|project 
Age|where Age < 30)) | order by LastName; +-- Customer | where LastName in~ ("diaz", "cox") +print ''; +print '-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet'; +Customers | where Occupation has_all ('manual', 'skilled') | order by LastName; +print ''; +print '-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet'; +Customers|where Occupation has_any ('Skilled','abcd'); +print ''; +print '-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction)'; +Customers | project countof('The cat sat on the mat', 'at') | take 1; +Customers | project countof('The cat sat on the mat', 'at', 'normal') | take 1; +Customers | project countof('The cat sat on the mat', '\\s.he', 'regex') | take 1; +print ''; +print '-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction)'; +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(bool)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(date)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(guid)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(int)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(long)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(real)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(decimal)); +print ''; +print '-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet'; +Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20') | take 1; +print ''; +print '-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction)'; +print extract_json('', ''); -- { serverError BAD_ARGUMENTS } +print extract_json('a', ''); -- { serverError BAD_ARGUMENTS } +print extract_json('$.firstName', ''); +print extract_json('$.phoneNumbers[0].type', ''); +print extractjson('$.firstName', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); +print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(string)); +print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist 
street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(long)); +-- print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(bool)); -> true +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(double)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(guid)); +-- print extract_json('$.phoneNumbers', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(dynamic)); we won't be able to handle this particular case for a while, because it should return a dictionary +print ''; +print '-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction)'; +Customers | project split('aa_bb', '_') | take 1; +Customers | project split('aaa_bbb_ccc', '_', 1) | take 1; +Customers | project split('', '_') | take 1; +Customers | project split('a__b', '_') | take 1; +Customers | project split('aabbcc', 'bb') | take 1; +Customers | project split('aabbcc', '') | take 1; +Customers | project split('aaa_bbb_ccc', '_', -1) | take 1; +Customers | project split('aaa_bbb_ccc', '_', 10) | take 1; +print ''; +print '-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now.'; +Customers | project strcat_delim('-', '1', '2', strcat('A','b')) | take 1; +-- Customers | project strcat_delim('-', '1', '2', 'A' , 1s); +print ''; +print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet'; +Customers | project indexof('abcdefg','cde') | take 1; +Customers | project indexof('abcdefg','cde',2) | take 1; +Customers | project indexof('abcdefg','cde',6) | take 1; +print '-- base64_encode_fromguid()'; +-- print base64_encode_fromguid(guid(null)); +print base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')); +print base64_encode_fromguid(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print base64_encode_fromguid("abcd1231"); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- base64_decode_toarray()'; +print base64_decode_toarray(''); 
+print base64_decode_toarray('S3VzdG8='); +print '-- base64_decode_toguid()'; +print base64_decode_toguid("JpbpECu8dUy7Pv5gbeJXAA=="); +print base64_decode_toguid(base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'))) == guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'); +print '-- base64_encode_tostring'; +print base64_encode_tostring(''); +print base64_encode_tostring('Kusto1'); +print '-- base64_decode_tostring'; +print base64_decode_tostring(''); +print base64_decode_tostring('S3VzdG8x'); +print '-- parse_url()'; +print parse_url('scheme://username:password@host:1234/this/is/a/path?k1=v1&k2=v2#fragment'); +print '-- parse_urlquery()'; +print parse_urlquery('k1=v1&k2=v2&k3=v3'); +print '-- strcmp()'; +print strcmp('ABC','ABC'), strcmp('abc','ABC'), strcmp('ABC','abc'), strcmp('abcde','abc'); +print '-- substring()'; +print substring("ABCD", -2, 2); +print '-- translate()'; +print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); +print '-- trim()'; +print trim("--", "--https://www.ibm.com--"); +print trim("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim("", " asd "); +print trim("a$", "asd"); +print trim("^a", "asd"); +print '-- trim_start()'; +print trim_start("https://", "https://www.ibm.com"); +print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim_start("asd$", "asdw"); +print trim_start("asd$", "asd"); +print trim_start("d$", "asd"); +print '-- trim_end()'; +print trim_end("://www.ibm.com", "https://www.ibm.com"); +print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim_end("^asd", "wasd"); +print trim_end("^asd", "asd"); +print trim_end("^a", "asd"); +print '-- trim, trim_start, trim_end all at once'; +print str = "--https://bing.com--", pattern = '--' | extend start = trim_start(pattern, str), end = trim_end(pattern, str), both = trim(pattern, str); +print '-- replace_regex'; +print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); +print '-- has_any_index()'; +print has_any_index('this is an example', dynamic(['this', 'example'])), has_any_index("this is an example", dynamic(['not', 'example'])), has_any_index("this is an example", dynamic(['not', 'found'])), has_any_index("this is an example", dynamic([])); +print '-- parse_version()'; +print parse_version(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print parse_version(''); -> NULL +print parse_version('1.2.3.40'); +print parse_version('1.2'); +print parse_version(strcat('1.', '2')); +print parse_version('1.2.4.5.6'); +print parse_version('moo'); +print parse_version('moo.boo.foo'); +print parse_version(strcat_delim('.', 'moo', 'boo', 'foo')); +Versions | project parse_version(Version); +print '-- parse_json()'; +print parse_json(dynamic([1, 2, 3])); +print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}'); +print '-- parse_command_line()'; +print parse_command_line(55, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print parse_command_line((52 + 3) * 4 % 2, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print parse_command_line('', 'windows'); +print parse_command_line(strrep(' ', 6), 'windows'); +-- print parse_command_line('echo \"hello world!\" print$?', 'windows'); -> ["echo","hello world!","print$?"] +-- print parse_command_line("yolo swag 'asd bcd' \"moo moo \"", 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] +-- print parse_command_line(strcat_delim(' ', "yolo", "swag", "\'asd bcd\'", "\"moo moo \""), 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] +print '-- 
reverse()'; +print reverse(123); +print reverse(123.34); +print reverse(''); +print reverse("asd"); +print reverse(dynamic([])); +print reverse(dynamic([1, 2, 3])); +print reverse(dynamic(['Darth', "Vader"])); +print reverse(datetime(2017-10-15 12:00)); +-- print reverse(timespan(3h)); -> 00:00:30 +Customers | where Education contains 'degree' | order by reverse(FirstName); +print '-- parse_csv()'; +print parse_csv(''); +print parse_csv(65); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print parse_csv('aaa'); +print result=parse_csv('aa,b,cc'); +print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z'); +-- print result=parse_csv('aa,"b,b,b",cc,"Escaping quotes: ""Title""","line1\nline2"'); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] +-- print parse_csv(strcat(strcat_delim(',', 'aa', '"b,b,b"', 'cc', '"Escaping quotes: ""Title"""', '"line1\nline2"'), '\r\n', strcat_delim(',', 'asd', 'qcf'))); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.reference b/tests/queries/0_stateless/02366_kql_mvexpand.reference new file mode 100644 index 00000000000..25be070eb0b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_mvexpand.reference @@ -0,0 +1,65 @@ +-- mv-expand -- +-- mv_expand_test_table | mv-expand c -- +1 ['Salmon','Steak','Chicken'] 1 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 2 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 3 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 4 [5,6,7,8] +-- mv_expand_test_table | mv-expand c, d -- +1 ['Salmon','Steak','Chicken'] 1 5 +1 ['Salmon','Steak','Chicken'] 2 6 +1 ['Salmon','Steak','Chicken'] 3 7 +1 ['Salmon','Steak','Chicken'] 4 8 +-- mv_expand_test_table | mv-expand b | mv-expand c -- +1 Salmon 1 [5,6,7,8] +1 Salmon 2 [5,6,7,8] +1 Salmon 3 [5,6,7,8] +1 Salmon 4 [5,6,7,8] +1 Steak 1 [5,6,7,8] +1 Steak 2 [5,6,7,8] +1 Steak 3 [5,6,7,8] +1 Steak 4 [5,6,7,8] +1 Chicken 1 [5,6,7,8] +1 Chicken 2 [5,6,7,8] +1 Chicken 3 [5,6,7,8] +1 Chicken 4 [5,6,7,8] +-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d -- +0 1 Salmon 1 5 +1 1 Steak 2 6 +2 1 Chicken 3 7 +3 1 4 8 +-- mv_expand_test_table | mv-expand array_concat(c,d) -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 +-- mv_expand_test_table | mv-expand x = c, y = d -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 8 +-- mv_expand_test_table | mv-expand xy = array_concat(c, d) -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 +-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy -- +1 1 +2 1 +-- mv_expand_test_table | mv-expand 
with_itemindex=index c,d to typeof(bool) -- +0 1 ['Salmon','Steak','Chicken'] 1 true +1 1 ['Salmon','Steak','Chicken'] 2 true +2 1 ['Salmon','Steak','Chicken'] 3 true +3 1 ['Salmon','Steak','Chicken'] 4 true +-- mv_expand_test_table | mv-expand c to typeof(bool) -- +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql new file mode 100644 index 00000000000..af336a19638 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_mvexpand.sql @@ -0,0 +1,39 @@ +-- datatable(a: int, b: dynamic, c: dynamic, d: dynamic) [ +-- 1, dynamic(['Salmon', 'Steak', 'Chicken']), dynamic([1, 2, 3, 4]), dynamic([5, 6, 7, 8]) +-- ] + +DROP TABLE IF EXISTS mv_expand_test_table; +CREATE TABLE mv_expand_test_table +( + a UInt8, + b Array(String), + c Array(Int8), + d Array(Int8) +) ENGINE = Memory; +INSERT INTO mv_expand_test_table VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]); +set dialect='kusto'; +print '-- mv-expand --'; +print '-- mv_expand_test_table | mv-expand c --'; +mv_expand_test_table | mv-expand c; +print '-- mv_expand_test_table | mv-expand c, d --'; +mv_expand_test_table | mv-expand c, d; +print '-- mv_expand_test_table | mv-expand b | mv-expand c --'; +mv_expand_test_table | mv-expand b | mv-expand c; +print '-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d --'; +mv_expand_test_table | mv-expand with_itemindex=index b, c, d; +print '-- mv_expand_test_table | mv-expand array_concat(c,d) --'; +mv_expand_test_table | mv-expand array_concat(c,d); +print '-- mv_expand_test_table | mv-expand x = c, y = d --'; +mv_expand_test_table | mv-expand x = c, y = d; +print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) --'; +mv_expand_test_table | mv-expand xy = array_concat(c, d); +print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy --'; +mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy; +print '-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) --'; +mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool); +print '-- mv_expand_test_table | mv-expand c to typeof(bool) --'; +mv_expand_test_table | mv-expand c to typeof(bool); +SET max_query_size = 28; +SET dialect='kusto'; +mv_expand_test_table | mv-expand c, d; -- { serverError SYNTAX_ERROR } +SET max_query_size=262144; diff --git a/tests/queries/0_stateless/02366_kql_native_interval_format.reference b/tests/queries/0_stateless/02366_kql_native_interval_format.reference new file mode 100644 index 00000000000..8a12c6885c4 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_native_interval_format.reference @@ -0,0 +1,23 @@ +numeric +kusto +00:00:00 +00:00:00.0000001 +00:00:00.0010000 +00:00:42 +01:06:00 +2.18:00:00 +5.00:00:00 +7.00:00:00 +14.00:00:00 +('00:01:12','21.00:00:00','00:00:00.0000002') +numeric +99 +100 +1 +42 +66 +66 +5 +1 +2 +(72,3,200) diff --git a/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 b/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 new file mode 100644 index 00000000000..0731687222d --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 @@ -0,0 +1,16 @@ +select value from system.settings where name = 'interval_output_format'; + +{% for format in 
['kusto', 'numeric'] -%} +select '{{ format }}'; +set interval_output_format = '{{ format }}'; +select toIntervalNanosecond(99); +select toIntervalNanosecond(100); +select toIntervalMillisecond(1); +select toIntervalSecond(42); +select toIntervalMinute(66); +select toIntervalHour(66); +select toIntervalDay(5); +select toIntervalWeek(1); +select toIntervalWeek(2); +select toIntervalSecond(72) + toIntervalWeek(3) + toIntervalNanosecond(200); +{% endfor -%} diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.reference b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference new file mode 100644 index 00000000000..4e0987aa5c3 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference @@ -0,0 +1,60 @@ +-- #1 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 +-- #2 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #3 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #4 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #5 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #6 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #7 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #8 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #9 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #10 -- +-- #11 -- +-- #12 -- +-- #13 -- +-- #14 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #15 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.sql b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql new file mode 100644 index 00000000000..0b02faa0680 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate 
Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); +Select '-- #1 --' ; +select * from kql($$Customers | where FirstName !in ('Peter', 'Latoya')$$); +Select '-- #2 --' ; +select * from kql($$Customers | where FirstName !in ("test", "test2")$$); +Select '-- #3 --' ; +select * from kql($$Customers | where FirstName !contains 'Pet'$$); +Select '-- #4 --' ; +select * from kql($$Customers | where FirstName !contains_cs 'Pet'$$); +Select '-- #5 --' ; +select * from kql($$Customers | where FirstName !endswith 'ter'$$); +Select '-- #6 --' ; +select * from kql($$Customers | where FirstName !endswith_cs 'ter'$$); +Select '-- #7 --' ; +select * from kql($$Customers | where FirstName != 'Peter'$$); +Select '-- #8 --' ; +select * from kql($$Customers | where FirstName !has 'Peter'$$); +Select '-- #9 --' ; +select * from kql($$Customers | where FirstName !has_cs 'peter'$$); +Select '-- #10 --' ; +-- select * from kql($$Customers | where FirstName !hasprefix 'Peter'$$); -- will enable when analyzer fixed `and` issue +Select '-- #11 --' ; +--select * from kql($$Customers | where FirstName !hasprefix_cs 'Peter'$$); +Select '-- #12 --' ; +--select * from kql($$Customers | where FirstName !hassuffix 'Peter'$$); +Select '-- #13 --' ; +--select * from kql($$Customers | where FirstName !hassuffix_cs 'Peter'$$); +Select '-- #14 --' ; +select * from kql($$Customers | where FirstName !startswith 'Peter'$$); +Select '-- #15 --' ; +select * from kql($$Customers | where FirstName !startswith_cs 'Peter'$$); +DROP TABLE IF EXISTS Customers; diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference new file mode 100644 index 00000000000..aeb42feb6be --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -0,0 +1,92 @@ +-- test summarize -- +12 25 46 32.416666666666664 389 +Skilled Manual 5 26 36 30.2 151 +Professional 6 25 46 34.166666666666664 205 +Management abcd defg 1 33 33 33 33 +Skilled Manual 0 +Professional 2 +Management abcd defg 0 +Skilled Manual 36 +Professional 38 +Management abcd defg 33 +Skilled Manual 26 +Professional 25 +Management abcd defg 33 +Skilled Manual 30.2 +Professional 29.25 +Management abcd defg 33 +Skilled Manual 151 +Professional 117 +Management abcd defg 33 +4 +2 +40 2 +30 4 +20 6 +Skilled Manual 5 +Professional 6 +Management abcd defg 1 +-- make_list() -- +Skilled Manual ['Bachelors','Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College','Partial College','Partial College','Partial College','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Bachelors','Graduate Degree'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +-- make_list_if() -- +Skilled Manual ['Edward','Christine'] +Professional ['Dalton','Angel'] +Management abcd defg ['Stephanie'] +Skilled Manual ['Edward'] +Professional ['Dalton'] +Management abcd defg ['Stephanie'] +-- make_set() -- +Skilled Manual ['Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Graduate Degree','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +-- make_set_if() -- +Skilled Manual ['Partial College','High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['High School'] 
+Professional ['Partial College'] +Management abcd defg ['Bachelors'] +-- stdev() -- +6.855102059227432 +-- stdevif() -- +7.557189365836421 +-- binary_all_and -- +42 +-- binary_all_or -- +46 +-- binary_all_xor -- +4 +43.8 +25.55 30.5 43.8 +30.5 +35 +[25,35,45] +-- Summarize following sort -- +Skilled Manual 5 +Professional 6 +Management abcd defg 1 +-- summarize with bin -- +0 1 +245000 2 +0 1 +245 2 +0 1 +245 2 +2015-10-12 00:00:00.000000000 +2016-10-12 00:00:00.000000000 +-- make_list_with_nulls -- +['Theodore','Stephanie','Peter','Latoya','Joshua','Edward','Dalton','Christine','Cameron','Angel','Apple',NULL] +Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] +Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] +Management abcd defg ['Stephanie'] +Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] [28,26,36,33,28] +Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] [25,26,42,28,46,38] +Management abcd defg ['Stephanie'] [33] diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql new file mode 100644 index 00000000000..21a1b643d98 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -0,0 +1,102 @@ +-- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, +-- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, +-- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, +-- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, +-- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, +-- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- dynamic(null), 'why', 'Professional', 'Partial College', 38 +-- ] + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Joshua','Lee','Professional','Partial College',26),('Edward','Hernandez','Skilled Manual','High School',36),('Dalton','Wood','Professional','Partial College',42),('Christine','Nara','Skilled Manual','Partial College',33),('Cameron','Rodriguez','Professional','Partial College',28),('Angel','Stewart','Professional','Partial College',46),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +drop table if exists EventLog; +create table EventLog +( + LogEntry String, + Created Int64 +) ENGINE = Memory; + +insert into EventLog values ('Darth Vader has entered the room.', 546), ('Rambo is suspciously looking at Darth Vader.', 245234), ('Darth Sidious electrocutes both using Force Lightning.', 245554); + +drop table if exists Dates; +create table Dates +( + EventTime DateTime, +) ENGINE = Memory; + +Insert into Dates VALUES ('2015-10-12') , ('2016-10-12') +Select '-- test summarize --' ; +set dialect='kusto'; +Customers | summarize count(), min(Age), 
max(Age), avg(Age), sum(Age); +Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age) by Occupation | order by Occupation; +Customers | summarize countif(Age>40) by Occupation | order by Occupation; +Customers | summarize MyMax = maxif(Age, Age<40) by Occupation | order by Occupation; +Customers | summarize MyMin = minif(Age, Age<40) by Occupation | order by Occupation; +Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation | order by Occupation; +Customers | summarize MySum = sumif(Age, Age<40) by Occupation | order by Occupation; +Customers | summarize dcount(Education); +Customers | summarize dcountif(Education, Occupation=='Professional'); +Customers | summarize count_ = count() by bin(Age, 10) | order by count_ asc; +Customers | summarize job_count = count() by Occupation | where job_count > 0 | order by Occupation; +Customers | summarize 'Edu Count'=count() by Education | sort by 'Edu Count' desc; -- { clientError 62 } + +print '-- make_list() --'; +Customers | summarize f_list = make_list(Education) by Occupation | sort by Occupation; +Customers | summarize f_list = make_list(Education, 2) by Occupation | sort by Occupation; +print '-- make_list_if() --'; +Customers | summarize f_list = make_list_if(FirstName, Age>30) by Occupation | sort by Occupation; +Customers | summarize f_list = make_list_if(FirstName, Age>30, 1) by Occupation | sort by Occupation; +print '-- make_set() --'; +Customers | summarize f_list = make_set(Education) by Occupation | sort by Occupation; +Customers | summarize f_list = make_set(Education, 2) by Occupation | sort by Occupation; +print '-- make_set_if() --'; +Customers | summarize f_list = make_set_if(Education, Age>30) by Occupation | sort by Occupation; +Customers | summarize f_list = make_set_if(Education, Age>30, 1) by Occupation | sort by Occupation; +print '-- stdev() --'; +Customers | project Age | summarize stdev(Age); +print '-- stdevif() --'; +Customers | project Age | summarize stdevif(Age, Age%2==0); +print '-- binary_all_and --'; +Customers | project Age | where Age > 40 | summarize binary_all_and(Age); +print '-- binary_all_or --'; +Customers | project Age | where Age > 40 | summarize binary_all_or(Age); +print '-- binary_all_xor --'; +Customers | project Age | where Age > 40 | summarize binary_all_xor(Age); + +Customers | project Age | summarize percentile(Age, 95); +Customers | project Age | summarize percentiles(Age, 5, 50, 95)|project round(percentiles_Age[0],2),round(percentiles_Age[1],2),round(percentiles_Age[2],2); +Customers | project Age | summarize percentiles(Age, 5, 50, 95)[1]; +Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilew(AgeBucket, w, 75); +Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilesw(AgeBucket, w, 50, 75, 99.9); + +print '-- Summarize following sort --'; +Customers | sort by FirstName | summarize count() by Occupation | sort by Occupation; + +print '-- summarize with bin --'; +EventLog | summarize count=count() by bin(Created, 1000); +EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s); +EventLog | summarize count=count() by time_label=bin(Created/1000, 1s); +Dates | project bin(datetime(EventTime), 1m); +print '-- make_list_with_nulls --'; +Customers | summarize t = make_list_with_nulls(FirstName); +Customers | summarize f_list = make_list_with_nulls(FirstName) by Occupation | sort by Occupation; +Customers | summarize f_list = make_list_with_nulls(FirstName), a_list = make_list_with_nulls(Age) 
by Occupation | sort by Occupation; +-- TODO: +-- arg_max() +-- arg_min() diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference new file mode 100644 index 00000000000..e70c02ce34f --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.reference @@ -0,0 +1,139 @@ +-- test Query only has table name: -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Joshua Lee Professional Partial College 26 +Edward Hernandez Skilled Manual High School 36 +Dalton Wood Professional Partial College 42 +Christine Nara Skilled Manual Partial College 33 +Cameron Rodriguez Professional Partial College 28 +Angel Stewart Professional Partial College 46 +-- Query has Column Selection -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Edward Hernandez Skilled Manual +Dalton Wood Professional +Christine Nara Skilled Manual +Cameron Rodriguez Professional +Angel Stewart Professional +-- Query has limit -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with bigger value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with smaller value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +-- Query has second Column selection -- +Theodore Diaz +Stephanie Cox +Peter Nara +-- Query has second Column selection with extra column -- +-- Query with desc sort -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Professional +Professional +Management +-- Query with asc sort -- +Management +Professional +Professional +Skilled Manual +Skilled Manual +-- Query with sort (without keyword asc desc) -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Professional +Professional +Management +-- Query with sort 2 Columns with different direction -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Peter Nara Skilled Manual +Theodore Diaz Skilled Manual +-- Query with second sort -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Peter Nara Skilled Manual +Theodore Diaz Skilled Manual +-- Test String Equals (==) -- +Theodore Diaz Skilled Manual +Peter Nara Skilled Manual +Edward Hernandez Skilled Manual +Christine Nara Skilled Manual +-- Test String Not equals (!=) -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Dalton Wood Professional +Cameron Rodriguez Professional +Angel Stewart Professional +-- Test Filter using a list (in) -- +Theodore Diaz Skilled Manual Bachelors +Stephanie Cox Management Bachelors +Edward Hernandez Skilled Manual High School +-- Test Filter using a list (!in) -- +Peter Nara Skilled Manual Graduate Degree +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common 
string operations (contains_cs) -- +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (startswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (endswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +-- Test Filter using numerical equal (==) -- +Peter Nara Skilled Manual Graduate Degree 26 +Joshua Lee Professional Partial College 26 +-- Test Filter using numerical great and less (> , <) -- +Stephanie Cox Management Bachelors 33 +Edward Hernandez Skilled Manual High School 36 +Christine Nara Skilled Manual Partial College 33 +-- Test Filter using multi where -- +Dalton Wood Professional Partial College 42 +Angel Stewart Professional Partial College 46 +-- Complex query with unknown function -- +-- Missing column in front of startsWith -- diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql new file mode 100644 index 00000000000..1a3d1ed92eb --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -0,0 +1,93 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); + +set dialect='kusto'; +print '-- test Query only has table name: --'; +Customers; + +print '-- Query has Column Selection --'; +Customers | project FirstName,LastName,Occupation; + +print '-- Query has limit --'; +Customers | project FirstName,LastName,Occupation | take 5; +Customers | project FirstName,LastName,Occupation | limit 5; + +print '-- Query has second limit with bigger value --'; +Customers | project FirstName,LastName,Occupation | take 5 | take 7; + +print '-- Query has second limit with smaller value --'; +Customers | project FirstName,LastName,Occupation | take 5 | take 3; + +print '-- Query has second Column selection --'; +Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName; + +print '-- Query has second Column selection with extra column --'; +Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education;-- { serverError 47 } + +print '-- Query with desc sort --'; +Customers | project FirstName | take 5 | sort by FirstName desc; +Customers | project Occupation | take 5 | order by Occupation desc; + +print '-- Query with asc sort --'; +Customers | project Occupation | take 5 | sort by Occupation asc; + +print '-- Query with sort (without keyword asc desc) --'; +Customers | project 
FirstName | take 5 | sort by FirstName; +Customers | project Occupation | take 5 | order by Occupation; + +print '-- Query with sort 2 Columns with different direction --'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; + +print '-- Query with second sort --'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; + +print '-- Test String Equals (==) --'; +Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; + +print '-- Test String Not equals (!=) --'; +Customers | project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; + +print '-- Test Filter using a list (in) --'; +Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); + +print '-- Test Filter using a list (!in) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); + +print '-- Test Filter using common string operations (contains_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; + +print '-- Test Filter using common string operations (startswith_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; + +print '-- Test Filter using common string operations (endswith_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; + +print '-- Test Filter using numerical equal (==) --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; + +print '-- Test Filter using numerical great and less (> , <) --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; + +print '-- Test Filter using multi where --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; + +print '-- Complex query with unknown function --'; +hits | where CounterID == 62 and EventDate >= '2013-07-14' and EventDate <= '2013-07-15' and IsRefresh == 0 and DontCountHits == 0 | summarize count() by d=bin(poopoo(EventTime), 1m) | order by d | limit 10; -- { clientError UNKNOWN_FUNCTION } + +print '-- Missing column in front of startsWith --'; +StormEvents | where startswith "W" | summarize Count=count() by State; -- { clientError SYNTAX_ERROR } + +SET max_query_size = 55; +SET dialect='kusto'; +Customers | where Education contains 'degree' | order by LastName; -- { serverError 62 } +SET max_query_size=262144; diff --git a/tests/queries/0_stateless/02402_merge_engine_with_view.sql b/tests/queries/0_stateless/02402_merge_engine_with_view.sql index ae9de1426e7..81c2d67d05b 100644 --- a/tests/queries/0_stateless/02402_merge_engine_with_view.sql +++ b/tests/queries/0_stateless/02402_merge_engine_with_view.sql @@ -11,4 +11,4 @@ SELECT * FROM m2 WHERE id > 1 AND id < 5 ORDER BY id SETTINGS force_primary_key= -- #40706 CREATE VIEW v AS SELECT 1; -SELECT 1 FROM merge(currentDatabase(), '^v$'); \ No newline at end of file +SELECT 1 FROM merge(currentDatabase(), '^v$'); diff --git a/tests/queries/0_stateless/02406_minmax_behaviour.reference b/tests/queries/0_stateless/02406_minmax_behaviour.reference index d52ba640a0e..5a715e1c7f7 100644 --- a/tests/queries/0_stateless/02406_minmax_behaviour.reference +++ b/tests/queries/0_stateless/02406_minmax_behaviour.reference @@ -56,6 
+56,10 @@ SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, 22 SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); 26 +SELECT max(number) from (Select if(number % 2 == 1, NULL, -number::Int8) as number FROM numbers(128)); +0 +SELECT min(number) from (Select if(number % 2 == 1, NULL, -number::Int8) as number FROM numbers(128)); +-126 SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; 10 SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; @@ -190,3 +194,7 @@ SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, 22 SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); 26 +SELECT max(number::Nullable(Decimal64(3))) from numbers(11) settings max_block_size=10; +10 +SELECT min(-number::Nullable(Decimal64(3))) from numbers(11) settings max_block_size=10; +-10 diff --git a/tests/queries/0_stateless/02406_minmax_behaviour.sql b/tests/queries/0_stateless/02406_minmax_behaviour.sql index a3afe7d40b0..314374a260d 100644 --- a/tests/queries/0_stateless/02406_minmax_behaviour.sql +++ b/tests/queries/0_stateless/02406_minmax_behaviour.sql @@ -48,6 +48,9 @@ SELECT maxIf(number::Nullable(String), number < 10) as number from numbers(10, 1 SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); +SELECT max(number) from (Select if(number % 2 == 1, NULL, -number::Int8) as number FROM numbers(128)); +SELECT min(number) from (Select if(number % 2 == 1, NULL, -number::Int8) as number FROM numbers(128)); + SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; SELECT argMax(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; @@ -138,3 +141,6 @@ SELECT maxIf(number::Nullable(String), number < 10) as number from numbers(10, 1 SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); + +SELECT max(number::Nullable(Decimal64(3))) from numbers(11) settings max_block_size=10; +SELECT min(-number::Nullable(Decimal64(3))) from numbers(11) settings max_block_size=10; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 379eea4dbbb..e15002da69c 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -62,6 +62,7 @@ __bitBoolMaskOr __bitSwapLastTwo __bitWrapperFunc __getScalar +__scalarSubqueryResult abs accurateCast accurateCastOrDefault @@ -643,14 +644,6 @@ shardNum showCertificate sigmoid sign -simpleJSONExtractBool -simpleJSONExtractFloat -simpleJSONExtractInt 
-simpleJSONExtractRaw -simpleJSONExtractString -simpleJSONExtractUInt -simpleJSONHas -sin sinh sipHash128 sipHash128Keyed diff --git a/tests/queries/0_stateless/02416_json_object_inference.sql b/tests/queries/0_stateless/02416_json_object_inference.sql index 91137c0243c..3022ee026d0 100644 --- a/tests/queries/0_stateless/02416_json_object_inference.sql +++ b/tests/queries/0_stateless/02416_json_object_inference.sql @@ -2,5 +2,5 @@ set allow_experimental_object_type=1; desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); set allow_experimental_object_type=0, input_format_json_read_objects_as_strings=0, input_format_json_try_infer_named_tuples_from_objects=0, input_format_json_read_numbers_as_strings=0; -desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError 652} +desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} diff --git a/tests/queries/0_stateless/02424_pod_array_overflow.sql b/tests/queries/0_stateless/02424_pod_array_overflow.sql index 4b85d5be029..50c46cf19f1 100644 --- a/tests/queries/0_stateless/02424_pod_array_overflow.sql +++ b/tests/queries/0_stateless/02424_pod_array_overflow.sql @@ -1 +1 @@ -SELECT * FROM format(Native, '\x02\x02\x02\x6b\x30\x1a\x4d\x61\x70\x28\x46\x69\x78\x65\x64\x53\x74\x72\x69\x6e\x67\x28\x31\x29\x2c\x20\x49\x6e\x74\x36\x34\x29\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7f\x00\x7f\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x64\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xcf\x31\x3f\x56\x69\x11\x89\x25'); -- { serverError 128 } +SELECT * FROM format(Native, '\x02\x02\x02\x6b\x30\x1a\x4d\x61\x70\x28\x46\x69\x78\x65\x64\x53\x74\x72\x69\x6e\x67\x28\x31\x29\x2c\x20\x49\x6e\x74\x36\x34\x29\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7f\x00\x7f\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x64\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xcf\x31\x3f\x56\x69\x11\x89\x25'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } diff --git a/tests/queries/0_stateless/02426_pod_array_overflow_2.sql b/tests/queries/0_stateless/02426_pod_array_overflow_2.sql index 52a00730227..6a0d97acee3 100644 --- a/tests/queries/0_stateless/02426_pod_array_overflow_2.sql +++ b/tests/queries/0_stateless/02426_pod_array_overflow_2.sql @@ -1 +1 @@ -SELECT * FROM format(Native, 'k0\x23Array(Tuple(FixedString(1), Int64))\0\0\0\0\0\0\0�����\0����������������\0�\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0d\0\0\0\0\0\0\0\0\0\0\0\0\0�1?Vi�%'); -- { serverError 128 } +SELECT * FROM format(Native, 'k0\x23Array(Tuple(FixedString(1), Int64))\0\0\0\0\0\0\0�����\0����������������\0�\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0d\0\0\0\0\0\0\0\0\0\0\0\0\0�1?Vi�%'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } diff --git a/tests/queries/0_stateless/02426_pod_array_overflow_3.sql b/tests/queries/0_stateless/02426_pod_array_overflow_3.sql index 857ba2ca28e..caabf7d1679 100644 --- a/tests/queries/0_stateless/02426_pod_array_overflow_3.sql +++ b/tests/queries/0_stateless/02426_pod_array_overflow_3.sql @@ -1 +1 @@ -SELECT * FROM format(Native, '\x01\x01\x01x\x0CArray(UInt8)\x01\x00\xBD\xEF\xBF\xBD\xEF\xBF\xBD\xEF'); -- { serverError 128 } +SELECT * FROM format(Native, '\x01\x01\x01x\x0CArray(UInt8)\x01\x00\xBD\xEF\xBF\xBD\xEF\xBF\xBD\xEF'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } diff --git 
a/tests/queries/0_stateless/02428_combinators_with_over_statement.sql b/tests/queries/0_stateless/02428_combinators_with_over_statement.sql index 7946b997b00..2b82839d6eb 100644 --- a/tests/queries/0_stateless/02428_combinators_with_over_statement.sql +++ b/tests/queries/0_stateless/02428_combinators_with_over_statement.sql @@ -1,5 +1,6 @@ drop table if exists test; create table test (x AggregateFunction(uniq, UInt64), y Int64) engine=Memory; +set max_insert_threads = 1; insert into test select uniqState(number) as x, number as y from numbers(10) group by number order by x, y; select uniqStateMap(map(1, x)) OVER (PARTITION BY y) from test; select uniqStateForEach([x]) OVER (PARTITION BY y) from test; diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 422fdaa4983..fd77e6ed8df 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -23,6 +23,7 @@ ERROR 20 20 ERROR +20 30 20 30 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sh b/tests/queries/0_stateless/02428_parameterized_view.sh index ad9c672f4c5..c6f0927db36 100755 --- a/tests/queries/0_stateless/02428_parameterized_view.sh +++ b/tests/queries/0_stateless/02428_parameterized_view.sh @@ -37,7 +37,7 @@ $CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Ca $CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1(price=20)" $CLICKHOUSE_CLIENT -q "SELECT Price FROM \`test_02428_pv1\`(price=20)" -$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1" 2>&1 | grep -Fq "UNKNOWN_QUERY_PARAMETER" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1" 2>&1 | grep -q "UNKNOWN_QUERY_PARAMETER\|UNKNOWN_IDENTIFIER" && echo 'ERROR' || echo 'OK' $CLICKHOUSE_CLIENT --param_p 10 -q "SELECT Price FROM test_02428_pv1(price={p:UInt64})" $CLICKHOUSE_CLIENT --param_l 1 -q "SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64})" @@ -72,7 +72,8 @@ $CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES $CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Paper', 20, 1)" $CLICKHOUSE_CLIENT -q "CREATE VIEW ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1 AS SELECT * FROM ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog WHERE Price={price:UInt64}" $CLICKHOUSE_CLIENT -q "SELECT Price FROM ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1(price=20)" -$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1\`(price=20)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1\`(price=20) SETTINGS allow_experimental_analyzer = 0" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1\`(price=20) SETTINGS allow_experimental_analyzer = 1" $CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8)" diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference index 1af3ee244f1..bd3b689ca3c 100644 --- a/tests/queries/0_stateless/02447_drop_database_replica.reference +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -13,6 +13,7 @@ t rdb_default 1 1 s1 r1 1 2 2 +2 s1 r1 OK 2 0 s1 r2 QUEUED 2 0 s2 r1 QUEUED 2 0 @@ -24,4 +25,5 @@ rdb_default 1 2 s1 r2 0 t t2 t3 +t4 rdb_default_4 1 1 s1 r1 1 
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index fb89db5045b..5c4604bc8cd 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -33,8 +33,9 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log" | sort +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort $CLICKHOUSE_CLIENT -q "detach database $db3" $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db" diff --git a/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference index 37cd3d93e39..317dbe1c61c 100644 --- a/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference +++ b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference @@ -1,5 +1,5 @@ CREATE DICTIONARY default.dict\n(\n `id` UInt32,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' DB \'default\' TABLE \'view\'))\nLIFETIME(MIN 600 MAX 600)\nLAYOUT(HASHED()) -CREATE TABLE default.table\n(\n `col` String MATERIALIZED dictGet(\'default.dict\', \'value\', toUInt32(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`table`\n(\n `col` String MATERIALIZED dictGet(\'default.dict\', \'value\', toUInt32(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 v 1 v 1 v diff --git a/tests/queries/0_stateless/02454_set_parameters_formatting.reference b/tests/queries/0_stateless/02454_set_parameters_formatting.reference index 1098a8159f3..c438c068635 100644 --- a/tests/queries/0_stateless/02454_set_parameters_formatting.reference +++ b/tests/queries/0_stateless/02454_set_parameters_formatting.reference @@ -1,3 +1,3 @@ -SET param_a = 1 -SET max_threads = 1, param_a = 1 -SET max_threads = 1, param_a = 1 +SET param_a = '1' +SET max_threads = 1, param_a = '1' +SET max_threads = 1, param_a = '1' diff --git a/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql index 626a4d7034e..f67e5496a98 100644 --- a/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql +++ 
b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql @@ -1,7 +1,7 @@ -- Tags: no-fasttest -desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError INCORRECT_DATA} -desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError INCORRECT_DATA} -desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError INCORRECT_DATA} -desc format(CSV, '1,2,3') settings column_names_for_schema_inference='a, b, a'; -- {serverError INCORRECT_DATA} +desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +desc format(CSV, '1,2,3') settings column_names_for_schema_inference='a, b, a'; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} diff --git a/tests/queries/0_stateless/02457_insert_select_progress_http.sh b/tests/queries/0_stateless/02457_insert_select_progress_http.sh index ae62ee4b77e..ccf37dfb327 100755 --- a/tests/queries/0_stateless/02457_insert_select_progress_http.sh +++ b/tests/queries/0_stateless/02457_insert_select_progress_http.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d @- <<< "insert into function null('_ Int') select * from numbers(5) settings max_block_size=1" -v |& { +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d @- <<< "insert into function null('_ Int') select * from numbers(5) settings max_block_size=1, max_insert_threads=1" -v |& { grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: | sed 's/,\"elapsed_ns[^}]*//' } diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql index ac549a7faf1..71a2381d7b6 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql @@ -10,14 +10,14 @@ set input_format_json_infer_incomplete_types_as_strings=0; insert into test select * from file(02458_data.jsonl); insert into test select x, 1 from file(02458_data.jsonl); insert into test select x, y from file(02458_data.jsonl); -insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select x, z from file(02458_data.jsonl); insert into test select * from file(02458_data.jsoncompacteachrow); -insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +insert into test select x, y from 
file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select * from input() format CSV 1,2 insert into test select x, y from input() format CSV 1,2 -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} diff --git a/tests/queries/0_stateless/02476_fix_lambda_parsing.reference b/tests/queries/0_stateless/02476_fix_lambda_parsing.reference index de508c7a0d3..04e64792b23 100644 --- a/tests/queries/0_stateless/02476_fix_lambda_parsing.reference +++ b/tests/queries/0_stateless/02476_fix_lambda_parsing.reference @@ -1,4 +1,4 @@ -SELECT f(x, y -> z) +SELECT f(x, (y -> z)) SELECT f((x, y) -> z) SELECT f((x, y) -> z) -SELECT f(x, (x, y) -> z) +SELECT f(x, ((x, y) -> z)) diff --git a/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference b/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference index 969ec320790..ec3d1c15690 100644 --- a/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference +++ b/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference @@ -4,7 +4,7 @@ 0 1 2 200 Aggregating Order: a ASC, c ASC - ReadFromMergeTree (default.tab) + ReadFromMergeTree (default.tab) Aggregating Order: __table1.a ASC, __table1.c ASC ReadFromMergeTree (default.tab) diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql index 5c90313b6b8..507ac7289f5 100644 --- a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql +++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql @@ -5,7 +5,7 @@ create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02468/{database}', ' system stop cleanup rmt; system stop merges rmt1; -insert into rmt select * from numbers(10) settings max_block_size=1; +insert into rmt select * from numbers(10) settings max_block_size=1, max_insert_threads=1; alter table rmt drop partition id '0'; truncate table rmt1; @@ -31,7 +31,7 @@ create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02468/{database}2', system stop cleanup rmt; system stop merges rmt1; -insert into rmt select * from numbers(10) settings max_block_size=1; +insert into rmt select * from numbers(10) settings max_block_size=1, max_insert_threads=1; system sync replica rmt1 lightweight; alter table rmt replace partition id '0' from rmt2; diff --git a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect index 2d595b0f492..1cc11f9bf9f 100755 --- a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect +++ b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect @@ -18,23 +18,23 @@ spawn bash send "source $basedir/../shell_config.sh\r" send "\$CLICKHOUSE_CLIENT --query 'select 0b'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0b;'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. 
(UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0b ;'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x;'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x ;'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "exit\r" expect eof diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index e7c169cf45e..bc22ae23ee1 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -220,13 +220,16 @@ FROM ) GROUP BY number ORDER BY number +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test -- explain Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) Aggregating - Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - ReadFromSystemNumbers + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 0 1 @@ -289,13 +292,16 @@ FROM GROUP BY number ) ORDER BY a ASC +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test -- explain Expression (Projection) Sorting (Sorting for ORDER BY) Expression ((Before ORDER BY + (Projection + Before ORDER BY))) Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 0 1 @@ -321,14 +327,18 @@ FROM ) WHERE a > 0 ORDER BY a +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test -- explain Expression (Projection) Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + )) - Aggregating - Filter - Filter (( + (Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) - ReadFromSystemNumbers + Expression (Before ORDER BY) + Filter ((WHERE + (Projection + Before ORDER BY))) + Filter (HAVING) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index c676e0340b1..23eccb4e782 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -157,7 +157,8 @@ FROM ORDER BY number DESC ) GROUP BY number -ORDER BY number" +ORDER BY 
number +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test" run_query "$query" echo "-- query with aggregation function but w/o GROUP BY -> remove sorting" @@ -200,7 +201,8 @@ FROM ) GROUP BY number ) -ORDER BY a ASC" +ORDER BY a ASC +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test" run_query "$query" echo "-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY" @@ -222,7 +224,8 @@ FROM GROUP BY number ) WHERE a > 0 -ORDER BY a" +ORDER BY a +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test" run_query "$query" echo "-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But need to correctly update data streams sorting properties after removing sorting steps" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index 16d3327b9c2..47348651025 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -220,6 +220,7 @@ FROM ) GROUP BY number ORDER BY number +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test -- explain Expression (Project names) Sorting (Sorting for ORDER BY) @@ -291,6 +292,7 @@ FROM GROUP BY number ) ORDER BY a ASC +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test -- explain Expression (Project names) Sorting (Sorting for ORDER BY) @@ -325,6 +327,7 @@ FROM ) WHERE a > 0 ORDER BY a +SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it depends on order and we need it for the test -- explain Expression (Project names) Sorting (Sorting for ORDER BY) diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql index a25060e8182..b78b5709dbb 100644 --- a/tests/queries/0_stateless/02497_schema_inference_nulls.sql +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -4,7 +4,7 @@ set input_format_json_try_infer_named_tuples_from_objects=0; set input_format_json_read_objects_as_strings=0; set input_format_json_infer_incomplete_types_as_strings=0; set input_format_json_read_numbers_as_strings=0; -desc format(JSONEachRow, '{"x" : 1234}, {"x" : "String"}') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH } +desc format(JSONEachRow, '{"x" : 1234}, {"x" : "String"}') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } desc format(JSONEachRow, '{"x" : [null, 1]}'); desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : []}'); desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : [null]}'); @@ -26,7 +26,7 @@ desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [null]}'); select 'JSONCompactEachRow'; set schema_inference_make_columns_nullable=1; -desc format(JSONCompactEachRow, '[1234], ["String"]') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH } +desc format(JSONCompactEachRow, '[1234], ["String"]') settings 
input_format_json_try_infer_numbers_from_strings=1; -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } desc format(JSONCompactEachRow, '[[null, 1]]'); desc format(JSONCompactEachRow, '[[null, 1]], [[]]'); desc format(JSONCompactEachRow, '[[null, 1]], [[null]]'); diff --git a/tests/queries/0_stateless/02500_numbers_inference.sh b/tests/queries/0_stateless/02500_numbers_inference.sh index ce9cd5bdc9f..5d863bd616f 100755 --- a/tests/queries/0_stateless/02500_numbers_inference.sh +++ b/tests/queries/0_stateless/02500_numbers_inference.sh @@ -8,10 +8,10 @@ $CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1.2}')"; echo '{"x" : 1.2}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; $CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1}')"; echo '{"x" : 1}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; -$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1e10}')"; -echo '{"x" : 1e10}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; -$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, 1, 1e10]}')"; -echo '{"x" : [1, 42.42, 1, 1e10]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1e10}')" --input_format_try_infer_exponent_floats=1; +echo '{"x" : 1e10}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test" --input_format_try_infer_exponent_floats=1; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, 1, 1e10]}')" --input_format_try_infer_exponent_floats=1; +echo '{"x" : [1, 42.42, 1, 1e10]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test" --input_format_try_infer_exponent_floats=1; $CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, false]}')"; echo '{"x" : [1, 42.42, false]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; @@ -19,10 +19,10 @@ $CLICKHOUSE_LOCAL -q "desc format(TSV, '1.2')"; echo '1.2' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; $CLICKHOUSE_LOCAL -q "desc format(TSV, '1')"; echo '1' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; -$CLICKHOUSE_LOCAL -q "desc format(TSV, '1e10')"; -echo '1e10' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; -$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, 1, 1e10]')"; -echo '[1, 42.42, 1, 1e10]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '1e10')" --input_format_try_infer_exponent_floats=1; +echo '1e10' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test" --input_format_try_infer_exponent_floats=1; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, 1, 1e10]')" --input_format_try_infer_exponent_floats=1; +echo '[1, 42.42, 1, 1e10]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test" --input_format_try_infer_exponent_floats=1; $CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, false]')"; echo '[1, 42.42, false]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; diff --git a/tests/queries/0_stateless/02502_bad_values_schema_inference.sql b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql index 4c796842c0d..67ac09832de 100644 --- a/tests/queries/0_stateless/02502_bad_values_schema_inference.sql +++ b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql @@ -1,2 +1,2 @@ -desc format(Values, 
'(\'abc)'); -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } +desc format(Values, '(\'abc)'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index a5a71560d00..2c62e278050 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -55,33 +55,33 @@ Header: a2 String Header: __table1.a2 String __table1.k UInt64 __table4.d2 String - Expression ((Actions for left table alias column keys + DROP unused columns after JOIN)) + Expression (DROP unused columns after JOIN) Header: __table1.a2 String __table1.k UInt64 Join (JOIN FillRightFirst) Header: __table1.a2 String __table1.k UInt64 - Expression ((Actions for left table alias column keys + DROP unused columns after JOIN)) + Expression (DROP unused columns after JOIN) Header: __table1.a2 String __table1.k UInt64 Join (JOIN FillRightFirst) Header: __table1.a2 String __table1.k UInt64 - Expression ((Actions for left table alias column keys + Change column names to column identifiers)) + Expression (Change column names to column identifiers) Header: __table1.a2 String __table1.k UInt64 ReadFromMemoryStorage Header: a2 String k UInt64 - Expression ((Actions for right table alias column keys + Change column names to column identifiers)) + Expression (Change column names to column identifiers) Header: __table2.k UInt64 ReadFromMemoryStorage Header: k UInt64 - Expression ((Actions for right table alias column keys + Change column names to column identifiers)) + Expression (Change column names to column identifiers) Header: __table3.k UInt64 ReadFromMemoryStorage Header: k UInt64 - Expression ((Actions for right table alias column keys + Change column names to column identifiers)) + Expression (Change column names to column identifiers) Header: __table4.d2 String __table4.k UInt64 ReadFromMemoryStorage diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh index 0ae44ec0c01..b58db7c87ad 100755 --- a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh @@ -14,7 +14,7 @@ $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES test_02521_insert_delay" for i in {0..4} do query_id="${CLICKHOUSE_DATABASE}_02521_${i}_$RANDOM$RANDOM" - $CLICKHOUSE_CLIENT --query_id="$query_id" -q "INSERT INTO test_02521_insert_delay SELECT number, toString(number) FROM numbers(${i}, 1)" + $CLICKHOUSE_CLIENT --query_id="$query_id" --max_insert_threads 1 -q "INSERT INTO test_02521_insert_delay SELECT number, toString(number) FROM numbers(${i}, 1)" $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "select ProfileEvents['DelayedInsertsMilliseconds'] as delay from system.query_log where event_date >= yesterday() and current_database = '$CLICKHOUSE_DATABASE' and query_id = {query_id:String} order by delay desc limit 1" done diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference b/tests/queries/0_stateless/02534_join_prewhere_bug.reference similarity index 88% rename from tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference rename to tests/queries/0_stateless/02534_join_prewhere_bug.reference index aaef17371d8..115ea994de1 100644 --- 
a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference +++ b/tests/queries/0_stateless/02534_join_prewhere_bug.reference @@ -34,5 +34,12 @@ ORDER BY test2.col1 ; 5600 123 123 5601 321 -32 +SELECT col2, col2 + 1 FROM test1 +FULL OUTER JOIN test2 USING (col1) +PREWHERE (col2 * 2) :: UInt8 +; +123 124 +-32 -31 +-30 -29 DROP TABLE IF EXISTS test1; DROP TABLE IF EXISTS test2; diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql b/tests/queries/0_stateless/02534_join_prewhere_bug.sql similarity index 92% rename from tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql rename to tests/queries/0_stateless/02534_join_prewhere_bug.sql index 073f81e4ff3..016c92597ec 100644 --- a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql +++ b/tests/queries/0_stateless/02534_join_prewhere_bug.sql @@ -42,5 +42,10 @@ WHERE test2.col1 IS NOT NULL ORDER BY test2.col1 ; +SELECT col2, col2 + 1 FROM test1 +FULL OUTER JOIN test2 USING (col1) +PREWHERE (col2 * 2) :: UInt8 +; + DROP TABLE IF EXISTS test1; DROP TABLE IF EXISTS test2; diff --git a/tests/queries/0_stateless/02539_settings_alias.reference b/tests/queries/0_stateless/02539_settings_alias.reference index db17cf631de..a4b3d996674 100644 --- a/tests/queries/0_stateless/02539_settings_alias.reference +++ b/tests/queries/0_stateless/02539_settings_alias.reference @@ -18,7 +18,7 @@ Using HTTP with query params Using client options 0 2 -CREATE VIEW default.`02539_settings_alias_view`\n(\n `1` UInt8\n) AS\nSELECT 1\nSETTINGS replication_alter_partitions_sync = 2 +CREATE VIEW default.`02539_settings_alias_view`\n(\n `1` UInt8\n)\nAS SELECT 1\nSETTINGS replication_alter_partitions_sync = 2 replication_alter_partitions_sync 0 1 alter_sync replication_alter_partitions_sync 2 1 alter_sync alter_sync 0 1 diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 7e43f249a74..9bb0c022752 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -13,7 +13,7 @@ FROM day_, type_1 FROM test_grouping_sets_predicate - PREWHERE day_ = \'2023-01-05\' + WHERE day_ = \'2023-01-05\' GROUP BY GROUPING SETS ( (day_, type_1), @@ -39,8 +39,10 @@ ExpressionTransform × 2 Copy 1 → 2 (Expression) ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -70,8 +72,8 @@ ExpressionTransform × 2 FilterTransform (Filter) FilterTransform - (Filter) - FilterTransform + (Expression) + ExpressionTransform (ReadFromMergeTree) MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) diff --git a/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere.sql b/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere.sql index 1e969afac33..805186edcbd 100644 --- a/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere.sql +++ b/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS test_02559; -CREATE TABLE test_02559 (id1 UInt64, id2 UInt64) ENGINE=MergeTree ORDER BY id1; +CREATE TABLE test_02559 (id1 UInt64, id2 UInt64) ENGINE=MergeTree ORDER BY id1 SETTINGS min_bytes_for_wide_part = 0; INSERT INTO 
test_02559 SELECT number, number FROM numbers(10); diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference index 029f80b46b0..596a047c104 100644 --- a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference @@ -1,2 +1,24 @@ -11 queryfinish OK -11 querystart OK +"-- Attach MV to system.query_log and check that writing query_log will not fail\n\nset log_queries=1;","querystart","OK" +"-- Attach MV to system.query_log and check that writing query_log will not fail\n\nset log_queries=1;","queryfinish","OK" +"drop table if exists log_proxy_02572;","querystart","OK" +"drop table if exists log_proxy_02572;","queryfinish","OK" +"drop table if exists push_to_logs_proxy_mv_02572;","querystart","OK" +"drop table if exists push_to_logs_proxy_mv_02572;","queryfinish","OK" +"-- create log tables\nsystem flush logs;","querystart","OK" +"-- create log tables\nsystem flush logs;","queryfinish","OK" +"create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572');","querystart","OK" +"create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572');","queryfinish","OK" +"create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log;","querystart","OK" +"create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log;","queryfinish","OK" +"select 1 format Null;","querystart","OK" +"select 1 format Null;","queryfinish","OK" +"system flush logs;","querystart","OK" +"system flush logs;","queryfinish","OK" +"system flush logs;","querystart","OK" +"system flush logs;","queryfinish","OK" +"drop table log_proxy_02572;","querystart","OK" +"drop table log_proxy_02572;","queryfinish","OK" +"drop table push_to_logs_proxy_mv_02572;","querystart","OK" +"drop table push_to_logs_proxy_mv_02572;","queryfinish","OK" +"set log_queries=0;","querystart","OK" +"set log_queries=0;","queryfinish","OK" diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql index a7a74190821..2381639fba0 100644 --- a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql @@ -21,10 +21,12 @@ system flush logs; drop table log_proxy_02572; drop table push_to_logs_proxy_mv_02572; +set log_queries=0; + system flush logs; -- lower() to pass through clickhouse-test "exception" check -select count(), lower(type::String), errorCodeToName(exception_code) +select replaceAll(query, '\n', '\\n'), lower(type::String), errorCodeToName(exception_code) from system.query_log where current_database = currentDatabase() - group by 2, 3 - order by 2; + order by event_time_microseconds + format CSV; diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index 288f1129b53..e346d9893a7 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -9,7 +9,7 @@ 
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "INSERT TO S3" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/profile_events.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10, s3_truncate_on_insert = 1; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | sort +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | sort echo "CHECK WITH query_log" $CLICKHOUSE_CLIENT -nq " diff --git a/tests/queries/0_stateless/02703_row_policy_for_database.reference b/tests/queries/0_stateless/02703_row_policy_for_database.reference index ec03e538d95..b67ea69ae72 100644 --- a/tests/queries/0_stateless/02703_row_policy_for_database.reference +++ b/tests/queries/0_stateless/02703_row_policy_for_database.reference @@ -3,7 +3,7 @@ CREATE ROW POLICY db1_02703 ON db1_02703.* FOR SELECT USING 1 TO ALL -- SHOW CREATE POLICY ON db1_02703.* CREATE ROW POLICY db1_02703 ON db1_02703.* FOR SELECT USING 1 TO ALL -CREATE ROW POLICY tbl1_02703 ON db1_02703.table FOR SELECT USING 1 TO ALL +CREATE ROW POLICY tbl1_02703 ON db1_02703.`table` FOR SELECT USING 1 TO ALL -- SHOW CREATE POLICY ON db1_02703.`*` R1, R2: (x == 1) OR (x == 2) 1 diff --git a/tests/queries/0_stateless/02708_dotProduct.reference b/tests/queries/0_stateless/02708_dotProduct.reference new file mode 100644 index 00000000000..5cc9a9f0502 --- /dev/null +++ b/tests/queries/0_stateless/02708_dotProduct.reference @@ -0,0 +1,34 @@ +-- Negative tests +-- Tests + -- Array +[1,2,3] [4,5,6] 32 UInt16 +[1,2,3] [4,5,6] 32 UInt32 +[1,2,3] [4,5,6] 32 UInt64 +[1,2,3] [4,5,6] 32 UInt64 +[-1,-2,-3] [4,5,6] -32 Int16 +[-1,-2,-3] [4,5,6] -32 Int32 +[-1,-2,-3] [4,5,6] -32 Int64 +[-1,-2,-3] [4,5,6] -32 Int64 +[1,2,3] [4,5,6] 32 Float32 +[1,2,3] [4,5,6] 32 Float64 + -- Tuple +(1,2,3) (4,5,6) 32 UInt64 +(1,2,3) (4,5,6) 32 UInt64 +(1,2,3) (4,5,6) 32 UInt64 +(1,2,3) (4,5,6) 32 UInt64 +(-1,-2,-3) (4,5,6) -32 Int64 +(-1,-2,-3) (4,5,6) -32 Int64 +(-1,-2,-3) (4,5,6) -32 Int64 +(-1,-2,-3) (4,5,6) -32 Int64 +(1,2,3) (4,5,6) 32 Float64 +(1,2,3) (4,5,6) 32 Float64 +-- Non-const argument +[1,2,3] [4,5,6] 32 UInt16 + -- Array with mixed element arguments types (result type is the supertype) +[1,2,3] [4,5,6] 32 Float32 + -- Tuple with mixed element arguments types +(1,2,3) (4,5,6) 32 Float64 +-- Aliases +32 +32 +32 diff --git a/tests/queries/0_stateless/02708_dotProduct.sql b/tests/queries/0_stateless/02708_dotProduct.sql new file mode 100644 index 00000000000..6ad615664e8 --- /dev/null +++ b/tests/queries/0_stateless/02708_dotProduct.sql @@ -0,0 +1,47 @@ +SELECT '-- Negative tests'; + +SELECT arrayDotProduct([1, 2]); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT arrayDotProduct([1, 2], 'abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayDotProduct('abc', [1, 2]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayDotProduct([1, 2], ['abc', 'def']); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayDotProduct([1, 2], [3, 4, 5]); -- { serverError BAD_ARGUMENTS } +SELECT dotProduct([1, 2], (3, 4, 5)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT '-- Tests'; +SELECT ' -- Array'; +SELECT [1, 2, 3]::Array(UInt8) AS x, [4, 5, 6]::Array(UInt8) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(UInt16) AS x, [4, 5, 6]::Array(UInt16) AS y, dotProduct(x, y) 
AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(UInt32) AS x, [4, 5, 6]::Array(UInt32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(UInt64) AS x, [4, 5, 6]::Array(UInt64) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [-1, -2, -3]::Array(Int8) AS x, [4, 5, 6]::Array(Int8) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [-1, -2, -3]::Array(Int16) AS x, [4, 5, 6]::Array(Int16) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [-1, -2, -3]::Array(Int32) AS x, [4, 5, 6]::Array(Int32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [-1, -2, -3]::Array(Int64) AS x, [4, 5, 6]::Array(Int64) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(Float32) AS x, [4, 5, 6]::Array(Float32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(Float64) AS x, [4, 5, 6]::Array(Float64) AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT ' -- Tuple'; +SELECT (1::UInt8, 2::UInt8, 3::UInt8) AS x, (4::UInt8, 5::UInt8, 6::UInt8) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::UInt16, 2::UInt16, 3::UInt16) AS x, (4::UInt16, 5::UInt16, 6::UInt16) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::UInt32, 2::UInt32, 3::UInt32) AS x, (4::UInt32, 5::UInt32, 6::UInt32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::UInt64, 2::UInt64, 3::UInt64) AS x, (4::UInt64, 5::UInt64, 6::UInt64) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (-1::Int8, -2::Int8, -3::Int8) AS x, (4::Int8, 5::Int8, 6::Int8) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (-1::Int16, -2::Int16, -3::Int16) AS x, (4::Int16, 5::Int16, 6::Int16) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (-1::Int32, -2::Int32, -3::Int32) AS x, (4::Int32, 5::Int32, 6::Int32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (-1::Int64, -2::Int64, -3::Int64) AS x, (4::Int64, 5::Int64, 6::Int64) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::Float32, 2::Float32, 3::Float32) AS x, (4::Float32, 5::Float32, 6::Float32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::Float64, 2::Float64, 3::Float64) AS x, (4::Float64, 5::Float64, 6::Float64) AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT '-- Non-const argument'; +SELECT materialize([1::UInt8, 2::UInt8, 3::UInt8]) AS x, [4::UInt8, 5::UInt8, 6::UInt8] AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT ' -- Array with mixed element arguments types (result type is the supertype)'; +SELECT [1::UInt16, 2::UInt8, 3::Float32] AS x, [4::Int16, 5::Float32, 6::UInt8] AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT ' -- Tuple with mixed element arguments types'; +SELECT (1::UInt16, 2::UInt8, 3::Float32) AS x, (4::Int16, 5::Float32, 6::UInt8) AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT '-- Aliases'; +SELECT scalarProduct([1, 2, 3], [4, 5, 6]); +SELECT scalarProduct((1, 2, 3), (4, 5, 6)); +SELECT arrayDotProduct([1, 2, 3], [4, 5, 6]); -- actually no alias but the internal function for arrays diff --git a/tests/queries/0_stateless/02708_dot_product.reference b/tests/queries/0_stateless/02708_dot_product.reference deleted file mode 100644 index 45e53871aa2..00000000000 --- a/tests/queries/0_stateless/02708_dot_product.reference +++ /dev/null @@ -1,14 +0,0 @@ -3881.304 -3881.304 -3881.304 -376.5 -230 -0 -0 -Float64 -Float32 -Float64 -Float64 -UInt16 -UInt64 -Int64 diff --git a/tests/queries/0_stateless/02708_dot_product.sql b/tests/queries/0_stateless/02708_dot_product.sql deleted file mode 100644 index 
e94cb577bf4..00000000000 --- a/tests/queries/0_stateless/02708_dot_product.sql +++ /dev/null @@ -1,55 +0,0 @@ -SELECT dotProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); - -SELECT scalarProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); - -SELECT arrayDotProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); - -SELECT dotProduct([1.3, 2, 3, 4, 5], [222, 12, 5.3, 2, 8]); - -SELECT dotProduct([1, 1, 1, 1, 1], [222, 12, 0, -12, 8]); - -SELECT round(dotProduct([12345678901234567], [1]) - dotProduct(tuple(12345678901234567), tuple(1)), 2); - -SELECT round(dotProduct([-1, 2, 3.002], [2, 3.4, 4]) - dotProduct((-1, 2, 3.002), (2, 3.4, 4)), 2); - -DROP TABLE IF EXISTS product_fp64_fp64; -CREATE TABLE product_fp64_fp64 (x Array(Float64), y Array(Float64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_fp64_fp64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_fp64_fp64; -DROP TABLE product_fp64_fp64; - -DROP TABLE IF EXISTS product_fp32_fp32; -CREATE TABLE product_fp32_fp32 (x Array(Float32), y Array(Float32)) engine = MergeTree() order by x; -INSERT INTO TABLE product_fp32_fp32 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_fp32_fp32; -DROP TABLE product_fp32_fp32; - -DROP TABLE IF EXISTS product_fp32_fp64; -CREATE TABLE product_fp32_fp64 (x Array(Float32), y Array(Float64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_fp32_fp64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_fp32_fp64; -DROP TABLE product_fp32_fp64; - -DROP TABLE IF EXISTS product_uint8_fp64; -CREATE TABLE product_uint8_fp64 (x Array(UInt8), y Array(Float64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_uint8_fp64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_uint8_fp64; -DROP TABLE product_uint8_fp64; - -DROP TABLE IF EXISTS product_uint8_uint8; -CREATE TABLE product_uint8_uint8 (x Array(UInt8), y Array(UInt8)) engine = MergeTree() order by x; -INSERT INTO TABLE product_uint8_uint8 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_uint8_uint8; -DROP TABLE product_uint8_uint8; - -DROP TABLE IF EXISTS product_uint64_uint64; -CREATE TABLE product_uint64_uint64 (x Array(UInt64), y Array(UInt64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_uint64_uint64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_uint64_uint64; -DROP TABLE product_uint64_uint64; - -DROP TABLE IF EXISTS product_int32_uint64; -CREATE TABLE product_int32_uint64 (x Array(Int32), y Array(UInt64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_int32_uint64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_int32_uint64; -DROP TABLE product_int32_uint64; diff --git a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql index 26bc9ebe62b..1c70a77c4d1 100644 --- a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql +++ b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql @@ -42,12 +42,12 @@ ORDER BY event_time_microseconds; -- 1 * 8 + AggregateFunction(argMax, String, DateTime) -- -- Size of AggregateFunction(argMax, String, DateTime): --- SingleValueDataString() + SingleValueDataFixed(DateTime) --- SingleValueDataString = 64B for small strings, 64B + string size + 1 for larger --- SingleValueDataFixed(DateTime) = 1 + 4. 
With padding = 8 --- SingleValueDataString Total: 72B +-- 1 Base class + 1 specific/value class: +-- Base class: MAX(sizeOf(SingleValueDataFixed), sizeOf(SingleValueDataString), sizeOf(SingleValueDataGeneric)) = 64 +-- Specific class: SingleValueDataFixed(DateTime) = 4 + 1. With padding = 8 +-- Total: 8 + 64 + 8 = 80 -- --- ColumnAggregateFunction total: 8 + 72 = 80 +-- ColumnAggregateFunction total: 8 + 2 * 64 = 136 SELECT 'AggregateFunction(argMax, String, DateTime)', read_rows, read_bytes diff --git a/tests/queries/0_stateless/02722_database_filesystem.reference b/tests/queries/0_stateless/02722_database_filesystem.reference index 17f84cfc49c..2aa7e74befb 100644 --- a/tests/queries/0_stateless/02722_database_filesystem.reference +++ b/tests/queries/0_stateless/02722_database_filesystem.reference @@ -23,4 +23,3 @@ OK OK OK OK -OK diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index f3af7abcbb3..0c75c15fc69 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -81,8 +81,6 @@ CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); # FILE_DOESNT_EXIST: unknown file ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "FILE_DOESNT_EXIST" > /dev/null && echo "OK" || echo 'FAIL' ||: -# BAD_ARGUMENTS: Cannot determine the file format by it's extension -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Clean ${CLICKHOUSE_CLIENT} --query "DROP DATABASE test1;" rm -rd $tmp_dir diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index 13b627c0342..80b47282146 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,7 +46,7 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "S3_ERROR" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; diff --git a/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference index e6b95502e1e..80bff2c12b3 100644 --- a/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference +++ b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference @@ -1,2 +1,2 @@ ReadFromMergeTree (p1) - Granules: 1/12 + Granules: 1/12 diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index b4e081f6de0..d62f928e947 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -58,7 +58,7 @@ SELECT * FROM \"abacaba/file.tsv\" """ 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM 
test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02732_transform_fuzz.sql b/tests/queries/0_stateless/02732_transform_fuzz.sql index c2918d4da81..872cf3a6599 100644 --- a/tests/queries/0_stateless/02732_transform_fuzz.sql +++ b/tests/queries/0_stateless/02732_transform_fuzz.sql @@ -1 +1 @@ -SELECT caseWithExpr(arrayReduce(NULL, []), []); -- { serverError BAD_ARGUMENTS } +SELECT caseWithExpr(arrayReduce(NULL, []), []); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql index 19125abf8da..fe45a2a317d 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.sql +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -6,6 +6,7 @@ set output_format_parquet_data_page_size = 800; set output_format_parquet_batch_size = 100; set output_format_parquet_row_group_size_bytes = 1000000000; set engine_file_truncate_on_insert=1; +set allow_suspicious_low_cardinality_types=1; -- Write random data to parquet file, then read from it and check that it matches what we wrote. 
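-- [Editorial sketch, not part of this patch or of 02735_parquet_encoder.sql] The round-trip that the
-- surrounding comments describe can be pictured with the file() table function; the file name and
-- columns below are illustrative only:
set engine_file_truncate_on_insert = 1;
insert into function file('roundtrip_sketch.parquet', 'Parquet') select number as n, toString(number) as s from numbers(10);
select * from file('roundtrip_sketch.parquet', 'Parquet') order by n;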
-- Do this for all kinds of data types: primitive, Nullable(primitive), Array(primitive), diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge_alias.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge_alias.sql.j2 index bdd456951dd..99ac89c4eb4 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge_alias.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge_alias.sql.j2 @@ -12,8 +12,6 @@ CREATE TABLE 02763_a_merge (x UInt8, y UInt64, z UInt64) ENGINE = Merge(currentD {% for prew in [0 , 1] -%} - - SELECT 'x, y, z FROM 02763_a_merge'; SELECT x, y, z FROM 02763_a_merge ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT '* FROM 02763_a_merge'; diff --git a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh index cded0b28409..84031ad9081 100755 --- a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh +++ b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -n -q " CREATE TABLE input (key Int) Engine=Null; CREATE TABLE output AS input Engine=Null; - CREATE MATERIALIZED VIEW mv TO output AS SELECT * FROM input; + CREATE MATERIALIZED VIEW mv TO output SQL SECURITY NONE AS SELECT * FROM input; " for allow_experimental_analyzer in 0 1; do diff --git a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference index 33df18c8801..e23e3094ca3 100644 --- a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference +++ b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference @@ -1,43 +1,43 @@ 1 2 3 1 2 3 1 2 3 - ReadFromMergeTree (default.data_02771) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: x_idx - Description: minmax GRANULARITY 1 - Parts: 0/1 - Granules: 0/1 - Skip - Name: y_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 - Skip - Name: xy_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 - ReadFromMergeTree (default.data_02771) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: x_idx - Description: minmax GRANULARITY 1 - Parts: 0/1 - Granules: 0/1 - Skip - Name: y_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + Skip + Name: xy_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 ReadFromMergeTree (default.data_02771) Indexes: PrimaryKey diff --git a/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect b/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect index 8ba5774820e..3798acf2a93 100755 --- a/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect +++ b/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect @@ -123,7 +123,7 @@ expect -- "| dt_tz2 | DATETIME 
| NO | | NULL | | expect -- "| enm | TEXT | NO | | NULL | |" expect -- "| f32 | FLOAT | NO | | NULL | |" expect -- "| f64 | DOUBLE | NO | | NULL | |" -expect -- "| fs | BLOB | NO | | NULL | |" +expect -- "| fs | TEXT | NO | | NULL | |" expect -- "| i128 | TEXT | NO | | NULL | |" expect -- "| i16 | SMALLINT | NO | | NULL | |" expect -- "| i256 | TEXT | NO | | NULL | |" @@ -132,74 +132,8 @@ expect -- "| i64 | BIGINT | NO | | NULL | | expect -- "| i8 | TINYINT | NO | | NULL | |" expect -- "| ip4 | TEXT | NO | | NULL | |" expect -- "| ip6 | TEXT | NO | | NULL | |" -expect -- "| lfs | BLOB | NO | | NULL | |" -expect -- "| lnfs | BLOB | YES | | NULL | |" -expect -- "| lns | BLOB | YES | | NULL | |" -expect -- "| ls | BLOB | NO | | NULL | |" -expect -- "| m | JSON | NO | | NULL | |" -expect -- "| m_complex | JSON | NO | | NULL | |" -expect -- "| mpg | TEXT | NO | | NULL | |" -expect -- "| ndt64 | DATETIME | YES | | NULL | |" -expect -- "| ndt64_tz | DATETIME | YES | | NULL | |" -expect -- "| nested.col1 | TEXT | NO | | NULL | |" -expect -- "| nested.col2 | TEXT | NO | | NULL | |" -expect -- "| nfs | BLOB | YES | | NULL | |" -expect -- "| ns | BLOB | YES | | NULL | |" -expect -- "| o | JSON | NO | | NULL | |" -expect -- "| p | TEXT | NO | | NULL | |" -expect -- "| pg | TEXT | NO | | NULL | |" -expect -- "| r | TEXT | NO | | NULL | |" -expect -- "| s | BLOB | NO | | NULL | |" -expect -- "| sagg | TEXT | NO | | NULL | |" -expect -- "| t | JSON | NO | | NULL | |" -expect -- "| ui128 | TEXT | NO | | NULL | |" -expect -- "| ui16 | SMALLINT UNSIGNED | NO | | NULL | |" -expect -- "| ui256 | TEXT | NO | | NULL | |" -expect -- "| ui32 | INTEGER UNSIGNED | NO | | NULL | |" -expect -- "| ui64 | BIGINT UNSIGNED | NO | | NULL | |" -expect -- "| ui8 | TINYINT UNSIGNED | NO | | NULL | |" -expect -- "| uuid | CHAR | NO | | NULL | |" -expect -- "+---------------+-------------------+------+------+---------+-------+" - -send -- "SHOW COLUMNS FROM tab SETTINGS mysql_map_string_to_text_in_show_columns=1;\r" -expect -- "+---------------+-------------------+------+------+---------+-------+" -expect -- "| field | type | null | key | default | extra |" -expect -- "+---------------+-------------------+------+------+---------+-------+" -expect -- "| a | TEXT | NO | | NULL | |" -expect -- "| agg | TEXT | NO | | NULL | |" -expect -- "| b | TINYINT | NO | | NULL | |" -expect -- "| d | DATE | NO | | NULL | |" -expect -- "| d32 | DATE | NO | | NULL | |" -expect -- "| dec128 | DECIMAL(38, 2) | NO | | NULL | |" -expect -- "| dec128_native | DECIMAL(35, 30) | NO | | NULL | |" -expect -- "| dec128_text | TEXT | NO | | NULL | |" -expect -- "| dec256 | TEXT | NO | | NULL | |" -expect -- "| dec256_native | DECIMAL(65, 2) | NO | | NULL | |" -expect -- "| dec256_text | TEXT | NO | | NULL | |" -expect -- "| dec32 | DECIMAL(9, 2) | NO | | NULL | |" -expect -- "| dec64 | DECIMAL(18, 2) | NO | | NULL | |" -expect -- "| dt | DATETIME | NO | | NULL | |" -expect -- "| dt64 | DATETIME | NO | | NULL | |" -expect -- "| dt64_3_tz1 | DATETIME | NO | | NULL | |" -expect -- "| dt64_3_tz2 | DATETIME | NO | | NULL | |" -expect -- "| dt64_6 | DATETIME | NO | | NULL | |" -expect -- "| dt64_9 | DATETIME | NO | | NULL | |" -expect -- "| dt_tz1 | DATETIME | NO | | NULL | |" -expect -- "| dt_tz2 | DATETIME | NO | | NULL | |" -expect -- "| enm | TEXT | NO | | NULL | |" -expect -- "| f32 | FLOAT | NO | | NULL | |" -expect -- "| f64 | DOUBLE | NO | | NULL | |" -expect -- "| fs | BLOB | NO | | NULL | |" -expect -- "| i128 | TEXT | NO | | NULL | |" -expect -- "| 
i16 | SMALLINT | NO | | NULL | |" -expect -- "| i256 | TEXT | NO | | NULL | |" -expect -- "| i32 | INTEGER | NO | | NULL | |" -expect -- "| i64 | BIGINT | NO | | NULL | |" -expect -- "| i8 | TINYINT | NO | | NULL | |" -expect -- "| ip4 | TEXT | NO | | NULL | |" -expect -- "| ip6 | TEXT | NO | | NULL | |" -expect -- "| lfs | BLOB | NO | | NULL | |" -expect -- "| lnfs | BLOB | YES | | NULL | |" +expect -- "| lfs | TEXT | NO | | NULL | |" +expect -- "| lnfs | TEXT | YES | | NULL | |" expect -- "| lns | TEXT | YES | | NULL | |" expect -- "| ls | TEXT | NO | | NULL | |" expect -- "| m | JSON | NO | | NULL | |" @@ -209,7 +143,7 @@ expect -- "| ndt64 | DATETIME | YES | | NULL | | expect -- "| ndt64_tz | DATETIME | YES | | NULL | |" expect -- "| nested.col1 | TEXT | NO | | NULL | |" expect -- "| nested.col2 | TEXT | NO | | NULL | |" -expect -- "| nfs | BLOB | YES | | NULL | |" +expect -- "| nfs | TEXT | YES | | NULL | |" expect -- "| ns | TEXT | YES | | NULL | |" expect -- "| o | JSON | NO | | NULL | |" expect -- "| p | TEXT | NO | | NULL | |" @@ -227,7 +161,7 @@ expect -- "| ui8 | TINYINT UNSIGNED | NO | | NULL | | expect -- "| uuid | CHAR | NO | | NULL | |" expect -- "+---------------+-------------------+------+------+---------+-------+" -send -- "SHOW COLUMNS FROM tab SETTINGS mysql_map_fixed_string_to_text_in_show_columns=1;\r" +send -- "SHOW COLUMNS FROM tab SETTINGS mysql_map_string_to_text_in_show_columns=0;\r" expect -- "+---------------+-------------------+------+------+---------+-------+" expect -- "| field | type | null | key | default | extra |" expect -- "+---------------+-------------------+------+------+---------+-------+" @@ -293,6 +227,73 @@ expect -- "| ui8 | TINYINT UNSIGNED | NO | | NULL | | expect -- "| uuid | CHAR | NO | | NULL | |" expect -- "+---------------+-------------------+------+------+---------+-------+" +send -- "SHOW COLUMNS FROM tab SETTINGS mysql_map_fixed_string_to_text_in_show_columns=0;\r" +expect -- "+---------------+-------------------+------+------+---------+-------+" +expect -- "| field | type | null | key | default | extra |" +expect -- "+---------------+-------------------+------+------+---------+-------+" +expect -- "| a | TEXT | NO | | NULL | |" +expect -- "| agg | TEXT | NO | | NULL | |" +expect -- "| b | TINYINT | NO | | NULL | |" +expect -- "| d | DATE | NO | | NULL | |" +expect -- "| d32 | DATE | NO | | NULL | |" +expect -- "| dec128 | DECIMAL(38, 2) | NO | | NULL | |" +expect -- "| dec128_native | DECIMAL(35, 30) | NO | | NULL | |" +expect -- "| dec128_text | TEXT | NO | | NULL | |" +expect -- "| dec256 | TEXT | NO | | NULL | |" +expect -- "| dec256_native | DECIMAL(65, 2) | NO | | NULL | |" +expect -- "| dec256_text | TEXT | NO | | NULL | |" +expect -- "| dec32 | DECIMAL(9, 2) | NO | | NULL | |" +expect -- "| dec64 | DECIMAL(18, 2) | NO | | NULL | |" +expect -- "| dt | DATETIME | NO | | NULL | |" +expect -- "| dt64 | DATETIME | NO | | NULL | |" +expect -- "| dt64_3_tz1 | DATETIME | NO | | NULL | |" +expect -- "| dt64_3_tz2 | DATETIME | NO | | NULL | |" +expect -- "| dt64_6 | DATETIME | NO | | NULL | |" +expect -- "| dt64_9 | DATETIME | NO | | NULL | |" +expect -- "| dt_tz1 | DATETIME | NO | | NULL | |" +expect -- "| dt_tz2 | DATETIME | NO | | NULL | |" +expect -- "| enm | TEXT | NO | | NULL | |" +expect -- "| f32 | FLOAT | NO | | NULL | |" +expect -- "| f64 | DOUBLE | NO | | NULL | |" +expect -- "| fs | BLOB | NO | | NULL | |" +expect -- "| i128 | TEXT | NO | | NULL | |" +expect -- "| i16 | SMALLINT | NO | | NULL | |" +expect -- "| i256 | TEXT | 
NO | | NULL | |" +expect -- "| i32 | INTEGER | NO | | NULL | |" +expect -- "| i64 | BIGINT | NO | | NULL | |" +expect -- "| i8 | TINYINT | NO | | NULL | |" +expect -- "| ip4 | TEXT | NO | | NULL | |" +expect -- "| ip6 | TEXT | NO | | NULL | |" +expect -- "| lfs | BLOB | NO | | NULL | |" +expect -- "| lnfs | BLOB | YES | | NULL | |" +expect -- "| lns | TEXT | YES | | NULL | |" +expect -- "| ls | TEXT | NO | | NULL | |" +expect -- "| m | JSON | NO | | NULL | |" +expect -- "| m_complex | JSON | NO | | NULL | |" +expect -- "| mpg | TEXT | NO | | NULL | |" +expect -- "| ndt64 | DATETIME | YES | | NULL | |" +expect -- "| ndt64_tz | DATETIME | YES | | NULL | |" +expect -- "| nested.col1 | TEXT | NO | | NULL | |" +expect -- "| nested.col2 | TEXT | NO | | NULL | |" +expect -- "| nfs | BLOB | YES | | NULL | |" +expect -- "| ns | TEXT | YES | | NULL | |" +expect -- "| o | JSON | NO | | NULL | |" +expect -- "| p | TEXT | NO | | NULL | |" +expect -- "| pg | TEXT | NO | | NULL | |" +expect -- "| r | TEXT | NO | | NULL | |" +expect -- "| s | TEXT | NO | | NULL | |" +expect -- "| sagg | TEXT | NO | | NULL | |" +expect -- "| t | JSON | NO | | NULL | |" +expect -- "| ui128 | TEXT | NO | | NULL | |" +expect -- "| ui16 | SMALLINT UNSIGNED | NO | | NULL | |" +expect -- "| ui256 | TEXT | NO | | NULL | |" +expect -- "| ui32 | INTEGER UNSIGNED | NO | | NULL | |" +expect -- "| ui64 | BIGINT UNSIGNED | NO | | NULL | |" +expect -- "| ui8 | TINYINT UNSIGNED | NO | | NULL | |" +expect -- "| uuid | CHAR | NO | | NULL | |" +expect -- "+---------------+-------------------+------+------+---------+-------+" + + send -- "DROP TABLE tab;" send -- "quit;\r" diff --git a/tests/queries/0_stateless/02783_max_bytes_to_read_in_schema_inference.sql b/tests/queries/0_stateless/02783_max_bytes_to_read_in_schema_inference.sql index b4165e8e80a..ef0381df1a6 100644 --- a/tests/queries/0_stateless/02783_max_bytes_to_read_in_schema_inference.sql +++ b/tests/queries/0_stateless/02783_max_bytes_to_read_in_schema_inference.sql @@ -1,5 +1,5 @@ set input_format_max_rows_to_read_for_schema_inference=2; set input_format_json_infer_incomplete_types_as_strings=0; -desc format('JSONEachRow', '{"a" : null}, {"a" : 42}') settings input_format_max_bytes_to_read_for_schema_inference=10; -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +desc format('JSONEachRow', '{"a" : null}, {"a" : 42}') settings input_format_max_bytes_to_read_for_schema_inference=10; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} desc format('JSONEachRow', '{"a" : null}, {"a" : 42}') settings input_format_max_bytes_to_read_for_schema_inference=20; diff --git a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference index 1340b3affe3..ef9d076449a 100644 --- a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference +++ b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.reference @@ -4,34 +4,34 @@ The argument is before the reference time point ─────────────────────────────────────────────── Row 1: ────── -syslog_arg: Jun 30 23:58:30 -res: 2023-06-30 23:58:30 -res_null: 2023-06-30 23:58:30 -res_zero: 2023-06-30 23:58:30 -res_us: 2023-06-30 23:58:30 -res_us_null: 2023-06-30 23:58:30 -res_us_zero: 2023-06-30 23:58:30 -res64: 2023-06-30 23:58:30.000 -res64_null: 2023-06-30 23:58:30.000 -res64_zero: 2023-06-30 23:58:30.000 -res64_us: 2023-06-30 23:58:30.000 -res64_us_null: 2023-06-30 23:58:30.000 -res64_us_zero: 2023-06-30 23:58:30.000 +syslog_arg: Jun 29 23:59:30 
+res: 2023-06-29 23:59:30 +res_null: 2023-06-29 23:59:30 +res_zero: 2023-06-29 23:59:30 +res_us: 2023-06-29 23:59:30 +res_us_null: 2023-06-29 23:59:30 +res_us_zero: 2023-06-29 23:59:30 +res64: 2023-06-29 23:59:30.000 +res64_null: 2023-06-29 23:59:30.000 +res64_zero: 2023-06-29 23:59:30.000 +res64_us: 2023-06-29 23:59:30.000 +res64_us_null: 2023-06-29 23:59:30.000 +res64_us_zero: 2023-06-29 23:59:30.000 ────────────────────────────────────────────── The argument is after the reference time point ────────────────────────────────────────────── Row 1: ────── -syslog_arg: Jul 1 00:00:30 -res: 2022-07-01 00:00:30 -res_null: 2022-07-01 00:00:30 -res_zero: 2022-07-01 00:00:30 -res_us: 2022-07-01 00:00:30 -res_us_null: 2022-07-01 00:00:30 -res_us_zero: 2022-07-01 00:00:30 -res64: 2022-07-01 00:00:30.000 -res64_null: 2022-07-01 00:00:30.000 -res64_zero: 2022-07-01 00:00:30.000 -res64_us: 2022-07-01 00:00:30.000 -res64_us_null: 2022-07-01 00:00:30.000 -res64_us_zero: 2022-07-01 00:00:30.000 +syslog_arg: Jul 1 23:59:30 +res: 2022-06-30 23:59:30 +res_null: 2022-06-30 23:59:30 +res_zero: 2022-06-30 23:59:30 +res_us: 2022-06-30 23:59:30 +res_us_null: 2022-06-30 23:59:30 +res_us_zero: 2022-06-30 23:59:30 +res64: 2022-06-30 23:59:30.000 +res64_null: 2022-06-30 23:59:30.000 +res64_zero: 2022-06-30 23:59:30.000 +res64_us: 2022-06-30 23:59:30.000 +res64_us_null: 2022-06-30 23:59:30.000 +res64_us_zero: 2022-06-30 23:59:30.000 diff --git a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql index c67722393ab..ecaec9f99bf 100644 --- a/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql +++ b/tests/queries/0_stateless/02783_parsedatetimebesteffort_syslog.sql @@ -8,7 +8,7 @@ SELECT '──────────────────────── WITH toDateTime('2023-06-30 23:59:30') AS dt_ref, now() AS dt_now, - date_sub(MINUTE, 1, dt_now) as dt_before, + date_sub(DAY, 1, dt_now) as dt_before, dateDiff(SECOND, dt_ref, dt_now) AS time_shift, formatDateTime(dt_before, '%b %e %T') AS syslog_before SELECT @@ -34,7 +34,7 @@ SELECT '──────────────────────── WITH toDateTime('2023-06-30 23:59:30') AS dt_ref, now() AS dt_now, - date_add(MINUTE, 1, dt_now) as dt_after, + date_add(DAY, 1, dt_now) as dt_after, dateDiff(SECOND, dt_ref, dt_now) AS time_shift, formatDateTime(dt_after, '%b %e %T') AS syslog_after SELECT diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh index ef3e6000903..801cd22b79f 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -## Note: The analyzer doesn't support JOIN with parallel replicas yet +# Tags: no-tsan, no-asan, no-msan +# It's not clear why distributed aggregation is much slower with sanitizers (https://github.com/ClickHouse/ClickHouse/issues/60625) CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference index 63658890119..fca48238778 100644 --- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference +++ 
b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference @@ -24,21 +24,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -66,21 +66,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, 
constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -244,21 +244,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1998-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1998-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -289,34 +289,34 @@ QUERY id: 0 FUNCTION id: 14, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 - COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 17, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 18, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - FUNCTION id: 20, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + FUNCTION id: 19, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 21, nodes: 2 - COLUMN id: 22, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 23, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 24, function_name: less, function_type: ordinary, result_type: UInt8 + LIST id: 20, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 22, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 25, nodes: 2 - COLUMN id: 26, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 27, constant_value: \'1995-01-01\', constant_value_type: String - FUNCTION id: 28, function_name: and, function_type: ordinary, result_type: UInt8 + LIST 
id: 23, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 24, constant_value: \'1995-01-01\', constant_value_type: String + FUNCTION id: 25, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 29, nodes: 2 - FUNCTION id: 30, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 26, nodes: 2 + FUNCTION id: 27, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 31, nodes: 2 - COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 33, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 34, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 28, nodes: 2 + COLUMN id: 29, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 30, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 31, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 35, nodes: 2 - COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 36, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 32, nodes: 2 + COLUMN id: 29, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 33, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1, @@ -346,26 +346,26 @@ QUERY id: 0 FUNCTION id: 11, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 12, nodes: 2 - COLUMN id: 13, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 14, constant_value: \'1993-01-01\', constant_value_type: String - FUNCTION id: 15, function_name: less, function_type: ordinary, result_type: UInt8 + COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 14, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 16, nodes: 2 - COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 18, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 19, function_name: and, function_type: ordinary, result_type: UInt8 + LIST id: 15, nodes: 2 + COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 20, nodes: 2 - FUNCTION id: 21, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + FUNCTION id: 19, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 22, nodes: 2 - COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 25, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 20, nodes: 2 + COLUMN id: 21, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 22, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 23, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 26, nodes: 2 - COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 27, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 24, nodes: 2 + COLUMN id: 21, column_name: id, 
result_type: UInt32, source_id: 3 + CONSTANT id: 25, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -425,22 +425,22 @@ QUERY id: 0 FUNCTION id: 10, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 11, nodes: 2 - COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 13, constant_value: \'1994-01-01\', constant_value_type: String + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 12, constant_value: \'1994-01-01\', constant_value_type: String WHERE - FUNCTION id: 14, function_name: and, function_type: ordinary, result_type: UInt8 + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 15, nodes: 2 - FUNCTION id: 16, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 19, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 20, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + COLUMN id: 17, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 19, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 21, nodes: 2 - COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 20, nodes: 2 + COLUMN id: 17, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -479,8 +479,8 @@ QUERY id: 0 FUNCTION id: 19, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 20, nodes: 2 - COLUMN id: 21, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 22, constant_value: \'1994-01-01\', constant_value_type: String + COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -582,21 +582,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, 
function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -624,21 +624,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -666,21 +666,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, 
function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date_t @@ -847,26 +847,26 @@ QUERY id: 0 FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 - COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 17, constant_value: \'1993-01-01\', constant_value_type: String - FUNCTION id: 18, function_name: less, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 22, function_name: and, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 19, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 20, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - FUNCTION id: 24, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 25, nodes: 2 - COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 27, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 28, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 23, nodes: 2 + COLUMN id: 24, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 25, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 26, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 29, nodes: 2 - COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 30, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 27, nodes: 2 + COLUMN id: 24, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 28, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM datetime_t @@ -894,21 +894,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 14, constant_value: 
\'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM datetime_t @@ -936,21 +936,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date32_t @@ -978,21 +978,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: 
Date32, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM date32_t @@ -1020,21 +1020,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM datetime64_t @@ -1062,21 +1062,21 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + 
COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT value1 FROM datetime64_t @@ -1104,19 +1104,19 @@ QUERY id: 0 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 13, nodes: 2 - COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3 - CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String - FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 - FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 22, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh index ce90157d004..e4a1de9a2ec 100755 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh +++ 
b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh @@ -19,8 +19,8 @@ query_id=$(${CLICKHOUSE_CLIENT} --query "select queryID() from ($query) limit 1" ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -nm --query " WITH - ProfileEvents['ReadBufferFromS3ResetSessions'] AS reset, - ProfileEvents['ReadBufferFromS3PreservedSessions'] AS preserved + ProfileEvents['DiskConnectionsReset'] AS reset, + ProfileEvents['DiskConnectionsPreserved'] AS preserved SELECT preserved > reset FROM system.query_log WHERE type = 'QueryFinish' @@ -51,7 +51,7 @@ select queryID() from( " 2>&1) ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -nm --query " -SELECT ProfileEvents['ReadWriteBufferFromHTTPPreservedSessions'] > 0 +SELECT ProfileEvents['StorageConnectionsPreserved'] > 0 FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() diff --git a/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql b/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql index e204d968382..3532f617e89 100644 --- a/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql +++ b/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql @@ -3,9 +3,9 @@ DROP TABLE IF EXISTS v; create view v (s LowCardinality(String), n UInt8) as select 'test' as s, toUInt8(number) as n from numbers(10000000); -- this is what allows mem usage to go really high -set max_block_size=10000000000; +set max_block_size=4294967296; -set max_memory_usage = '1Gi'; +set max_memory_usage = '420Mi'; select s, sum(n) from v group by s format Null; diff --git a/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 index 79a7c654f10..95bac76c591 100644 --- a/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 +++ b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 @@ -1,4 +1,4 @@ - +SET allow_suspicious_low_cardinality_types=1; DROP TABLE IF EXISTS test1__fuzz_36; DROP TABLE IF EXISTS test1__fuzz_38; diff --git a/tests/queries/0_stateless/02809_prewhere_and_in.reference b/tests/queries/0_stateless/02809_prewhere_and_in.reference index 3080ae862bb..54ea18b3eab 100644 --- a/tests/queries/0_stateless/02809_prewhere_and_in.reference +++ b/tests/queries/0_stateless/02809_prewhere_and_in.reference @@ -1,8 +1,8 @@ -PREWHERE a IN -PREWHERE a IN -PREWHERE a IN -PREWHERE a IN -PREWHERE b NOT IN -PREWHERE b NOT IN -PREWHERE b NOT IN -PREWHERE b NOT IN + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter diff --git a/tests/queries/0_stateless/02809_prewhere_and_in.sql b/tests/queries/0_stateless/02809_prewhere_and_in.sql index 345577d6c7c..448f9512cf6 100644 --- a/tests/queries/0_stateless/02809_prewhere_and_in.sql +++ b/tests/queries/0_stateless/02809_prewhere_and_in.sql @@ -16,40 +16,16 @@ AS SELECT * FROM numbers(10); SET optimize_move_to_prewhere=1; -- Queries with 'IN' -SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE a IN (SELECT * FROM system.one) -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE a IN (1,2,3) -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE a IN t_02809_set -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 
13) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE a IN t_02809_aux -) WHERE explain LIKE '%WHERE%'; - +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a IN (SELECT * FROM system.one)) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a IN (1,2,3)) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a IN t_02809_set) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a IN t_02809_aux) WHERE explain LIKE '%Prewhere filter'; -- Queries with 'NOT IN' -SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE b NOT IN (SELECT * FROM system.one) -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE b NOT IN (1,2,3) -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE b NOT IN t_02809_set -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE b NOT IN t_02809_aux -) WHERE explain LIKE '%WHERE%'; - +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a NOT IN (SELECT * FROM system.one)) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a NOT IN (1,2,3)) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a NOT IN t_02809_set) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a NOT IN t_02809_aux) WHERE explain LIKE '%Prewhere filter'; DROP TABLE t_02809; DROP TABLE t_02809_set; diff --git a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference b/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference deleted file mode 100644 index 7614df8ec46..00000000000 --- a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference +++ /dev/null @@ -1,3 +0,0 @@ -ok -ok -ok diff --git a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh b/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh deleted file mode 100755 index c88ba4d5a74..00000000000 --- a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings - -# set -x - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - - -$CLICKHOUSE_CLIENT -nm --query """ -DROP TABLE IF EXISTS test; - -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, delayed_cleanup_interval_ms = 10000000, disk = s3_disk), min_bytes_for_wide_part = 10485760; - -INSERT INTO test SELECT 1, 'test'; -""" - -query_id=$RANDOM - -$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" - -${CLICKHOUSE_CLIENT} -q "system flush logs" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -offset=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -path=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT cache_path FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""") - -rm $path - -$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" 2>&1 | grep -F -e "No such file or directory" > /dev/null && echo "ok" || echo "fail" - -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g') - -$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" 2>&1 | grep -q "$key" && echo 'ok' || echo 'fail' - -$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -offset=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -path=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT cache_path FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""") - -echo -n 'fff' > $path - -#cat $path - -$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" 2>&1 | grep -q "$key" && echo 'ok' || echo 'fail' - -$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" - -$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" diff --git a/tests/queries/0_stateless/02813_create_index_noop.sql b/tests/queries/0_stateless/02813_create_index_noop.sql index 3d65f81af9d..0f32dc6bdf3 100644 --- a/tests/queries/0_stateless/02813_create_index_noop.sql +++ b/tests/queries/0_stateless/02813_create_index_noop.sql @@ -997,4 +997,4 @@ CREATE INDEX idx_tab4_5 ON tab4 (col4,col0 DESC); CREATE INDEX idx_tab4_5 ON tab4 (col4,col1 DESC); CREATE INDEX idx_tab4_5 ON tab4 (col4,col1 DESC,col3); CREATE INDEX idx_tab4_5 ON tab4 (col4,col3 DESC,col1 DESC); -CREATE INDEX idx_tab4_5 ON tab4 (col4,col3); \ No newline at end of file +CREATE INDEX idx_tab4_5 ON tab4 (col4,col3); diff --git a/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql b/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql index 929d0474e09..496267f2476 100644 --- a/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql +++ b/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql @@ -1,6 +1,3 @@ --- Tags: no-cpu-aarch64 --- Tag no-cpu-aarch64: values generated are slighly different on aarch64 - DROP TABLE IF EXISTS tb2; CREATE TABLE tb2 (`period` UInt32, `ts` Array(Float64)) ENGINE = Memory; diff --git a/tests/queries/0_stateless/02813_seriesOutliersDetectTukey.sql 
b/tests/queries/0_stateless/02813_seriesOutliersDetectTukey.sql index ca116e8b7ed..0030929e6a3 100644 --- a/tests/queries/0_stateless/02813_seriesOutliersDetectTukey.sql +++ b/tests/queries/0_stateless/02813_seriesOutliersDetectTukey.sql @@ -8,7 +8,7 @@ INSERT INTO tb1 VALUES (1, [-3, 2.40, 15, 3.90, 5, 6, 4.50, 5.20, 3, 4, 5, 16, 7 -- non-const inputs SELECT seriesOutliersDetectTukey(a) FROM tb1 ORDER BY n; -SELECT seriesOutliersDetectTukey(a,10,90,1.5) FROM tb1 ORDER BY n; +SELECT seriesOutliersDetectTukey(a,.10,.90,1.5) FROM tb1 ORDER BY n; DROP TABLE IF EXISTS tb1; -- const inputs @@ -16,17 +16,17 @@ SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, SELECT seriesOutliersDetectTukey([-3, 2.40, 15, 3.90, 5, 6, 4.50, 5.20, 12, 60, 12, 3.40, 3, 4, 5, 6, 3.40, 2.7]); -- const inputs with optional arguments -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 25, 75, 1.5); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 10, 90, 1.5); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 2, 98, 1.5); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 2, 98, 1.5); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], .25, .75, 1.5); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], .10, .90, 1.5); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], .02, .98, 1.5); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 0.02, 0.98, 1.5); SELECT seriesOutliersDetectTukey(arrayMap(x -> sin(x / 10), range(30))); -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 25, 75, 3); +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], .25, .75, 3); -- negative tests -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], 25, 75, -1); -- { serverError BAD_ARGUMENTS} -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 33, 53); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} -SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], 33); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6], .25, .75, -1); -- { serverError BAD_ARGUMENTS} +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], .33, .53); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT seriesOutliersDetectTukey([-3, 2, 15, 3], .33); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} SELECT seriesOutliersDetectTukey([-3, 2.4, 15, NULL]); -- { serverError ILLEGAL_COLUMN} SELECT seriesOutliersDetectTukey([]); -- { serverError ILLEGAL_COLUMN} -SELECT seriesOutliersDetectTukey([-3, 2.4, 15]); -- { serverError BAD_ARGUMENTS} \ No newline at end of file +SELECT seriesOutliersDetectTukey([-3, 2.4, 15]); -- { serverError BAD_ARGUMENTS} diff --git a/tests/queries/0_stateless/02815_analyzer_aggregate_functions_of_group_by_keys.reference b/tests/queries/0_stateless/02815_analyzer_aggregate_functions_of_group_by_keys.reference new file mode 100644 index 00000000000..84119736fe9 --- /dev/null +++ b/tests/queries/0_stateless/02815_analyzer_aggregate_functions_of_group_by_keys.reference @@ -0,0 +1,526 @@ +set optimize_aggregators_of_group_by_keys = 1 +0 0 +0 1 +0 2 +1 0 +1 1 +1 2 +0 0 +0 1 +0 2 +1 0 +1 1 +1 2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +2 +2 +3 +3 +4 +4 +4 +5 +6 +6 +6 +8 +8 +9 +10 +12 +12 
+12 +15 +16 +18 +20 +24 +0 +0 +QUERY id: 0 + PROJECTION COLUMNS + a UInt8 + b UInt8 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 9, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 10, nodes: 1 + CONSTANT id: 11, constant_value: UInt64_10000000, constant_value_type: UInt32 + GROUP BY + LIST id: 12, nodes: 2 + FUNCTION id: 13, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 16, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 19, nodes: 2 + SORT id: 20, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + SORT id: 21, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 7, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 9, constant_value: UInt64_3, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + a UInt8 + b UInt8 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 9, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 10, nodes: 1 + CONSTANT id: 11, constant_value: UInt64_10000000, constant_value_type: UInt32 + GROUP BY + LIST id: 12, nodes: 2 + FUNCTION id: 13, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 16, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 19, nodes: 2 + SORT id: 20, sort_direction: 
ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + SORT id: 21, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 7, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 9, constant_value: UInt64_3, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + a UInt16 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 9, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 11, constant_value: UInt64_7, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_10000000, constant_value_type: UInt32 + GROUP BY + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 17, constant_value: UInt64_7, constant_value_type: UInt8 + FUNCTION id: 18, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 20, constant_value: UInt64_5, constant_value_type: UInt8 + ORDER BY + LIST id: 21, nodes: 1 + SORT id: 22, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 9, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 11, constant_value: UInt64_7, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + foo UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: foo, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, alias: __table1, is_subquery: 1 + PROJECTION COLUMNS + foo UInt64 + PROJECTION + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: number, result_type: UInt64, source_id: 6 + JOIN TREE + TABLE_FUNCTION id: 6, alias: __table2, table_function_name: numbers + ARGUMENTS + LIST id: 7, nodes: 1 + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: UInt8 + GROUP BY + LIST id: 9, nodes: 1 + COLUMN id: 5, column_name: number, result_type: UInt64, source_id: 6 +QUERY id: 0 + PROJECTION COLUMNS + 
min(number) OVER (PARTITION BY modulo(number, 2)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: min, function_type: window, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + WINDOW + WINDOW id: 6, frame_type: RANGE, frame_begin_type: unbounded preceding, frame_end_type: current + PARTITION BY + LIST id: 7, nodes: 1 + FUNCTION id: 8, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 11, nodes: 1 + CONSTANT id: 12, constant_value: UInt64_3, constant_value_type: UInt8 + GROUP BY + LIST id: 13, nodes: 1 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 +set optimize_aggregators_of_group_by_keys = 0 +0 0 +0 1 +0 2 +1 0 +1 1 +1 2 +0 0 +0 1 +0 2 +1 0 +1 1 +1 2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +2 +2 +3 +3 +4 +4 +4 +5 +6 +6 +6 +8 +8 +9 +10 +12 +12 +12 +15 +16 +18 +20 +24 +0 +QUERY id: 0 + PROJECTION COLUMNS + a UInt8 + b UInt8 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: min, function_type: aggregate, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: max, function_type: aggregate, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 1 + FUNCTION id: 11, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 13, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 14, nodes: 1 + CONSTANT id: 15, constant_value: UInt64_10000000, constant_value_type: UInt32 + GROUP BY + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 19, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 20, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 23, nodes: 2 + SORT id: 24, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: min, function_type: aggregate, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + SORT id: 25, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 9, function_name: max, function_type: aggregate, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 1 + FUNCTION id: 11, function_name: modulo, function_type: 
ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 13, constant_value: UInt64_3, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + a UInt8 + b UInt8 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: any, function_type: aggregate, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: anyLast, function_type: aggregate, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 1 + FUNCTION id: 11, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 13, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 14, nodes: 1 + CONSTANT id: 15, constant_value: UInt64_10000000, constant_value_type: UInt32 + GROUP BY + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 19, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 20, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 23, nodes: 2 + SORT id: 24, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: any, function_type: aggregate, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + SORT id: 25, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 9, function_name: anyLast, function_type: aggregate, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 1 + FUNCTION id: 11, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 13, constant_value: UInt64_3, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + a UInt16 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: max, function_type: aggregate, result_type: UInt16 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 11, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + 
CONSTANT id: 13, constant_value: UInt64_7, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 14, nodes: 1 + CONSTANT id: 15, constant_value: UInt64_10000000, constant_value_type: UInt32 + GROUP BY + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 19, constant_value: UInt64_7, constant_value_type: UInt8 + FUNCTION id: 20, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 22, constant_value: UInt64_5, constant_value_type: UInt8 + ORDER BY + LIST id: 23, nodes: 1 + SORT id: 24, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: max, function_type: aggregate, result_type: UInt16 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 11, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 13, constant_value: UInt64_7, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + foo UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: foo, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, alias: __table1, is_subquery: 1 + PROJECTION COLUMNS + foo UInt64 + PROJECTION + LIST id: 4, nodes: 1 + FUNCTION id: 5, function_name: anyLast, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, alias: __table2, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + GROUP BY + LIST id: 11, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 +QUERY id: 0 + PROJECTION COLUMNS + min(number) OVER (PARTITION BY modulo(number, 2)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: min, function_type: window, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + WINDOW + WINDOW id: 6, frame_type: RANGE, frame_begin_type: unbounded preceding, frame_end_type: current + PARTITION BY + LIST id: 7, nodes: 1 + FUNCTION id: 8, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 11, nodes: 1 + CONSTANT id: 12, constant_value: UInt64_3, constant_value_type: UInt8 + GROUP BY + LIST id: 13, nodes: 1 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 diff --git 
a/tests/queries/0_stateless/02815_analyzer_aggregate_functions_of_group_by_keys.sql b/tests/queries/0_stateless/02815_analyzer_aggregate_functions_of_group_by_keys.sql new file mode 100644 index 00000000000..ca03cbb6f9f --- /dev/null +++ b/tests/queries/0_stateless/02815_analyzer_aggregate_functions_of_group_by_keys.sql @@ -0,0 +1,39 @@ +set allow_experimental_analyzer = 1; +set optimize_move_functions_out_of_any = 0; + +SELECT 'set optimize_aggregators_of_group_by_keys = 1'; +set optimize_aggregators_of_group_by_keys = 1; + +SELECT min(number % 2) AS a, max(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b; +SELECT any(number % 2) AS a, anyLast(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b; +SELECT max((number % 5) * (number % 7)) AS a FROM numbers(10000000) GROUP BY number % 7, number % 5 ORDER BY a; +SELECT foo FROM (SELECT anyLast(number) AS foo FROM numbers(1) GROUP BY number); +SELECT anyLast(number) FROM numbers(1) GROUP BY number; + +EXPLAIN QUERY TREE SELECT min(number % 2) AS a, max(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b; +EXPLAIN QUERY TREE SELECT any(number % 2) AS a, anyLast(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b; +EXPLAIN QUERY TREE SELECT max((number % 5) * (number % 7)) AS a FROM numbers(10000000) GROUP BY number % 7, number % 5 ORDER BY a; +EXPLAIN QUERY TREE SELECT foo FROM (SELECT anyLast(number) AS foo FROM numbers(1) GROUP BY number); + +EXPLAIN QUERY TREE +SELECT min(number) OVER (PARTITION BY number % 2) +FROM numbers(3) +GROUP BY number; + +SELECT 'set optimize_aggregators_of_group_by_keys = 0'; +set optimize_aggregators_of_group_by_keys = 0; + +SELECT min(number % 2) AS a, max(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b; +SELECT any(number % 2) AS a, anyLast(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b; +SELECT max((number % 5) * (number % 7)) AS a FROM numbers(10000000) GROUP BY number % 7, number % 5 ORDER BY a; +SELECT foo FROM (SELECT anyLast(number) AS foo FROM numbers(1) GROUP BY number); + +EXPLAIN QUERY TREE SELECT min(number % 2) AS a, max(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b; +EXPLAIN QUERY TREE SELECT any(number % 2) AS a, anyLast(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b; +EXPLAIN QUERY TREE SELECT max((number % 5) * (number % 7)) AS a FROM numbers(10000000) GROUP BY number % 7, number % 5 ORDER BY a; +EXPLAIN QUERY TREE SELECT foo FROM (SELECT anyLast(number) AS foo FROM numbers(1) GROUP BY number); + +EXPLAIN QUERY TREE +SELECT min(number) OVER (PARTITION BY number % 2) +FROM numbers(3) +GROUP BY number; diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh index b1fbea26da7..9234c428147 100755 --- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings -# requires TraceCollector, does not available under sanitizers and aarch64 +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck 
source=../shell_config.sh diff --git a/tests/queries/0_stateless/02833_url_without_path_encoding.sh b/tests/queries/0_stateless/02833_url_without_path_encoding.sh index b71586099cf..eb845c6b45b 100755 --- a/tests/queries/0_stateless/02833_url_without_path_encoding.sh +++ b/tests/queries/0_stateless/02833_url_without_path_encoding.sh @@ -8,5 +8,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=1" # Grep 'test%2Fa.tsv' to ensure that path wasn't encoded/decoded -$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=0" 2>&1 | grep -o "test%2Fa.tsv" -m1 - +$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=0" 2>&1 | \ + grep -o "test%2Fa.tsv" -m1 | head -n 1 diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference index 6e04d969e67..b91a4dd2f68 100644 --- a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference +++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference @@ -1,20 +1,15 @@ -SELECT count() -FROM t_02848_mt1 -PREWHERE notEmpty(v) AND (k = 3) + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) 1 -SELECT count() -FROM t_02848_mt2 -PREWHERE (d LIKE \'%es%\') AND (c < 20) AND (b = \'3\') AND (a = 3) + Prewhere filter + Prewhere filter column: and(like(d, \'%es%\'), less(c, 20), equals(b, \'3\'), equals(a, 3)) (removed) 1 -SELECT count() -FROM t_02848_mt2 -PREWHERE (d LIKE \'%es%\') AND (c < 20) AND (c > 0) AND (a = 3) + Prewhere filter + Prewhere filter column: and(like(d, \'%es%\'), less(c, 20), greater(c, 0), equals(a, 3)) (removed) 1 -SELECT count() -FROM t_02848_mt2 -PREWHERE (d LIKE \'%es%\') AND (b = \'3\') AND (c < 20) + Prewhere filter + Prewhere filter column: and(like(d, \'%es%\'), equals(b, \'3\'), less(c, 20)) (removed) 1 -SELECT count() -FROM t_02848_mt2 -PREWHERE (d LIKE \'%es%\') AND (b = \'3\') AND (a = 3) + Prewhere filter + Prewhere filter column: and(like(d, \'%es%\'), equals(b, \'3\'), equals(a, 3)) (removed) 1 diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql index bc9d7e5664e..f863d765798 100644 --- a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql +++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql @@ -8,7 +8,7 @@ CREATE TABLE t_02848_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETT INSERT INTO t_02848_mt1 SELECT number, toString(number) FROM numbers(100); -EXPLAIN SYNTAX SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v); +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v); CREATE TABLE t_02848_mt2 (a UInt32, b String, c Int32, d String) ENGINE = MergeTree ORDER BY (a,b,c) SETTINGS min_bytes_for_wide_part=0; @@ -18,16 +18,16 @@ INSERT INTO t_02848_mt2 SELECT number, toString(number), number, 'aaaabbbbccccdd -- the estimated column sizes are: {a: 428, b: 318, c: 428, d: 73} -- it's not correct but let's fix it in the future. 
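+-- Keep only the Prewhere/Filter lines of EXPLAIN actions=1 and strip the __table1. prefixes and _UInt8/_String constant suffixes, so the expected output does not depend on the rest of the query plan.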
-EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%') WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%'; -EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%') WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%'; -EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%') WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%'; -EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%') WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%'; DROP TABLE t_02848_mt1; diff --git a/tests/queries/0_stateless/02864_statistic_operate.reference b/tests/queries/0_stateless/02864_statistic_operate.reference index 7fad7c810c1..3e291485031 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.reference +++ b/tests/queries/0_stateless/02864_statistic_operate.reference @@ -1,31 +1,31 @@ CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `b` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After insert -SELECT count() -FROM t1 -PREWHERE (a < 10) AND (b < 10) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) 10 0 After drop statistic -SELECT count() -FROM t1 -PREWHERE (b < 10) AND (a < 10) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) 10 CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After add statistic CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `b` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After materialize statistic -SELECT count() -FROM t1 -PREWHERE (a < 10) AND (b < 10) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) 20 After merge -SELECT count() -FROM t1 -PREWHERE (a < 10) AND (b < 10) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) 20 CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `c` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After rename 
-SELECT count() -FROM t1 -PREWHERE (a < 10) AND (c < 10) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) 20 diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql b/tests/queries/0_stateless/02864_statistic_operate.sql index 29bd213f04a..5f1c30f8eec 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.sql +++ b/tests/queries/0_stateless/02864_statistic_operate.sql @@ -3,7 +3,7 @@ DROP TABLE IF EXISTS t1; SET allow_experimental_statistic = 1; SET allow_statistic_optimize = 1; -CREATE TABLE t1 +CREATE TABLE t1 ( a Float64 STATISTIC(tdigest), b Int64 STATISTIC(tdigest), @@ -16,14 +16,14 @@ SHOW CREATE TABLE t1; INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; SELECT 'After insert'; -EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < NULL and a < '10'; ALTER TABLE t1 DROP STATISTIC a, b TYPE tdigest; SELECT 'After drop statistic'; -EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SHOW CREATE TABLE t1; @@ -38,20 +38,20 @@ ALTER TABLE t1 MATERIALIZE STATISTIC a, b TYPE tdigest; INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; SELECT 'After materialize statistic'; -EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; OPTIMIZE TABLE t1 FINAL; SELECT 'After merge'; -EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; ALTER TABLE t1 RENAME COLUMN b TO c; SHOW CREATE TABLE t1; SELECT 'After rename'; -EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE c < 10 and a < 10; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count(*) FROM t1 WHERE c < 10 and a < 10; DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/02867_page_cache.reference b/tests/queries/0_stateless/02867_page_cache.reference new file mode 100644 index 00000000000..c3d6484a175 --- /dev/null +++ b/tests/queries/0_stateless/02867_page_cache.reference @@ -0,0 +1,21 @@ +cold read 54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +repeat read 1 54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkDataHits 1 +dropped and bypassed cache 54975576145920 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +repeat read 2 
54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +repeat read 3 54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkDataHits 1 diff --git a/tests/queries/0_stateless/02867_page_cache.sql b/tests/queries/0_stateless/02867_page_cache.sql new file mode 100644 index 00000000000..f1882de4af6 --- /dev/null +++ b/tests/queries/0_stateless/02867_page_cache.sql @@ -0,0 +1,106 @@ +-- Tags: no-fasttest, no-parallel +-- no-fasttest because we need an S3 storage policy +-- no-parallel because we look at server-wide counters about page cache usage + +set use_page_cache_for_disks_without_file_cache = 1; +set page_cache_inject_eviction = 0; +set enable_filesystem_cache = 0; +set use_uncompressed_cache = 0; + +create table events_snapshot engine Memory as select * from system.events; +create view events_diff as + -- round all stats to 70 MiB to leave a lot of leeway for overhead + with if(event like '%Bytes%', 70*1024*1024, 35) as granularity, + -- cache hits counter can vary a lot depending on other settings: + -- e.g. if merge_tree_min_bytes_for_concurrent_read is small, multiple threads will read each chunk + -- so we just check that the value is not too low + if(event in ( + 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages', + 'PageCacheChunkDataHits'), 1, 1000) as clamp + select event, min2(intDiv(new.value - old.value, granularity), clamp) as diff + from system.events new + left outer join events_snapshot old + on old.event = new.event + where diff != 0 and + event in ( + 'ReadBufferFromS3Bytes', 'PageCacheChunkMisses', 'PageCacheChunkDataMisses', + 'PageCacheChunkDataHits', 'PageCacheChunkDataPartialHits', + 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages') + order by event; + +drop table if exists page_cache_03055; +create table page_cache_03055 (k Int64 CODEC(NONE)) engine MergeTree order by k settings storage_policy = 's3_cache'; + +-- Write an 80 MiB file (40 x 2 MiB chunks), and a few small files. +system stop merges page_cache_03055; +insert into page_cache_03055 select * from numbers(10485760) settings max_block_size=100000000, preferred_block_size_bytes=1000000000; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +system start merges page_cache_03055; +optimize table page_cache_03055 final; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Cold read, should miss cache. (Populating cache on write is not implemented yet.) + +select 'cold read', sum(k) from page_cache_03055; + +select * from events_diff where event not in ('PageCacheChunkDataHits'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Repeat read, should hit cache. + +select 'repeat read 1', sum(k) from page_cache_03055; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Drop cache and read again, should miss. Also don't write to cache. + +system drop page cache; + +select 'dropped and bypassed cache', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; + +-- Data could be read multiple times because we're not writing to cache. 
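+-- (For that reason the query below only checks that ReadBufferFromS3Bytes and PageCacheChunkMisses are >= 1 instead of comparing exact values.)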
+-- (Not checking PageCacheBytesUnpinned* because it's unreliable in this case because of an intentional race condition, see PageCache::evictChunk.) +select event, if(event in ('PageCacheChunkMisses', 'ReadBufferFromS3Bytes'), diff >= 1, diff) from events_diff where event not in ('PageCacheChunkDataHits', 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Repeat read, should still miss, but populate cache. + +select 'repeat read 2', sum(k) from page_cache_03055; + +select * from events_diff where event not in ('PageCacheChunkDataHits'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Read again, hit the cache. + +select 'repeat read 3', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + + +-- Known limitation: cache is not invalidated if a table is dropped and created again at the same path. +-- set allow_deprecated_database_ordinary=1; +-- create database test_03055 engine = Ordinary; +-- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; +-- insert into test_03055.t values (1); +-- select * from test_03055.t; +-- drop table test_03055.t; +-- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; +-- insert into test_03055.t values (2); +-- select * from test_03055.t; + + +drop table events_snapshot; +drop table page_cache_03055; +drop view events_diff; diff --git a/tests/queries/0_stateless/02868_select_support_from_keywords.reference b/tests/queries/0_stateless/02868_select_support_from_keywords.reference index d2dcb047cf0..6782e51e0e9 100644 --- a/tests/queries/0_stateless/02868_select_support_from_keywords.reference +++ b/tests/queries/0_stateless/02868_select_support_from_keywords.reference @@ -1 +1 @@ -CREATE VIEW default.test_view\n(\n `date` Date,\n `__sign` Int8,\n `from` Float64,\n `to` Float64\n) AS\nWITH cte AS\n (\n SELECT\n date,\n __sign,\n from,\n to\n FROM default.test_table\n FINAL\n )\nSELECT\n date,\n __sign,\n from,\n to\nFROM cte +CREATE VIEW default.test_view\n(\n `date` Date,\n `__sign` Int8,\n `from` Float64,\n `to` Float64\n)\nAS WITH cte AS\n (\n SELECT\n date,\n __sign,\n from,\n to\n FROM default.test_table\n FINAL\n )\nSELECT\n date,\n __sign,\n from,\n to\nFROM cte diff --git a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference index 7a5e798359b..8999deae9f6 100644 --- a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference +++ b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference @@ -1,47 +1,51 @@ -CreatingSets - Expression - ReadFromMergeTree - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 1-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets - Expression - ReadFromMergeTree - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 1-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets - Expression - ReadFromMergeTree - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 5-element set)) - Parts: 1/1 - 
Granules: 1/1 -CreatingSets - Expression - ReadFromMergeTree - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 5-element set)) - Parts: 1/1 - Granules: 1/1 +CreatingSets + Expression + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((value in 1-element set), (id in (-Inf, 10])) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((value in 1-element set), (id in (-Inf, 10])) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((value in 5-element set), (id in (-Inf, 10])) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((value in 5-element set), (id in (-Inf, 10])) + Parts: 1/1 + Granules: 1/1 CreatingSets Expression Expression diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference new file mode 100644 index 00000000000..79728fadc04 --- /dev/null +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -0,0 +1,32 @@ +===== StorageView ===== +OK +OK +OK +2 +2 +OK +OK +2 +2 +OK +2 +2 +OK +===== MaterializedView ===== +OK +0 +0 +OK +OK +OK +2 +OK +OK +===== TestGrants ===== +OK +OK +===== TestRowPolicy ===== +1 1 +2 2 +6 6 +9 9 diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh new file mode 100755 index 00000000000..a4ab3ed0024 --- /dev/null +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -0,0 +1,226 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +user1="user02884_1_$RANDOM$RANDOM" +user2="user02884_2_$RANDOM$RANDOM" +user3="user02884_3_$RANDOM$RANDOM" +db="db02884_$RANDOM$RANDOM" + +${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -c "INVOKER") >= 1 )) && echo "OK" || echo "UNEXPECTED" +(( $(${CLICKHOUSE_CLIENT} --query "SHOW TABLE $db.test_view_2" 2>&1 | grep -c "DEFINER = $user1") >= 1 )) && echo "OK" || echo "UNEXPECTED" + +${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_view_2" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_view_3" +(( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_view_4" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +(( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_view_5" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_view_6" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_view_7" +(( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_view_8" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_view_9" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_view_10" + +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $db.test_view_10 MODIFY SQL SECURITY INVOKER" +(( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_view_10" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" + + +echo "===== MaterializedView =====" +${CLICKHOUSE_CLIENT} --query " + CREATE MATERIALIZED VIEW $db.test_mv_1 (s String) + ENGINE = MergeTree ORDER BY s + DEFINER = $user1 SQL SECURITY DEFINER + AS SELECT * FROM $db.test_table; +" + +(( $(${CLICKHOUSE_CLIENT} --query " + CREATE MATERIALIZED VIEW $db.test_mv_2 (s String) + ENGINE = MergeTree ORDER BY s + SQL SECURITY INVOKER + AS SELECT * FROM $db.test_table; +" 2>&1 | grep -c "SQL SECURITY INVOKER can't be specified for MATERIALIZED VIEW") >= 1 )) && echo "OK" || echo "UNEXPECTED" + +${CLICKHOUSE_CLIENT} --query " + CREATE MATERIALIZED VIEW $db.test_mv_3 (s String) + ENGINE = MergeTree ORDER BY s + SQL SECURITY NONE + AS SELECT * FROM $db.test_table; +" + +${CLICKHOUSE_CLIENT} --query "CREATE TABLE $db.test_mv_data (s String) ENGINE = MergeTree ORDER BY s;" + +${CLICKHOUSE_CLIENT} --query " + CREATE MATERIALIZED VIEW $db.test_mv_4 + TO $db.test_mv_data + DEFINER = $user1 SQL SECURITY DEFINER + AS SELECT * FROM $db.test_table; +" + +${CLICKHOUSE_CLIENT} --query " + CREATE MATERIALIZED VIEW $db.test_mv_5 (s String) + ENGINE = MergeTree ORDER BY s + DEFINER = $user2 SQL SECURITY DEFINER + AS SELECT * FROM $db.test_table; +" + +${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_5 TO $user2" + +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $db.test_mv_5 MODIFY SQL SECURITY NONE" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_5" + +${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_1 TO $user2" +${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_3 TO $user2" +${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_4 TO $user2" + +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_mv_1" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT 
count() FROM $db.test_mv_3" + +${CLICKHOUSE_CLIENT} --query "REVOKE SELECT ON $db.test_mv_data FROM $user1" +(( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_4" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +(( $(${CLICKHOUSE_CLIENT} --query "INSERT INTO $db.test_table VALUES ('foo'), ('bar');" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +(( $(${CLICKHOUSE_CLIENT} --materialized_views_ignore_errors 1 --query "INSERT INTO $db.test_table VALUES ('foo'), ('bar');" 2>&1 | grep -c "Failed to push block to view") >= 1 )) && echo "OK" || echo "UNEXPECTED" + +${CLICKHOUSE_CLIENT} --query "GRANT INSERT ON $db.test_mv_data TO $user1" +${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_mv_data TO $user1" +${CLICKHOUSE_CLIENT} --query "INSERT INTO $db.test_table VALUES ('foo'), ('bar');" +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT count() FROM $db.test_mv_4" + +${CLICKHOUSE_CLIENT} --query "REVOKE SELECT ON $db.test_table FROM $user1" +(( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_4" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +(( $(${CLICKHOUSE_CLIENT} --query "INSERT INTO $db.test_table VALUES ('foo'), ('bar');" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" + + +echo "===== TestGrants =====" +${CLICKHOUSE_CLIENT} --query "GRANT CREATE ON *.* TO $user1" +${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_table TO $user1, $user2" + +${CLICKHOUSE_CLIENT} --user $user1 --query " + CREATE VIEW $db.test_view_g_1 + DEFINER = CURRENT_USER SQL SECURITY DEFINER + AS SELECT * FROM $db.test_table; +" + +(( $(${CLICKHOUSE_CLIENT} --user $user1 --query " + CREATE VIEW $db.test_view_g_2 + DEFINER = $user2 + AS SELECT * FROM $db.test_table; +" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" + +${CLICKHOUSE_CLIENT} --query "GRANT SET DEFINER ON $user2 TO $user1" + +${CLICKHOUSE_CLIENT} --user $user1 --query " + CREATE VIEW $db.test_view_g_2 + DEFINER = $user2 + AS SELECT * FROM $db.test_table; +" + +(( $(${CLICKHOUSE_CLIENT} --user $user1 --query " + CREATE VIEW $db.test_view_g_3 + SQL SECURITY NONE + AS SELECT * FROM $db.test_table; +" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" + +${CLICKHOUSE_CLIENT} --query "GRANT SET DEFINER ON $user2 TO $user1" + + +echo "===== TestRowPolicy =====" +${CLICKHOUSE_CLIENT} --multiquery <= z TO $user2; + +INSERT INTO $db.test_row_t VALUES (1, 2), (1, 1), (2, 2), (3, 2), (4, 0); + +GRANT SELECT ON $db.test_view_row_1 to $user2; +EOF + +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_view_row_1" + +${CLICKHOUSE_CLIENT} --multiquery <= z TO $user2; + +INSERT INTO $db.test_row_t2 VALUES (5, 6), (6, 5), (6, 6), (8, 7), (9, 9); + +GRANT SELECT ON $db.test_mv_row_2 to $user2; +EOF + +${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_row_2" + + +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS $db;" +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS $user1, $user2, $user3"; diff --git a/tests/queries/0_stateless/02885_arg_min_max_combinator.reference b/tests/queries/0_stateless/02885_arg_min_max_combinator.reference index c4e850c0b99..f7ae9e7d104 100644 --- a/tests/queries/0_stateless/02885_arg_min_max_combinator.reference +++ b/tests/queries/0_stateless/02885_arg_min_max_combinator.reference @@ -1,3 +1,4 @@ 200 295 200 245 200 290 +999 diff --git 
a/tests/queries/0_stateless/02885_arg_min_max_combinator.sql b/tests/queries/0_stateless/02885_arg_min_max_combinator.sql index 86ee73d8f3b..8502234acfc 100644 --- a/tests/queries/0_stateless/02885_arg_min_max_combinator.sql +++ b/tests/queries/0_stateless/02885_arg_min_max_combinator.sql @@ -1,3 +1,10 @@ select sumArgMin(number, number % 20), sumArgMax(number, number % 20) from numbers(100); select sumArgMin(number, toString(number % 20)), sumArgMax(number, toString(number % 20)) from numbers(100); select sumArgMinIf(number, number % 20, number % 2 = 0), sumArgMaxIf(number, number % 20, number % 2 = 0) from numbers(100); +select sumArgMin() from numbers(100); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +select sumArgMin(number) from numbers(100); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +-- Try passing a non comparable type, for example an AggregationState +select sumArgMin(number, unhex('0000000000000000')::AggregateFunction(sum, UInt64)) from numbers(100); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + +-- ASAN (data leak) +SELECT sumArgMax(number, tuple(number, repeat('a', (10 * (number % 100))::Int32))) FROM numbers(1000); diff --git a/tests/queries/0_stateless/02887_mutations_subcolumns.reference b/tests/queries/0_stateless/02887_mutations_subcolumns.reference index c2d6cbbd225..1ccc83b48a3 100644 --- a/tests/queries/0_stateless/02887_mutations_subcolumns.reference +++ b/tests/queries/0_stateless/02887_mutations_subcolumns.reference @@ -5,6 +5,6 @@ 4 ttt 5 ttt 6 ttt -{"a":"1","obj":{"k1":1,"k2":null,"k3":null}} -{"a":"3","obj":{"k1":null,"k2":null,"k3":1}} -{"a":"1","obj":{"k1":1,"k2":null,"k3":null}} +1 [('k1',1)] +3 [('k3',1)] +1 [('k1',1)] diff --git a/tests/queries/0_stateless/02887_mutations_subcolumns.sql b/tests/queries/0_stateless/02887_mutations_subcolumns.sql index a01158e1b06..87b3009e929 100644 --- a/tests/queries/0_stateless/02887_mutations_subcolumns.sql +++ b/tests/queries/0_stateless/02887_mutations_subcolumns.sql @@ -40,9 +40,9 @@ INSERT INTO t_mutations_subcolumns VALUES (2, '{"k2": 1}'); INSERT INTO t_mutations_subcolumns VALUES (3, '{"k3": 1}'); ALTER TABLE t_mutations_subcolumns DELETE WHERE obj.k2 = 1; -SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow; +SELECT a, arrayFilter(x -> not isNull(x.2), tupleToNameValuePairs(obj)) FROM t_mutations_subcolumns ORDER BY a; ALTER TABLE t_mutations_subcolumns DELETE WHERE isNull(obj.k1); -SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow; +SELECT a, arrayFilter(x -> not isNull(x.2), tupleToNameValuePairs(obj)) FROM t_mutations_subcolumns ORDER BY a; DROP TABLE t_mutations_subcolumns; diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 5d99df36bb4..ff58202ae49 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -2,237 +2,197 @@ SET describe_compact_output = 0, describe_include_virtual_columns = 0, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┠-│ id │ UInt64 │ │ │ index column │ │ │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 
-└──────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ +┌─name─┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┠+│ id │ UInt64 │ │ │ index column │ │ │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ +└──────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┠-│ id │ UInt64 │ │ │ index column │ │ │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ -└──────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ +┌─name─┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┠+│ id │ UInt64 │ │ │ index column │ │ │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ +└──────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 0, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ +└───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 
-└───────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ +└───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _partition_value │ UInt8 │ │ │ │ │ │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 1 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) 
│ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 1 │ -│ _shard_num │ UInt32 │ │ │ │ │ │ 1 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +│ _shard_num │ UInt32 │ │ │ Deprecated. 
Use function shardNum instead │ │ │ 1 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _partition_value │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┠-│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 
│ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _shard_num │ UInt32 │ │ │ │ │ │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┠+│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ _shard_num │ UInt32 │ │ │ Deprecated. 
Use function shardNum instead │ │ │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┠-│ id │ UInt64 │ -│ arr │ Array(UInt64) │ -│ t │ Tuple( - a String, - b UInt64) │ -└──────┴──────────────────────────────────┘ +┌─name─┬─type──────────────────────┠+│ id │ UInt64 │ +│ arr │ Array(UInt64) │ +│ t │ Tuple(a String, b UInt64) │ +└──────┴───────────────────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┠-│ id │ UInt64 │ -│ arr │ Array(UInt64) │ -│ t │ Tuple( - a String, - b UInt64) │ -└──────┴──────────────────────────────────┘ +┌─name─┬─type──────────────────────┠+│ id │ UInt64 │ +│ arr │ Array(UInt64) │ +│ t │ Tuple(a String, b UInt64) │ +└──────┴───────────────────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─is_subcolumn─┠-│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ arr.size0 │ UInt64 │ 1 │ -│ t.a │ String │ 1 │ -│ t.b │ UInt64 │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─is_subcolumn─┠+│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ arr.size0 │ UInt64 │ 1 │ +│ t.a │ String │ 1 │ +│ t.b │ UInt64 │ 1 │ +└───────────┴───────────────────────────┴──────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─is_subcolumn─┠-│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ arr.size0 │ UInt64 │ 1 │ -│ t.a │ String │ 1 │ -│ t.b │ UInt64 │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─is_subcolumn─┠+│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ arr.size0 │ UInt64 │ 1 │ +│ t.a │ String │ 1 │ +│ t.b │ UInt64 │ 1 │ +└───────────┴───────────────────────────┴──────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─is_virtual─┠-│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ _part │ LowCardinality(String) │ 1 │ -│ _part_index │ UInt64 │ 1 │ -│ _part_uuid │ UUID │ 1 │ -│ _partition_id │ LowCardinality(String) │ 1 │ -│ _partition_value │ UInt8 │ 1 │ -│ _sample_factor │ Float64 │ 1 │ -│ _part_offset │ UInt64 │ 1 │ -│ _row_exists │ UInt8 │ 1 │ -│ _block_number │ UInt64 │ 1 │ -└──────────────────┴──────────────────────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_virtual─┠+│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ 
Tuple(a String, b UInt64) │ 0 │ +│ _part │ LowCardinality(String) │ 1 │ +│ _part_index │ UInt64 │ 1 │ +│ _part_uuid │ UUID │ 1 │ +│ _partition_id │ LowCardinality(String) │ 1 │ +│ _sample_factor │ Float64 │ 1 │ +│ _part_offset │ UInt64 │ 1 │ +│ _row_exists │ UInt8 │ 1 │ +│ _block_number │ UInt64 │ 1 │ +└────────────────┴───────────────────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─is_virtual─┠-│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ _table │ LowCardinality(String) │ 1 │ -│ _part │ LowCardinality(String) │ 1 │ -│ _part_index │ UInt64 │ 1 │ -│ _part_uuid │ UUID │ 1 │ -│ _partition_id │ LowCardinality(String) │ 1 │ -│ _sample_factor │ Float64 │ 1 │ -│ _part_offset │ UInt64 │ 1 │ -│ _row_exists │ UInt8 │ 1 │ -│ _block_number │ UInt64 │ 1 │ -│ _shard_num │ UInt32 │ 1 │ -└────────────────┴──────────────────────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_virtual─┠+│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ _part │ LowCardinality(String) │ 1 │ +│ _part_index │ UInt64 │ 1 │ +│ _part_uuid │ UUID │ 1 │ +│ _partition_id │ LowCardinality(String) │ 1 │ +│ _sample_factor │ Float64 │ 1 │ +│ _part_offset │ UInt64 │ 1 │ +│ _row_exists │ UInt8 │ 1 │ +│ _block_number │ UInt64 │ 1 │ +│ _shard_num │ UInt32 │ 1 │ +└────────────────┴───────────────────────────┴────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─is_subcolumn─┬─is_virtual─┠-│ id │ UInt64 │ 0 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ 0 │ 1 │ -│ _part_index │ UInt64 │ 0 │ 1 │ -│ _part_uuid │ UUID │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ -│ _partition_value │ UInt8 │ 0 │ 1 │ -│ _sample_factor │ Float64 │ 0 │ 1 │ -│ _part_offset │ UInt64 │ 0 │ 1 │ -│ _row_exists │ UInt8 │ 0 │ 1 │ -│ _block_number │ UInt64 │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ 1 │ 0 │ -│ t.a │ String │ 1 │ 0 │ -│ t.b │ UInt64 │ 1 │ 0 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┠+│ id │ UInt64 │ 0 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ 0 │ 1 │ +│ _part_index │ UInt64 │ 0 │ 1 │ +│ _part_uuid │ UUID │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ +│ _sample_factor │ Float64 │ 0 │ 1 │ +│ _part_offset │ UInt64 │ 0 │ 1 │ +│ _row_exists │ UInt8 │ 0 │ 1 │ +│ _block_number │ UInt64 │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ 1 │ 0 │ +│ t.a │ String │ 1 │ 0 │ +│ t.b │ UInt64 │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─is_subcolumn─┬─is_virtual─┠-│ id │ UInt64 │ 0 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ 0 │ -│ _table │ LowCardinality(String) │ 0 │ 1 │ -│ _part │ LowCardinality(String) │ 0 │ 1 │ -│ _part_index │ UInt64 │ 0 │ 1 │ -│ _part_uuid │ UUID │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ -│ _sample_factor 
│ Float64 │ 0 │ 1 │ -│ _part_offset │ UInt64 │ 0 │ 1 │ -│ _row_exists │ UInt8 │ 0 │ 1 │ -│ _block_number │ UInt64 │ 0 │ 1 │ -│ _shard_num │ UInt32 │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ 1 │ 0 │ -│ t.a │ String │ 1 │ 0 │ -│ t.b │ UInt64 │ 1 │ 0 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┠+│ id │ UInt64 │ 0 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ 0 │ 1 │ +│ _part_index │ UInt64 │ 0 │ 1 │ +│ _part_uuid │ UUID │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ +│ _sample_factor │ Float64 │ 0 │ 1 │ +│ _part_offset │ UInt64 │ 0 │ 1 │ +│ _row_exists │ UInt8 │ 0 │ 1 │ +│ _block_number │ UInt64 │ 0 │ 1 │ +│ _shard_num │ UInt32 │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ 1 │ 0 │ +│ t.a │ String │ 1 │ 0 │ +│ t.b │ UInt64 │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────┘ diff --git a/tests/queries/0_stateless/02890_describe_table_options.sql b/tests/queries/0_stateless/02890_describe_table_options.sql index 236100148ff..63806c7ee3d 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.sql +++ b/tests/queries/0_stateless/02890_describe_table_options.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS t_describe_options; +SET print_pretty_type_names = 0; + CREATE TABLE t_describe_options ( id UInt64 COMMENT 'index column', arr Array(UInt64) DEFAULT [10, 20] CODEC(ZSTD), diff --git a/tests/queries/0_stateless/02899_use_default_format_on_http_exception.reference b/tests/queries/0_stateless/02899_use_default_format_on_http_exception.reference index a943df06764..624bcf80c71 100644 --- a/tests/queries/0_stateless/02899_use_default_format_on_http_exception.reference +++ b/tests/queries/0_stateless/02899_use_default_format_on_http_exception.reference @@ -1,25 +1,47 @@ +SELECT missing column WITH default_format=JSON +404NotFound +Content-Type:application/json;charset=UTF-8 +X-ClickHouse-Ex---tion-Code:47 +"exception":"Code:47. + INSERT WITH default_format=JSON -Content-Type:application/json;charset=UTF-8 -"exception":"Code:62. +501NotImplemented +Content-Type:application/json;charset=UTF-8 +X-ClickHouse-Ex---tion-Code:48 +"exception":"Code:48. INSERT WITH default_format=XML -Content-Type:application/xml;charset=UTF-8 -Code:62.DB::Ex---tion: +501NotImplemented +Content-Type:application/xml;charset=UTF-8 +X-ClickHouse-Ex---tion-Code:48 +Code:48.DB::Ex---tion: INSERT WITH default_format=BADFORMAT -Content-Type:text/plain;charset=UTF-8 -X-ClickHouse-Ex---tion-Code:62 -Code:62.DB::Ex---tion: +501NotImplemented +Content-Type:text/plain;charset=UTF-8 +X-ClickHouse-Ex---tion-Code:48 +Code:48.DB::Ex---tion: + +SELECT missing column WITH X-ClickHouse-Format: JSON +404NotFound +Content-Type:application/json;charset=UTF-8 +X-ClickHouse-Ex---tion-Code:47 +"exception":"Code:47. INSERT WITH X-ClickHouse-Format: JSON -Content-Type:application/json;charset=UTF-8 -"exception":"Code:62. +501NotImplemented +Content-Type:application/json;charset=UTF-8 +X-ClickHouse-Ex---tion-Code:48 +"exception":"Code:48. 
INSERT WITH X-ClickHouse-Format: XML -Content-Type:application/xml;charset=UTF-8 -Code:62.DB::Ex---tion: +501NotImplemented +Content-Type:application/xml;charset=UTF-8 +X-ClickHouse-Ex---tion-Code:48 +Code:48.DB::Ex---tion: INSERT WITH X-ClickHouse-Format: BADFORMAT -Content-Type:text/plain;charset=UTF-8 -X-ClickHouse-Ex---tion-Code:62 -Code:62.DB::Ex---tion: +501NotImplemented +Content-Type:text/plain;charset=UTF-8 +X-ClickHouse-Ex---tion-Code:48 +Code:48.DB::Ex---tion: diff --git a/tests/queries/0_stateless/02899_use_default_format_on_http_exception.sh b/tests/queries/0_stateless/02899_use_default_format_on_http_exception.sh index f92ab7db4fb..5e91fa13e91 100755 --- a/tests/queries/0_stateless/02899_use_default_format_on_http_exception.sh +++ b/tests/queries/0_stateless/02899_use_default_format_on_http_exception.sh @@ -6,28 +6,44 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CH_URL="$CLICKHOUSE_URL&http_write_exception_in_output_format=1" +echo "SELECT missing column WITH default_format=JSON" +echo "SELECT x FROM system.numbers LIMIT 1;"\ + | ${CLICKHOUSE_CURL} -sS "${CH_URL}&default_format=JSON" -i --data-binary @- \ + | grep 'HTTP/1.1\|xception\|Content-Type' | sed 's/Exception/Ex---tion/;s/HTTP\/1.1//;s/\r//' | awk '{ print $1 $2 $3 }' +echo "" echo "INSERT WITH default_format=JSON" echo "INSERT INTO system.numbers Select * from numbers(10);" \ - | ${CLICKHOUSE_CURL} -sS "${CH_URL}&default_format=JSON" -i | grep 'xception\|Content-Type' | sed 's/Exception/Ex---tion/' | awk '{ print $1 $2 $3 }' + | ${CLICKHOUSE_CURL} -sS "${CH_URL}&default_format=JSON" -i --data-binary @- \ + | grep 'HTTP/1.1\|xception\|Content-Type' | sed 's/Exception/Ex---tion/;s/HTTP\/1.1//;s/\r//' | awk '{ print $1 $2 $3 }' echo "" echo "INSERT WITH default_format=XML" echo "INSERT INTO system.numbers Select * from numbers(10);" \ - | ${CLICKHOUSE_CURL} -sS "${CH_URL}&default_format=XML" -i | grep 'xception\|Content-Type' | sed 's/Exception/Ex---tion/' | awk '{ print $1 $2 $3 }' + | ${CLICKHOUSE_CURL} -sS "${CH_URL}&default_format=XML" -i --data-binary @- \ + | grep 'HTTP/1.1\|xception\|Content-Type' | sed 's/Exception/Ex---tion/;s/HTTP\/1.1//;s/\r//' | awk '{ print $1 $2 $3 }' echo "" echo "INSERT WITH default_format=BADFORMAT" echo "INSERT INTO system.numbers Select * from numbers(10);" \ - | ${CLICKHOUSE_CURL} -sS "${CH_URL}&default_format=BADFORMAT" -i | grep 'xception\|Content-Type' | sed 's/Exception/Ex---tion/' | awk '{ print $1 $2 $3 }' + | ${CLICKHOUSE_CURL} -sS "${CH_URL}&default_format=BADFORMAT" -i --data-binary @- \ + | grep 'HTTP/1.1\|xception\|Content-Type' | sed 's/Exception/Ex---tion/;s/HTTP\/1.1//;s/\r//' | awk '{ print $1 $2 $3 }' +echo "" +echo "SELECT missing column WITH X-ClickHouse-Format: JSON" +echo "SELECT x FROM system.numbers LIMIT 1;"\ + | ${CLICKHOUSE_CURL} -sS "${CH_URL}" -H 'X-ClickHouse-Format: JSON' -i --data-binary @- \ + | grep 'HTTP/1.1\|xception\|Content-Type' | sed 's/Exception/Ex---tion/;s/HTTP\/1.1//;s/\r//' | awk '{ print $1 $2 $3 }' echo "" echo "INSERT WITH X-ClickHouse-Format: JSON" echo "INSERT INTO system.numbers Select * from numbers(10);" \ - | ${CLICKHOUSE_CURL} -sS "${CH_URL}" -H 'X-ClickHouse-Format: JSON' -i | grep 'xception\|Content-Type' | sed 's/Exception/Ex---tion/' | awk '{ print $1 $2 $3 }' + | ${CLICKHOUSE_CURL} -sS "${CH_URL}" -H 'X-ClickHouse-Format: JSON' -i --data-binary @- \ + | grep 'HTTP/1.1\|xception\|Content-Type' | sed 's/Exception/Ex---tion/;s/HTTP\/1.1//;s/\r//' | awk '{ print $1 $2 $3 }' echo "" echo "INSERT WITH 
X-ClickHouse-Format: XML" echo "INSERT INTO system.numbers Select * from numbers(10);" \ - | ${CLICKHOUSE_CURL} -sS "${CH_URL}" -H 'X-ClickHouse-Format: XML' -i | grep 'xception\|Content-Type' | sed 's/Exception/Ex---tion/' | awk '{ print $1 $2 $3 }' + | ${CLICKHOUSE_CURL} -sS "${CH_URL}" -H 'X-ClickHouse-Format: XML' -i --data-binary @- \ + | grep 'HTTP/1.1\|xception\|Content-Type' | sed 's/Exception/Ex---tion/;s/HTTP\/1.1//;s/\r//' | awk '{ print $1 $2 $3 }' echo "" echo "INSERT WITH X-ClickHouse-Format: BADFORMAT" echo "INSERT INTO system.numbers Select * from numbers(10);" \ - | ${CLICKHOUSE_CURL} -sS "${CH_URL}" -H 'X-ClickHouse-Format: BADFORMAT' -i | grep 'xception\|Content-Type' | sed 's/Exception/Ex---tion/' | awk '{ print $1 $2 $3 }' + | ${CLICKHOUSE_CURL} -sS "${CH_URL}" -H 'X-ClickHouse-Format: BADFORMAT' -i --data-binary @- \ + | grep 'HTTP/1.1\|xception\|Content-Type' | sed 's/Exception/Ex---tion/;s/HTTP\/1.1//;s/\r//' | awk '{ print $1 $2 $3 }' diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh index dc0dd8ae1f4..a0fdb5276e0 100755 --- a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh +++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh @@ -39,13 +39,13 @@ desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data{1,2,3}.jsonl'); " echo 'Error' > $CLICKHOUSE_TEST_UNIQUE_NAME/data4.jsonl -$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "Cannot extract table structure" +$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "CANNOT_EXTRACT_TABLE_STRUCTURE" $CLICKHOUSE_LOCAL -nm -q " set schema_inference_mode = 'union'; desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{2,3}.jsonl'); desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl'); -" 2>&1 | grep -c -F "Cannot extract table structure" +" 2>&1 | grep -c -F "CANNOT_EXTRACT_TABLE_STRUCTURE" echo 42 > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.csv echo 42, 43 > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.csv diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference index af0e50ec332..f1ca07ef408 100644 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference @@ -1,5 +1,5 @@ Creating 300 tables -Making making 200 requests to system.replicas +Making 200 requests to system.replicas Query system.replicas while waiting for other concurrent requests to finish 0 900 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index f93175529c0..2c57545e603 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -12,38 +12,50 @@ CONCURRENCY=200 echo "Creating $NUM_TABLES tables" +function get_done_or_die_trying() +{ + # Sometimes curl produces errors like 'Recv failure: Connection reset by peer' and fails test, let's add a little bit of retries + for _ in $(seq 1 10) + do + curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "$1" &>/dev/null && return + done + + echo "Cannot successfully make request" + exit 1 +} + function init_table() { set -e 
i=$1 - curl $CLICKHOUSE_URL --silent --fail --show-error --data "CREATE TABLE test_02908_r1_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r1') ORDER BY tuple()" 2>&1 - curl $CLICKHOUSE_URL --silent --fail --show-error --data "CREATE TABLE test_02908_r2_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r2') ORDER BY tuple()" 2>&1 - curl $CLICKHOUSE_URL --silent --fail --show-error --data "CREATE TABLE test_02908_r3_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r3') ORDER BY tuple()" 2>&1 + get_done_or_die_trying "CREATE TABLE test_02908_r1_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r1') ORDER BY tuple()" + get_done_or_die_trying "CREATE TABLE test_02908_r2_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r2') ORDER BY tuple()" + get_done_or_die_trying "CREATE TABLE test_02908_r3_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r3') ORDER BY tuple()" - curl $CLICKHOUSE_URL --silent --fail --show-error --data "INSERT INTO test_02908_r1_$i SELECT rand64() FROM numbers(5);" 2>&1 + get_done_or_die_trying "INSERT INTO test_02908_r1_$i SELECT rand64() FROM numbers(5);" } export init_table; -for i in `seq 1 $NUM_TABLES`; +for i in $(seq 1 $NUM_TABLES) do - init_table $i & + init_table "$i" & done wait; -echo "Making making $CONCURRENCY requests to system.replicas" +echo "Making $CONCURRENCY requests to system.replicas" -for i in `seq 1 $CONCURRENCY`; +for i in $(seq 1 $CONCURRENCY) do - curl $CLICKHOUSE_URL --silent --fail --show-error --data "SELECT * FROM system.replicas WHERE database=currentDatabase() FORMAT Null;" 2>&1 || echo "query $i failed" & + curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "SELECT * FROM system.replicas WHERE database=currentDatabase() FORMAT Null;" 2>&1 || echo "query $i failed" & done echo "Query system.replicas while waiting for other concurrent requests to finish" # lost_part_count column is read from ZooKeeper -curl $CLICKHOUSE_URL --silent --fail --show-error --data "SELECT sum(lost_part_count) FROM system.replicas WHERE database=currentDatabase();" 2>&1; +curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "SELECT sum(lost_part_count) FROM system.replicas WHERE database=currentDatabase();" 2>&1; # is_leader column is filled without ZooKeeper -curl $CLICKHOUSE_URL --silent --fail --show-error --data "SELECT sum(is_leader) FROM system.replicas WHERE database=currentDatabase();" 2>&1; +curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "SELECT sum(is_leader) FROM system.replicas WHERE database=currentDatabase();" 2>&1; wait; diff --git a/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference b/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference index 883966ce6b5..b867a31dcc3 100644 --- a/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference +++ b/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference @@ -14,13 +14,13 @@ Expression ((Projection + Before ORDER BY)) Parts: 1/1 Granules: 1/1 Expression ((Project names + Projection)) - Filter ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + (Change column names to column identifiers + Compute alias columns))) ReadFromMergeTree (02911_support_alias_column_in_indices.test1) Indexes: PrimaryKey Keys: c - Condition: (_CAST(plus(c, \'UInt64\'), 1) in [11, +Inf)) + Condition: (plus(c, 1) in [11, +Inf)) Parts: 1/2 Granules: 1/2 Skip @@ -44,12 +44,17 @@ 
Expression ((Projection + Before ORDER BY)) Parts: 1/1 Granules: 1/1 Expression ((Project names + Projection)) - Filter ((WHERE + Change column names to column identifiers)) + Filter ((WHERE + (Change column names to column identifiers + Compute alias columns))) ReadFromMergeTree (02911_support_alias_column_in_indices.test2) Indexes: PrimaryKey Keys: c - Condition: (_CAST(plus(_CAST(plus(c, \'UInt64\'), 1), \'UInt64\'), 1) in [16, +Inf)) + Condition: (plus(plus(c, 1), 1) in [16, +Inf)) Parts: 1/2 Granules: 1/2 + Skip + Name: i + Description: minmax GRANULARITY 1 + Parts: 1/1 + Granules: 1/1 diff --git a/tests/queries/0_stateless/02916_set_formatting.reference b/tests/queries/0_stateless/02916_set_formatting.reference index 34ff52365f9..46d30053970 100644 --- a/tests/queries/0_stateless/02916_set_formatting.reference +++ b/tests/queries/0_stateless/02916_set_formatting.reference @@ -5,7 +5,7 @@ Row 1: statement: CREATE VIEW default.v1 ( `v` UInt64 -) AS -SELECT v +) +AS SELECT v FROM default.t1 SETTINGS additional_table_filters = {'default.t1':'s != \'s1%\''} diff --git a/tests/queries/0_stateless/02918_optimize_count_for_merge_tables.reference b/tests/queries/0_stateless/02918_optimize_count_for_merge_tables.reference index 3f5700b6d63..786a6b3bf25 100644 --- a/tests/queries/0_stateless/02918_optimize_count_for_merge_tables.reference +++ b/tests/queries/0_stateless/02918_optimize_count_for_merge_tables.reference @@ -7,3 +7,6 @@ Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before GROUP BY) ReadFromMerge + ReadFromMergeTree (default.mt1) + ReadFromMergeTree (default.mt2) + ReadFromStorage (TinyLog) diff --git a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.reference b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.reference index e69de29bb2d..29376f82128 100644 --- a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.reference +++ b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.reference @@ -0,0 +1,6 @@ +zero_copy: + +tables: +table +zero_copy: +tables: diff --git a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh index e9deed2a7cd..bb013dccb65 100755 --- a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh +++ b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh @@ -3,7 +3,7 @@ set -e -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CURDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh @@ -27,6 +27,7 @@ create table r2 (n int) allow_remote_fs_zero_copy_replication=1; " + function get_shared_locks() { table_shared_id="$1" @@ -42,11 +43,12 @@ function get_shared_locks() done } + function filter_temporary_locks() { while read -r lock do - owner=$($CLICKHOUSE_KEEPER_CLIENT -q "get_stat ${lock}" | grep 'ephemeralOwner' | sed 's/.*= //') + owner="$($CLICKHOUSE_KEEPER_CLIENT -q "get_stat ${lock}" | grep 'ephemeralOwner' | sed 's/.*= //')" if [[ "${owner}" -eq "0" ]] then echo "${lock}" @@ -63,10 +65,11 @@ function insert_duplicates() { wait $CLICKHOUSE_CLIENT -nm -q " +system sync replica r1; system sync replica r2; " - count=$($CLICKHOUSE_CLIENT -q "select count() from r2;") + count="$($CLICKHOUSE_CLIENT -q "select count() from r2;")" [[ "${count}" -eq "1" ]] } @@ -75,7 +78,7 @@ function loop() { set -e - table_shared_id=$($CLICKHOUSE_KEEPER_CLIENT -q "get /test/02922/${CLICKHOUSE_DATABASE}/table/table_shared_id") + table_shared_id="$1" while : do @@ -89,8 +92,8 @@ system sync replica r2; " done - persistent_locks="$(get_shared_locks ${table_shared_id} | filter_temporary_locks)" - num=$(echo "${persistent_locks}" | wc -w) + persistent_locks="$(get_shared_locks "${table_shared_id}" | filter_temporary_locks)" + num="$(echo "${persistent_locks}" | wc -w)" if [[ "${num}" -ne "2" ]] then @@ -101,24 +104,41 @@ system sync replica r2; } + export -f query_with_retry export -f filter_temporary_locks export -f insert_duplicates export -f get_shared_locks export -f loop +table_shared_id="$($CLICKHOUSE_KEEPER_CLIENT -q "get /test/02922/${CLICKHOUSE_DATABASE}/table/table_shared_id")" exit_code=0 -timeout 60 bash -c loop || exit_code="${?}" +timeout 40 bash -c "loop '${table_shared_id}'" || exit_code="${?}" if [[ "${exit_code}" -ne "124" ]] then echo "timeout expected, but loop exited with code: ${exit_code}." echo "the error is found if loop ends with 0." 
+ echo "table_shared_id=${table_shared_id}" exit 1 fi -$CLICKHOUSE_CLIENT -nm -q " -drop table r1; -drop table r2; -" +function list_keeper_nodes() { + table_shared_id=$1 + + echo "zero_copy:" + $CLICKHOUSE_KEEPER_CLIENT -q "ls /clickhouse/zero_copy/zero_copy_s3" | grep -o "${table_shared_id}" | \ + sed "s/${table_shared_id}//g" || : + + echo "tables:" + $CLICKHOUSE_KEEPER_CLIENT -q "ls /test/02922/${CLICKHOUSE_DATABASE}" | grep -o "table" || : +} + +list_keeper_nodes "${table_shared_id}" + +$CLICKHOUSE_CLIENT -nm -q "drop table r1;" --send_logs_level="error" & +$CLICKHOUSE_CLIENT -nm -q "drop table r2;" --send_logs_level="error" & +wait + +list_keeper_nodes "${table_shared_id}" diff --git a/tests/queries/0_stateless/02931_alter_materialized_view_query_inconsistent.reference b/tests/queries/0_stateless/02931_alter_materialized_view_query_inconsistent.reference index 45e4b958f4b..0d6874fbb59 100644 --- a/tests/queries/0_stateless/02931_alter_materialized_view_query_inconsistent.reference +++ b/tests/queries/0_stateless/02931_alter_materialized_view_query_inconsistent.reference @@ -1,3 +1,3 @@ v UInt64 v2 UInt8 -CREATE MATERIALIZED VIEW default.pipe TO default.dest\n(\n `v` UInt64,\n `v2` UInt8\n) AS\nSELECT\n v * 2 AS v,\n 1 AS v2\nFROM default.src +CREATE MATERIALIZED VIEW default.pipe TO default.dest\n(\n `v` UInt64,\n `v2` UInt8\n)\nAS SELECT\n v * 2 AS v,\n 1 AS v2\nFROM default.src diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.reference b/tests/queries/0_stateless/02932_refreshable_materialized_views.reference index 4c5b678cfa5..aa76806da9d 100644 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.reference +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.reference @@ -1,14 +1,14 @@ <1: created view> a [] 1 -CREATE MATERIALIZED VIEW default.a\nREFRESH AFTER 1 SECOND\n(\n `x` UInt64\n)\nENGINE = Memory AS\nSELECT number AS x\nFROM numbers(2)\nUNION ALL\nSELECT rand64() AS x +CREATE MATERIALIZED VIEW default.a\nREFRESH AFTER 2 SECOND\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT number AS x\nFROM numbers(2)\nUNION ALL\nSELECT rand64() AS x <2: refreshed> 3 1 1 -<3: time difference at least> 500 -<4: next refresh in> 1 +<3: time difference at least> 1000 +<4: next refresh in> 2 <4.5: altered> Scheduled Finished 2052-01-01 00:00:00 -CREATE MATERIALIZED VIEW default.a\nREFRESH EVERY 2 YEAR\n(\n `x` Int16\n)\nENGINE = Memory AS\nSELECT x * 2 AS x\nFROM default.src +CREATE MATERIALIZED VIEW default.a\nREFRESH EVERY 2 YEAR\n(\n `x` Int16\n)\nENGINE = Memory\nAS SELECT x * 2 AS x\nFROM default.src <5: no refresh> 3 <6: refreshed> 2 <7: refreshed> Scheduled Finished 2054-01-01 00:00:00 -CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR DEPENDS ON default.a\n(\n `y` Int32\n)\nENGINE = MergeTree\nORDER BY y\nSETTINGS index_granularity = 8192 AS\nSELECT x * 10 AS y\nFROM default.a +CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR DEPENDS ON default.a\n(\n `y` Int32\n)\nENGINE = MergeTree\nORDER BY y\nSETTINGS index_granularity = 8192\nAS SELECT x * 10 AS y\nFROM default.a <8: refreshed> 20 <9: refreshed> a Scheduled Finished 2054-01-01 00:00:00 <9: refreshed> b Scheduled Finished 2054-01-01 00:00:00 @@ -25,7 +25,7 @@ CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR DEPENDS ON default.a\n( <17: chain-refreshed> a Scheduled 2062-01-01 00:00:00 <17: chain-refreshed> b Scheduled 2062-01-01 00:00:00 <18: removed dependency> b Scheduled [] 2062-03-03 03:03:03 2064-01-01 00:00:00 5 -CREATE MATERIALIZED VIEW 
default.b\nREFRESH EVERY 2 YEAR\n(\n `y` Int32\n)\nENGINE = MergeTree\nORDER BY y\nSETTINGS index_granularity = 8192 AS\nSELECT x * 10 AS y\nFROM default.a +CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR\n(\n `y` Int32\n)\nENGINE = MergeTree\nORDER BY y\nSETTINGS index_granularity = 8192\nAS SELECT x * 10 AS y\nFROM default.a <19: exception> 1 <20: unexception> 1 <21: rename> 1 @@ -34,9 +34,9 @@ CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR\n(\n `y` Int32\n)\nE <24: rename during refresh> 1 <25: rename during refresh> f Running <27: cancelled> f Scheduled -CREATE MATERIALIZED VIEW default.g\nREFRESH EVERY 1 WEEK OFFSET 3 DAY 4 HOUR RANDOMIZE FOR 4 DAY 1 HOUR\n(\n `x` Int64\n)\nENGINE = Memory AS\nSELECT 42 +CREATE MATERIALIZED VIEW default.g\nREFRESH EVERY 1 WEEK OFFSET 3 DAY 4 HOUR RANDOMIZE FOR 4 DAY 1 HOUR\n(\n `x` Int64\n)\nENGINE = Memory\nAS SELECT 42 <29: randomize> 1 1 -CREATE MATERIALIZED VIEW default.h\nREFRESH EVERY 1 SECOND TO default.dest\n(\n `x` Int64\n) AS\nSELECT x * 10 AS x\nFROM default.src +CREATE MATERIALIZED VIEW default.h\nREFRESH EVERY 1 SECOND TO default.dest\n(\n `x` Int64\n)\nAS SELECT x * 10 AS x\nFROM default.src <30: to existing table> 10 <31: to existing table> 10 <31: to existing table> 20 diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh index 8daea063fc5..89942e25b67 100755 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT -nq "create view refreshes as select * from system.view_refre # Basic refreshing. $CLICKHOUSE_CLIENT -nq " create materialized view a - refresh after 1 second + refresh after 2 second engine Memory empty as select number as x from numbers(2) union all select rand64() as x" @@ -29,6 +29,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $ do sleep 0.1 done +start_time="`$CLICKHOUSE_CLIENT -nq "select reinterpret(now64(), 'Int64')"`" # Check table contents. $CLICKHOUSE_CLIENT -nq "select '<2: refreshed>', count(), sum(x=0), sum(x=1) from a" # Wait for table contents to change. @@ -39,7 +40,6 @@ do [ "$res2" == "$res1" ] || break sleep 0.1 done -time2="`$CLICKHOUSE_CLIENT -nq "select reinterpret(now64(), 'Int64')"`" # Wait for another change. while : do @@ -47,11 +47,11 @@ do [ "$res3" == "$res2" ] || break sleep 0.1 done -# Check that the two changes were at least 500ms apart, in particular that we're not refreshing +# Check that the two changes were at least 1 second apart, in particular that we're not refreshing # like crazy. This is potentially flaky, but we need at least one test that uses non-mocked timer # to make sure the clock+timer code works at all. If it turns out flaky, increase refresh period above. $CLICKHOUSE_CLIENT -nq " - select '<3: time difference at least>', min2(reinterpret(now64(), 'Int64') - $time2, 500); + select '<3: time difference at least>', min2(reinterpret(now64(), 'Int64') - $start_time, 1000); select '<4: next refresh in>', next_refresh_time-last_refresh_time from refreshes;" # Create a source table from which views will read. @@ -61,7 +61,7 @@ $CLICKHOUSE_CLIENT -nq " # Switch to fake clock, change refresh schedule, change query. 
$CLICKHOUSE_CLIENT -nq " system test view a set fake time '2050-01-01 00:00:01';" -while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2050-01-01 00:00:01 2050-01-01 00:00:02' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2050-01-01 00:00:01 2050-01-01 00:00:03' ] do sleep 0.1 done diff --git a/tests/queries/0_stateless/02933_compare_with_bool_as_string.reference b/tests/queries/0_stateless/02933_compare_with_bool_as_string.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02933_compare_with_bool_as_string.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02933_compare_with_bool_as_string.sql b/tests/queries/0_stateless/02933_compare_with_bool_as_string.sql new file mode 100644 index 00000000000..5dbacd5fbbf --- /dev/null +++ b/tests/queries/0_stateless/02933_compare_with_bool_as_string.sql @@ -0,0 +1 @@ +select true = 'true'; diff --git a/tests/queries/0_stateless/02940_variant_text_deserialization.sql b/tests/queries/0_stateless/02940_variant_text_deserialization.sql index 041d02088ef..b909b2b6790 100644 --- a/tests/queries/0_stateless/02940_variant_text_deserialization.sql +++ b/tests/queries/0_stateless/02940_variant_text_deserialization.sql @@ -1,4 +1,5 @@ set allow_experimental_variant_type = 1; +set allow_suspicious_variant_types = 1; set session_timezone = 'UTC'; select 'JSON'; @@ -263,4 +264,4 @@ select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from form select 'Nullable'; select v, variantElement(v, 'Array(Nullable(String))') from format(Values, 'v Variant(String, Array(Nullable(String)))', '(NULL), (''string''), ([''hello'', null, ''world''])') format Values; -select ''; \ No newline at end of file +select ''; diff --git a/tests/queries/0_stateless/02941_variant_type_1.sh b/tests/queries/0_stateless/02941_variant_type_1.sh index ed365bbd244..773a8c4a5e4 100755 --- a/tests/queries/0_stateless/02941_variant_type_1.sh +++ b/tests/queries/0_stateless/02941_variant_type_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" function test1_insert() { diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference index 4b6d53c52ac..20a5176cb5e 100644 --- a/tests/queries/0_stateless/02941_variant_type_2.reference +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -1,51 +1,51 @@ Memory test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 MergeTree compact test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 MergeTree wide test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index 23666a9b4a8..d1fa0a777c9 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -7,17 +7,17 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" function test4_insert() { echo "test4 insert" - $CH_CLIENT -nmq "insert into test select number, NULL from numbers(200000); -insert into test select number + 200000, number from numbers(200000); -insert into test select number + 400000, 'str_' || toString(number) from numbers(200000); -insert into test select number + 600000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(200000); -insert into test select number + 800000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(200000); -insert into test select number + 1000000, range(number % 20 + 1)::Array(UInt64) from numbers(200000);" + $CH_CLIENT -nmq "insert into test select number, NULL from numbers(100000); +insert into test select number + 100000, number from numbers(100000); +insert into test select number + 200000, 'str_' || toString(number) from numbers(100000); +insert into test select number + 300000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(100000); +insert into test select number + 400000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(100000); +insert into test select number + 500000, range(number % 20 + 1)::Array(UInt64) from numbers(100000);" } function test4_select diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index d6309e26414..a0efead280a 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" function test5_insert() { diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index 5ea04db4bb4..336540d1e79 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" function test6_insert() { diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index 6eed33cc68d..9cfed61bdaf 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -49,7 +49,25 @@ A 2 2 A 3 B \N C --- what happens if some column "all" already exists? +-- SELECT * +A 2 +B 3 +C \N +D 1 +A 2 +B 3 +C \N +D 1 +-- the trouble starts when "order by all is all" is ambiguous + -- columns +B 3 10 +D 1 20 +A 2 30 +C \N 40 +B +D +A +C B 3 10 D 1 20 A 2 30 @@ -58,6 +76,15 @@ B 3 10 D 1 20 A 2 30 C \N 40 +B +D +A +C +B 3 10 +D 1 20 +A 2 30 +C \N 40 + -- column aliases D 1 A 2 B 3 @@ -66,6 +93,7 @@ D 1 A 2 B 3 C \N + -- expressions A 2 B 3 D 1 @@ -74,6 +102,7 @@ A 2 B 3 D 1 \N + -- ORDER BY ALL loses its special meaning when used in conjunction with other columns B 3 10 D 1 20 A 2 30 @@ -82,12 +111,3 @@ B 3 10 D 1 20 A 2 30 C \N 40 --- test SELECT * ORDER BY ALL with no "all" column in the SELECT clause -A 2 30 -B 3 10 -C \N 40 -D 1 20 -A 2 30 -B 3 10 -C \N 40 -D 1 20 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index 0960d75ad96..0d5e0ea52e4 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -5,12 +5,11 @@ DROP TABLE IF EXISTS order_by_all; CREATE TABLE order_by_all ( a String, - b Nullable(Int32), - all UInt64, + b Nullable(Int32) ) ENGINE = Memory; -INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); +INSERT INTO order_by_all VALUES ('B', 3), ('C', NULL), ('D', 1), ('A', 2); SELECT '-- no modifiers'; @@ -42,68 +41,75 @@ SET allow_experimental_analyzer = 1; SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; -SELECT '-- what happens if some column "all" already exists?'; - --- columns +SELECT '-- SELECT *'; SET allow_experimental_analyzer = 0; -SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; +SELECT * FROM order_by_all ORDER BY all; SET allow_experimental_analyzer = 1; -SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; - --- column aliases - -SET allow_experimental_analyzer = 0; -SELECT a, b AS all FROM order_by_all ORDER BY all; -- { 
serverError UNEXPECTED_EXPRESSION } -SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; - -SET allow_experimental_analyzer = 1; -SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; - --- expressions - -SET allow_experimental_analyzer = 0; -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; - -SET allow_experimental_analyzer = 1; -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; - -SET allow_experimental_analyzer = 0; -SELECT a, b, all FROM order_by_all ORDER BY all, a; - -SET allow_experimental_analyzer = 1; -SELECT a, b, all FROM order_by_all ORDER BY all, a; +SELECT * FROM order_by_all ORDER BY all; DROP TABLE order_by_all; -SELECT '-- test SELECT * ORDER BY ALL with no "all" column in the SELECT clause'; +SELECT '-- the trouble starts when "order by all is all" is ambiguous'; CREATE TABLE order_by_all ( a String, b Nullable(Int32), - c UInt64, + all UInt64 ) - ENGINE = Memory; +ENGINE = Memory; INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); +SELECT ' -- columns'; + SET allow_experimental_analyzer = 0; -SELECT * FROM order_by_all ORDER BY ALL; +SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; +SELECT a FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; +SELECT * FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT * FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; SET allow_experimental_analyzer = 1; -SELECT * FROM order_by_all ORDER BY ALL; +SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; +SELECT a FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; +-- SELECT * FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -- (*) see below +SELECT * FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; +-- SELECT a FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -- (*) see below + +-- (*) These queries show the expected behavior for analyzer. Unfortunately, it is not implemented that way yet, +-- which is not wrong but a bit unintuitive (some may say a landmine). Keeping the queries for now for reference. 
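
For readers skimming this hunk, a minimal standalone sketch of the ambiguity the comment block above describes; it is not part of the patch, and the demo table name t_all_demo is illustrative only. It condenses what the test queries in this file exercise: a column literally named `all` collides with the ORDER BY ALL shorthand, so the server reports UNEXPECTED_EXPRESSION rather than guessing, and disabling enable_order_by_all restores the plain column reference.

-- Hypothetical demo table; the real test uses order_by_all with columns a, b, all.
CREATE TABLE t_all_demo (a String, all UInt64) ENGINE = Memory;
INSERT INTO t_all_demo VALUES ('B', 10), ('A', 20);

-- Ambiguous: "sort by every column" or "sort by the column named all"?
SELECT a, all FROM t_all_demo ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }

-- With the shorthand disabled, `all` is treated as an ordinary column again.
SELECT a, all FROM t_all_demo ORDER BY all SETTINGS enable_order_by_all = false;
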
+ +SELECT ' -- column aliases'; + +SET allow_experimental_analyzer = 0; +SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 1; +SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SELECT ' -- expressions'; + +SET allow_experimental_analyzer = 0; +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 1; +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SELECT ' -- ORDER BY ALL loses its special meaning when used in conjunction with other columns'; + +SET allow_experimental_analyzer = 0; +SELECT a, b, all FROM order_by_all ORDER BY all, a; + +SET allow_experimental_analyzer = 1; +SELECT a, b, all FROM order_by_all ORDER BY all, a; DROP TABLE order_by_all; diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference index 1cf1644fe0a..0e1954cde62 100644 --- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference +++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference @@ -2,12 +2,12 @@ 2 Hello World 1 Hello ClickHouse 2 Hello World - Granules: 6/6 - Granules: 2/6 Granules: 6/6 Granules: 2/6 - Granules: 6/6 - Granules: 2/6 + Granules: 6/6 + Granules: 2/6 + Granules: 6/6 + Granules: 2/6 Granules: 6/6 Granules: 2/6 --- @@ -17,22 +17,22 @@ 1 Hello ClickHouse 2 Hello World 6 World Champion - Granules: 6/6 - Granules: 3/6 Granules: 6/6 Granules: 3/6 - Granules: 6/6 - Granules: 3/6 + Granules: 6/6 + Granules: 3/6 + Granules: 6/6 + Granules: 3/6 Granules: 6/6 Granules: 3/6 --- 5 OLAP Database 5 OLAP Database - Granules: 6/6 - Granules: 1/6 Granules: 6/6 Granules: 1/6 - Granules: 6/6 - Granules: 1/6 + Granules: 6/6 + Granules: 1/6 + Granules: 6/6 + Granules: 1/6 Granules: 6/6 Granules: 1/6 diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh index 88be09c2036..b816a20c818 100755 --- a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 " +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" function test() diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference index 1736a307c42..4109a88997c 100644 --- a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference @@ -44,9 +44,9 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- MergeTree compact test1 insert @@ -136,14 +136,14 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- MergeTree wide test1 insert @@ -233,12 +233,12 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh index e4c1206263f..1d88757a5d6 100755 --- a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 " +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" function test1_insert() @@ -29,10 +29,10 @@ function test1_select() function test2_insert() { echo "test2 insert" - $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(1000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(2000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number < 3500000, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(3000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(200000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(400000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number < 3500000, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(600000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" } function test2_select() diff --git a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference new file mode 100644 index 00000000000..461075e9607 --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference @@ -0,0 +1,45 @@ +DEFAULT expressions +-- Compact parts +Before materialize +1 1 +2 54321 +After materialize +1 1 +2 54321 +-- Wide parts +Before materialize +1 1 +2 54321 +After materialize +1 1 +2 54321 +-- Nullable column != physically absent +Before materialize +1 1 +2 \N +3 54321 +After materialize +1 1 +2 \N +3 54321 +-- Parts with renamed column +Before materialize +1 1 +2 54321 +After rename +1 1 +2 54321 +After materialize +1 1 +2 54321 +MATERIALIZED expressions +-- Compact parts +Before materialize +1 54321 +After materialize +1 65432 +-- Compact parts +Before materialize +1 54321 +After materialize +1 
65432 diff --git a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql new file mode 100644 index 00000000000..cfdde287712 --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql @@ -0,0 +1,85 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS tab; + +-- Tests that existing parts which contain a non-default value in columns with DEFAULT expression remain unchanged by MATERIALIZE COLUMN> +SELECT 'DEFAULT expressions'; + +SELECT '-- Compact parts'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO tab (id, dflt) VALUES (1, 1); +INSERT INTO tab (id) VALUES (2); +SELECT 'Before materialize'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Wide parts'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO tab (id, dflt) VALUES (1, 1); +INSERT INTO tab (id) VALUES (2); +SELECT 'Before materialize'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Nullable column != physically absent'; + +CREATE TABLE tab (id Int64, dflt Nullable(Int64) DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO tab (id, dflt) VALUES (1, 1); +INSERT INTO tab (id, dflt) VALUES (2, NULL); +INSERT INTO tab (id) VALUES (3); +SELECT 'Before materialize'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Parts with renamed column'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO tab (id, dflt) VALUES (1, 1); +INSERT INTO tab (id) VALUES (2); +SELECT 'Before materialize'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab RENAME COLUMN dflt TO dflt2; +SELECT 'After rename'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt2; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +-- But for columns with MATERIALIZED expression, all existing parts should be rewritten in case a new expression was set in the meantime. 
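
As a quick reference for the contrast drawn by the two comments in this test, a condensed sketch of the behavior being verified (not part of the patch; the table name t_demo is illustrative): for a DEFAULT column, MATERIALIZE COLUMN only fills in physically absent values and leaves explicitly inserted ones untouched, while for a MATERIALIZED column whose expression was changed, MATERIALIZE COLUMN rewrites existing parts using the new expression.

SET mutations_sync = 2;

-- DEFAULT column: explicitly written values survive MATERIALIZE COLUMN.
CREATE TABLE t_demo (id Int64, d Int64 DEFAULT 54321) ENGINE = MergeTree ORDER BY id;
INSERT INTO t_demo (id, d) VALUES (1, 1);   -- explicit value
INSERT INTO t_demo (id) VALUES (2);         -- value absent on disk, filled from DEFAULT on read
ALTER TABLE t_demo MATERIALIZE COLUMN d;
SELECT * FROM t_demo ORDER BY id;           -- still (1, 1) and (2, 54321)
DROP TABLE t_demo;

-- MATERIALIZED column: changing the expression and then materializing rewrites old parts.
CREATE TABLE t_demo (id Int64, m Int64 MATERIALIZED 54321) ENGINE = MergeTree ORDER BY id;
INSERT INTO t_demo (id) VALUES (1);
ALTER TABLE t_demo MODIFY COLUMN m Int64 MATERIALIZED 65432;
ALTER TABLE t_demo MATERIALIZE COLUMN m;
SELECT id, m FROM t_demo;                   -- (1, 65432), computed with the new expression
DROP TABLE t_demo;
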
+SELECT 'MATERIALIZED expressions'; + +SELECT '-- Compact parts'; + +CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO tab (id) VALUES (1); +SELECT 'Before materialize'; +SELECT id, mtrl FROM tab ORDER BY id; +ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432; +ALTER TABLE tab MATERIALIZE COLUMN mtrl; +SELECT 'After materialize'; +SELECT id, mtrl FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Compact parts'; + +CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO tab (id) VALUES (1); +SELECT 'Before materialize'; +SELECT id, mtrl FROM tab ORDER BY id; +ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432; +ALTER TABLE tab MATERIALIZE COLUMN mtrl; +SELECT 'After materialize'; +SELECT id, mtrl FROM tab ORDER BY id; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_1.reference b/tests/queries/0_stateless/02947_merge_tree_index_table_1.reference new file mode 100644 index 00000000000..0ce9de91293 --- /dev/null +++ b/tests/queries/0_stateless/02947_merge_tree_index_table_1.reference @@ -0,0 +1,84 @@ +0 0 v0 +0 5 v25 +1 1 v1 +1 6 v36 +2 2 v4 +2 7 v49 +3 3 v9 +3 8 v64 +4 4 v16 +4 9 v81 +0 10 v100 +0 15 v225 +1 11 v121 +1 16 v256 +2 12 v144 +2 17 v289 +3 13 v169 +3 18 v324 +4 14 v196 +4 19 v361 +┌─part_name─┬─mark_number─┬─rows_in_granule─┬─a─┬──b─┠+│ all_1_1_0 │ 0 │ 3 │ 0 │ 0 │ +│ all_1_1_0 │ 1 │ 3 │ 1 │ 6 │ +│ all_1_1_0 │ 2 │ 3 │ 3 │ 3 │ +│ all_1_1_0 │ 3 │ 1 │ 4 │ 9 │ +│ all_1_1_0 │ 4 │ 0 │ 4 │ 9 │ +│ all_2_2_0 │ 0 │ 3 │ 0 │ 10 │ +│ all_2_2_0 │ 1 │ 3 │ 1 │ 16 │ +│ all_2_2_0 │ 2 │ 3 │ 3 │ 13 │ +│ all_2_2_0 │ 3 │ 1 │ 4 │ 19 │ +│ all_2_2_0 │ 4 │ 0 │ 4 │ 19 │ +└───────────┴─────────────┴─────────────────┴───┴────┘ +┌─part_name─┬─mark_number─┬─rows_in_granule─┬─a─┬──b─┬─a.mark─┬─b.mark─┬─s.mark─┠+│ all_1_1_0 │ 0 │ 3 │ 0 │ 0 │ (0,0) │ (0,0) │ (0,0) │ +│ all_1_1_0 │ 1 │ 3 │ 1 │ 6 │ (0,24) │ (0,24) │ (0,10) │ +│ all_1_1_0 │ 2 │ 3 │ 3 │ 3 │ (0,48) │ (0,48) │ (0,21) │ +│ all_1_1_0 │ 3 │ 1 │ 4 │ 9 │ (0,72) │ (0,72) │ (0,32) │ +│ all_1_1_0 │ 4 │ 0 │ 4 │ 9 │ (0,80) │ (0,80) │ (0,36) │ +│ all_2_2_0 │ 0 │ 3 │ 0 │ 10 │ (0,0) │ (0,0) │ (0,0) │ +│ all_2_2_0 │ 1 │ 3 │ 1 │ 16 │ (0,24) │ (0,24) │ (0,15) │ +│ all_2_2_0 │ 2 │ 3 │ 3 │ 13 │ (0,48) │ (0,48) │ (0,30) │ +│ all_2_2_0 │ 3 │ 1 │ 4 │ 19 │ (0,72) │ (0,72) │ (0,45) │ +│ all_2_2_0 │ 4 │ 0 │ 4 │ 19 │ (0,80) │ (0,80) │ (0,50) │ +└───────────┴─────────────┴─────────────────┴───┴────┴────────┴────────┴────────┘ +0 0 v0 +0 4 v16 +0 8 v64 +1 1 v1 +1 5 v25 +1 9 v81 +2 2 v4 +2 6 v36 +3 3 v9 +3 7 v49 +0 12 v144 +0 16 v256 +1 13 v169 +1 17 v289 +2 10 v100 +2 14 v196 +2 18 v324 +3 11 v121 +3 15 v225 +3 19 v361 +┌─part_name─┬─mark_number─┬─rows_in_granule─┬─a─┬──b─┠+│ all_1_1_0 │ 0 │ 3 │ 0 │ 0 │ +│ all_1_1_0 │ 1 │ 3 │ 1 │ 1 │ +│ all_1_1_0 │ 2 │ 4 │ 2 │ 2 │ +│ all_1_1_0 │ 3 │ 0 │ 3 │ 7 │ +│ all_2_2_0 │ 0 │ 3 │ 0 │ 12 │ +│ all_2_2_0 │ 1 │ 3 │ 1 │ 17 │ +│ all_2_2_0 │ 2 │ 4 │ 2 │ 18 │ +│ all_2_2_0 │ 3 │ 0 │ 3 │ 19 │ +└───────────┴─────────────┴─────────────────┴───┴────┘ +┌─part_name─┬─mark_number─┬─rows_in_granule─┬─a─┬──b─┬─a.mark──┬─b.mark──┬─s.mark──┠+│ all_1_1_0 │ 0 │ 3 │ 0 │ 0 │ (0,0) │ (35,0) │ (77,0) │ +│ all_1_1_0 │ 1 │ 3 │ 1 │ 1 │ (114,0) │ (153,0) │ (197,0) │ +│ all_1_1_0 │ 2 │ 4 │ 2 │ 2 │ (234,0) │ (281,0) │ (329,0) │ +│ all_1_1_0 │ 3 │ 0 │ 3 │ 7 │ (369,0) │ (369,0) │ (369,0) │ +│ all_2_2_0 │ 0 │ 3 │ 0 │ 12 │ (0,0) │ (38,0) │ (82,0) │ +│ all_2_2_0 │ 1 │ 3 │ 1 │ 17 │ (124,0) 
│ (168,0) │ (212,0) │ +│ all_2_2_0 │ 2 │ 4 │ 2 │ 18 │ (254,0) │ (297,0) │ (345,0) │ +│ all_2_2_0 │ 3 │ 0 │ 3 │ 19 │ (392,0) │ (392,0) │ (392,0) │ +└───────────┴─────────────┴─────────────────┴───┴────┴─────────┴─────────┴─────────┘ diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_1.sql b/tests/queries/0_stateless/02947_merge_tree_index_table_1.sql new file mode 100644 index 00000000000..412fd476413 --- /dev/null +++ b/tests/queries/0_stateless/02947_merge_tree_index_table_1.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS t_merge_tree_index; + +CREATE TABLE t_merge_tree_index (a UInt64, b UInt64, s String) +ENGINE = MergeTree ORDER BY (a, b) +SETTINGS + index_granularity = 3, + min_bytes_for_wide_part = 0, + ratio_of_defaults_for_sparse_serialization = 1.0; + +SYSTEM STOP MERGES t_merge_tree_index; + +INSERT INTO t_merge_tree_index SELECT number % 5, number, 'v' || toString(number * number) FROM numbers(10); +INSERT INTO t_merge_tree_index SELECT number % 5, number, 'v' || toString(number * number) FROM numbers(10, 10); + +SELECT * FROM t_merge_tree_index ORDER BY _part, a, b; +SELECT * FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index) ORDER BY part_name, mark_number FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT * FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true) ORDER BY part_name, mark_number FORMAT PrettyCompactNoEscapesMonoBlock; + +DROP TABLE t_merge_tree_index; + +CREATE TABLE t_merge_tree_index (a UInt64, b UInt64, s String) +ENGINE = MergeTree ORDER BY (a, b) +SETTINGS + index_granularity = 3, + min_bytes_for_wide_part = '1G', + ratio_of_defaults_for_sparse_serialization = 1.0; + +SYSTEM STOP MERGES t_merge_tree_index; + +INSERT INTO t_merge_tree_index SELECT number % 4, number, 'v' || toString(number * number) FROM numbers(10); +INSERT INTO t_merge_tree_index SELECT number % 4, number, 'v' || toString(number * number) FROM numbers(10, 10); + +SELECT * FROM t_merge_tree_index ORDER BY _part, a, b; +SELECT * FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index) ORDER BY part_name, mark_number FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT * FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true) ORDER BY part_name, mark_number FORMAT PrettyCompactNoEscapesMonoBlock; + +DROP TABLE t_merge_tree_index; diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_2.reference b/tests/queries/0_stateless/02947_merge_tree_index_table_2.reference new file mode 100644 index 00000000000..7bfcb7b2822 --- /dev/null +++ b/tests/queries/0_stateless/02947_merge_tree_index_table_2.reference @@ -0,0 +1,51 @@ +┌─part_name─┬─mark_number─┬─rows_in_granule─┬─a─┬─b─┬─modulo(sipHash64(sp), 100)─┠+│ all_1_1_0 │ 0 │ 3 │ 0 │ 0 │ 19 │ +│ all_1_1_0 │ 1 │ 3 │ 1 │ 6 │ 19 │ +│ all_1_1_0 │ 2 │ 3 │ 3 │ 3 │ 19 │ +│ all_1_1_0 │ 3 │ 1 │ 4 │ 9 │ 19 │ +│ all_1_1_0 │ 4 │ 0 │ 4 │ 9 │ 19 │ +│ all_2_2_0 │ 0 │ 3 │ 0 │ 0 │ 96 │ +│ all_2_2_0 │ 1 │ 2 │ 3 │ 3 │ 96 │ +│ all_2_2_0 │ 2 │ 0 │ 4 │ 4 │ 96 │ +│ all_3_3_0 │ 0 │ 3 │ 0 │ 0 │ 96 │ +│ all_3_3_0 │ 1 │ 3 │ 1 │ 6 │ 96 │ +│ all_3_3_0 │ 2 │ 3 │ 3 │ 3 │ 96 │ +│ all_3_3_0 │ 3 │ 1 │ 4 │ 9 │ 96 │ +│ all_3_3_0 │ 4 │ 0 │ 4 │ 9 │ 96 │ +└───────────┴─────────────┴─────────────────┴───┴───┴────────────────────────────┘ +┌─part_name─┬─mark_number─┬─rows_in_granule─┬─a─┬─b─┬─modulo(sipHash64(sp), 
100)─┬─a.mark──┬─b.mark──┬─c.mark──────┬─sp.sparse.idx.mark─┬─sp.mark─┬─arr.size0.mark─┬─arr.dict.mark─┬─arr.mark─┬─n.size0.mark─┬─n%2Ec1.mark─┬─n%2Ec2.mark─┬─t%2Ec2.mark─┬─t%2Ec1.mark─┬─t.mark──────┬─column%2Ewith%2Edots.mark─┠+│ all_1_1_0 │ 0 │ 3 │ 0 │ 0 │ 19 │ (0,0) │ (0,0) │ (NULL,NULL) │ (0,0) │ (0,0) │ (0,0) │ (0,8) │ (0,0) │ (0,0) │ (0,0) │ (0,0) │ (0,0) │ (0,0) │ (NULL,NULL) │ (0,0) │ +│ all_1_1_0 │ 1 │ 3 │ 1 │ 6 │ 19 │ (0,24) │ (0,24) │ (NULL,NULL) │ (0,9) │ (0,0) │ (0,24) │ (0,8) │ (0,22) │ (0,24) │ (0,36) │ (0,72) │ (0,24) │ (0,24) │ (NULL,NULL) │ (0,24) │ +│ all_1_1_0 │ 2 │ 3 │ 3 │ 3 │ 19 │ (0,48) │ (0,48) │ (NULL,NULL) │ (0,18) │ (0,0) │ (0,48) │ (0,8) │ (0,44) │ (0,48) │ (0,72) │ (0,144) │ (0,48) │ (0,48) │ (NULL,NULL) │ (0,48) │ +│ all_1_1_0 │ 3 │ 1 │ 4 │ 9 │ 19 │ (0,72) │ (0,72) │ (NULL,NULL) │ (0,27) │ (0,0) │ (0,72) │ (0,8) │ (0,66) │ (0,72) │ (0,108) │ (0,216) │ (0,72) │ (0,72) │ (NULL,NULL) │ (0,72) │ +│ all_1_1_0 │ 4 │ 0 │ 4 │ 9 │ 19 │ (0,80) │ (0,80) │ (NULL,NULL) │ (0,36) │ (0,0) │ (0,80) │ (0,25) │ (0,84) │ (0,80) │ (0,120) │ (0,240) │ (0,80) │ (0,80) │ (NULL,NULL) │ (0,80) │ +│ all_2_2_0 │ 0 │ 3 │ 0 │ 0 │ 96 │ (0,0) │ (42,0) │ (84,0) │ (NULL,NULL) │ (126,0) │ (NULL,NULL) │ (NULL,NULL) │ (165,0) │ (NULL,NULL) │ (232,0) │ (286,0) │ (NULL,NULL) │ (NULL,NULL) │ (342,0) │ (391,0) │ +│ all_2_2_0 │ 1 │ 2 │ 3 │ 3 │ 96 │ (433,0) │ (472,0) │ (511,0) │ (NULL,NULL) │ (550,0) │ (NULL,NULL) │ (NULL,NULL) │ (589,0) │ (NULL,NULL) │ (659,0) │ (717,0) │ (NULL,NULL) │ (NULL,NULL) │ (773,0) │ (817,0) │ +│ all_2_2_0 │ 2 │ 0 │ 4 │ 4 │ 96 │ (856,0) │ (856,0) │ (856,0) │ (NULL,NULL) │ (856,0) │ (NULL,NULL) │ (NULL,NULL) │ (856,0) │ (NULL,NULL) │ (856,0) │ (856,0) │ (NULL,NULL) │ (NULL,NULL) │ (856,0) │ (856,0) │ +│ all_3_3_0 │ 0 │ 3 │ 0 │ 0 │ 96 │ (0,0) │ (0,0) │ (0,0) │ (NULL,NULL) │ (0,0) │ (0,0) │ (0,8) │ (0,0) │ (0,0) │ (0,0) │ (0,0) │ (0,0) │ (0,0) │ (NULL,NULL) │ (0,0) │ +│ all_3_3_0 │ 1 │ 3 │ 1 │ 6 │ 96 │ (0,24) │ (0,24) │ (0,24) │ (NULL,NULL) │ (0,24) │ (0,24) │ (0,8) │ (0,22) │ (0,24) │ (0,36) │ (0,72) │ (0,24) │ (0,24) │ (NULL,NULL) │ (0,24) │ +│ all_3_3_0 │ 2 │ 3 │ 3 │ 3 │ 96 │ (0,48) │ (0,48) │ (0,48) │ (NULL,NULL) │ (0,48) │ (0,48) │ (0,8) │ (0,44) │ (0,48) │ (0,72) │ (0,144) │ (0,48) │ (0,48) │ (NULL,NULL) │ (0,48) │ +│ all_3_3_0 │ 3 │ 1 │ 4 │ 9 │ 96 │ (0,72) │ (0,72) │ (0,72) │ (NULL,NULL) │ (0,72) │ (0,72) │ (0,8) │ (0,66) │ (0,72) │ (0,108) │ (0,216) │ (0,72) │ (0,72) │ (NULL,NULL) │ (0,72) │ +│ all_3_3_0 │ 4 │ 0 │ 4 │ 9 │ 96 │ (0,80) │ (0,80) │ (0,80) │ (NULL,NULL) │ (0,80) │ (0,80) │ (0,25) │ (0,84) │ (0,80) │ (0,120) │ (0,240) │ (0,80) │ (0,80) │ (NULL,NULL) │ (0,80) │ +└───────────┴─────────────┴─────────────────┴───┴───┴────────────────────────────┴─────────┴─────────┴─────────────┴────────────────────┴─────────┴────────────────┴───────────────┴──────────┴──────────────┴─────────────┴─────────────┴─────────────┴─────────────┴─────────────┴───────────────────────────┘ +part_name String +mark_number UInt64 +rows_in_granule UInt64 +a UInt64 +b UInt64 +modulo(sipHash64(sp), 100) UInt8 +a.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +b.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +c.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +sp.sparse.idx.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +sp.mark Tuple(offset_in_compressed_file Nullable(UInt64), 
offset_in_decompressed_block Nullable(UInt64)) +arr.size0.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +arr.dict.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +arr.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +n.size0.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +n%2Ec1.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +n%2Ec2.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +t%2Ec2.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +t%2Ec1.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +t.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) +column%2Ewith%2Edots.mark Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_2.sql b/tests/queries/0_stateless/02947_merge_tree_index_table_2.sql new file mode 100644 index 00000000000..5520962fb7a --- /dev/null +++ b/tests/queries/0_stateless/02947_merge_tree_index_table_2.sql @@ -0,0 +1,38 @@ +DROP TABLE IF EXISTS t_merge_tree_index; + +SET print_pretty_type_names = 0; + +CREATE TABLE t_merge_tree_index +( + `a` UInt64, + `b` UInt64, + `sp` UInt64, + `arr` Array(LowCardinality(String)), + `n` Nested(c1 String, c2 UInt64), + `t` Tuple(c1 UInt64, c2 UInt64), + `column.with.dots` UInt64 +) +ENGINE = MergeTree +ORDER BY (a, b, sipHash64(sp) % 100) +SETTINGS + index_granularity = 3, + min_bytes_for_wide_part = 0, + min_rows_for_wide_part = 6, + ratio_of_defaults_for_sparse_serialization = 0.9; + +SYSTEM STOP MERGES t_merge_tree_index; + +INSERT INTO t_merge_tree_index SELECT number % 5, number, 0, ['foo', 'bar'], ['aaa', 'bbb', 'ccc'], [11, 22, 33], (number, number), number FROM numbers(10); + +ALTER TABLE t_merge_tree_index ADD COLUMN c UInt64 AFTER b; + +INSERT INTO t_merge_tree_index SELECT number % 5, number, number, 10, ['foo', 'bar'], ['aaa', 'bbb', 'ccc'], [11, 22, 33], (number, number), number FROM numbers(5); +INSERT INTO t_merge_tree_index SELECT number % 5, number, number, 10, ['foo', 'bar'], ['aaa', 'bbb', 'ccc'], [11, 22, 33], (number, number), number FROM numbers(10); + +SELECT * FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index) ORDER BY part_name, mark_number FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT * FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true) ORDER BY part_name, mark_number FORMAT PrettyCompactNoEscapesMonoBlock; + +SET describe_compact_output = 1; +DESCRIBE mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true); + +DROP TABLE t_merge_tree_index; diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_3.reference b/tests/queries/0_stateless/02947_merge_tree_index_table_3.reference new file mode 100644 index 00000000000..7f508505ab8 --- /dev/null +++ b/tests/queries/0_stateless/02947_merge_tree_index_table_3.reference @@ -0,0 +1,10 @@ +ACCESS_DENIED +ACCESS_DENIED +ACCESS_DENIED +OK +ACCESS_DENIED +ACCESS_DENIED +ACCESS_DENIED +ACCESS_DENIED +OK +OK diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh 
b/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh new file mode 100755 index 00000000000..6cb184cb1fe --- /dev/null +++ b/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +user_name="${CLICKHOUSE_DATABASE}_test_user_02947" + +$CLICKHOUSE_CLIENT -n -q " +DROP TABLE IF EXISTS t_merge_tree_index; +DROP USER IF EXISTS $user_name; + +CREATE TABLE t_merge_tree_index +( + a UInt64, + b UInt64, + arr Array(LowCardinality(String)), +) +ENGINE = MergeTree +ORDER BY (a, b) +SETTINGS + index_granularity = 3, + min_bytes_for_wide_part = 0, + min_rows_for_wide_part = 6, + ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_merge_tree_index (a) VALUES (1); + +CREATE USER $user_name IDENTIFIED WITH plaintext_password BY 'password'; +REVOKE SELECT ON $CLICKHOUSE_DATABASE.t_merge_tree_index FROM $user_name; +GRANT SELECT (b) ON $CLICKHOUSE_DATABASE.t_merge_tree_index TO $user_name; +" + +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT a FROM t_merge_tree_index" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT arr FROM t_merge_tree_index" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT arr.size0 FROM t_merge_tree_index" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b FROM t_merge_tree_index" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" + +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT a FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT a.mark FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT arr.mark FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT arr.size0.mark FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" + +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" +$CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b.mark FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" + +$CLICKHOUSE_CLIENT -n -q " +DROP TABLE IF EXISTS t_merge_tree_index; +DROP USER IF EXISTS $user_name; +" diff --git a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference index 4d33751c699..8ae3cdf8f3a 100644 --- a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference @@ -2,7 +2,12 @@ 2 test2 8 3 test3 8 4 test4 1985 +2 test2 8 +3 test3 8 +4 test4 1985 --- 1 test1 42 +1 test1 42 --- 3 test3 +3 test3 diff --git 
a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql index 53b8a761cda..ab6e1532299 100644 --- a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql @@ -8,19 +8,23 @@ INSERT INTO merge_tree_in_subqueries VALUES(5, 'test5', 0); SET max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_for_non_replicated_merge_tree=1; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED } +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { serverError SUPPORT_IS_DISABLED } +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=1; SELECT '---'; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; SELECT '---'; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; -- IN with tuples is allowed SELECT '---'; -SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; DROP TABLE IF EXISTS merge_tree_in_subqueries; diff --git 
a/tests/queries/0_stateless/02949_ttl_group_by_bug.sql b/tests/queries/0_stateless/02949_ttl_group_by_bug.sql index 2888f6e7d66..a3d0794c897 100644 --- a/tests/queries/0_stateless/02949_ttl_group_by_bug.sql +++ b/tests/queries/0_stateless/02949_ttl_group_by_bug.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS ttl_group_by_bug; CREATE TABLE ttl_group_by_bug (key UInt32, ts DateTime, value UInt32, min_value UInt32 default value, max_value UInt32 default value) -ENGINE = MergeTree() PARTITION BY toYYYYMM(ts) +ENGINE = MergeTree() ORDER BY (key, toStartOfInterval(ts, toIntervalMinute(3)), ts) TTL ts + INTERVAL 5 MINUTE GROUP BY key, toStartOfInterval(ts, toIntervalMinute(3)) SET value = sum(value), min_value = min(min_value), max_value = max(max_value), ts=min(toStartOfInterval(ts, toIntervalMinute(3))); diff --git a/tests/queries/0_stateless/02950_dictionary_short_circuit.reference b/tests/queries/0_stateless/02950_dictionary_short_circuit.reference new file mode 100644 index 00000000000..69ac2e5197b --- /dev/null +++ b/tests/queries/0_stateless/02950_dictionary_short_circuit.reference @@ -0,0 +1,54 @@ +Flat dictionary +('zero','zero') +('zero','zero') +\N +\N +1 +\N +Hashed dictionary +('zero','zero') +('zero','zero') +\N +\N +1 +\N +\N +\N +Hashed array dictionary +('zero','zero') +('zero','zero') +\N +\N +1 +\N +Range hashed dictionary +\N +\N +Cache dictionary +('zero','zero') +('zero','zero') +\N +\N +1 +\N +Direct dictionary +('zero','zero') +('zero','zero') +\N +\N +1 +\N +IP TRIE dictionary +NP +NP +NP +(13238,'RU') +(13238,'RU') +(13238,'RU') +POLYGON dictionary +(0.5,0) East +(-0.5,0) West +(10,10) 0 +Regular Expression Tree dictionary +Android +Android diff --git a/tests/queries/0_stateless/02950_dictionary_short_circuit.sql b/tests/queries/0_stateless/02950_dictionary_short_circuit.sql new file mode 100644 index 00000000000..f4575bcd115 --- /dev/null +++ b/tests/queries/0_stateless/02950_dictionary_short_circuit.sql @@ -0,0 +1,264 @@ +-- Tags: no-parallel + +DROP TABLE IF EXISTS dictionary_source_table; +CREATE TABLE dictionary_source_table +( + id UInt64, + v1 String, + v2 Nullable(String), + v3 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO dictionary_source_table VALUES (0, 'zero', 'zero', 0), (1, 'one', NULL, 1); + +DROP DICTIONARY IF EXISTS flat_dictionary; +CREATE DICTIONARY flat_dictionary +( + id UInt64, + v1 String, + v2 Nullable(String) DEFAULT NULL, + v3 Nullable(UInt64) +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'dictionary_source_table')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(FLAT()); + +SELECT 'Flat dictionary'; +SELECT dictGetOrDefault('flat_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) +FROM dictionary_source_table; +SELECT dictGetOrDefault('flat_dictionary', 'v2', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +SELECT dictGetOrDefault('flat_dictionary', 'v3', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +DROP DICTIONARY flat_dictionary; + + +DROP DICTIONARY IF EXISTS hashed_dictionary; +CREATE DICTIONARY hashed_dictionary +( + id UInt64, + v1 String, + v2 Nullable(String) DEFAULT NULL, + v3 Nullable(UInt64) +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'dictionary_source_table')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(HASHED()); + +SELECT 'Hashed dictionary'; +SELECT dictGetOrDefault('hashed_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) +FROM dictionary_source_table; +SELECT dictGetOrDefault('hashed_dictionary', 'v2', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +SELECT dictGetOrDefault('hashed_dictionary', 'v3', id+1, 
intDiv(NULL, id)) +FROM dictionary_source_table; +SELECT dictGetOrDefault('hashed_dictionary', 'v2', 1, intDiv(1, id)) +FROM dictionary_source_table; +DROP DICTIONARY hashed_dictionary; + + +DROP DICTIONARY IF EXISTS hashed_array_dictionary; +CREATE DICTIONARY hashed_array_dictionary +( + id UInt64, + v1 String, + v2 Nullable(String) DEFAULT NULL, + v3 Nullable(UInt64) +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'dictionary_source_table')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(HASHED_ARRAY()); + +SELECT 'Hashed array dictionary'; +SELECT dictGetOrDefault('hashed_array_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) +FROM dictionary_source_table; +SELECT dictGetOrDefault('hashed_array_dictionary', 'v2', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +SELECT dictGetOrDefault('hashed_array_dictionary', 'v3', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +DROP DICTIONARY hashed_array_dictionary; + + +DROP TABLE IF EXISTS range_dictionary_source_table; +CREATE TABLE range_dictionary_source_table +( + id UInt64, + start Date, + end Nullable(Date), + val Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO range_dictionary_source_table VALUES (0, '2023-01-01', Null, Null), (1, '2022-11-09', '2022-12-08', 1); + +DROP DICTIONARY IF EXISTS range_hashed_dictionary; +CREATE DICTIONARY range_hashed_dictionary +( + id UInt64, + start Date, + end Nullable(Date), + val Nullable(UInt64) +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'range_dictionary_source_table')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(RANGE_HASHED()) +RANGE(MIN start MAX end); + +SELECT 'Range hashed dictionary'; +SELECT dictGetOrDefault('range_hashed_dictionary', 'val', id, toDate('2023-01-02'), intDiv(NULL, id)) +FROM range_dictionary_source_table; +DROP DICTIONARY range_hashed_dictionary; +DROP TABLE range_dictionary_source_table; + + +DROP DICTIONARY IF EXISTS cache_dictionary; +CREATE DICTIONARY cache_dictionary +( + id UInt64, + v1 String, + v2 Nullable(String) DEFAULT NULL, + v3 Nullable(UInt64) +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'dictionary_source_table')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(CACHE(SIZE_IN_CELLS 10)); + +SELECT 'Cache dictionary'; +SELECT dictGetOrDefault('cache_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) +FROM dictionary_source_table; +SELECT dictGetOrDefault('cache_dictionary', 'v2', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +SELECT dictGetOrDefault('cache_dictionary', 'v3', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +DROP DICTIONARY cache_dictionary; + + +DROP DICTIONARY IF EXISTS direct_dictionary; +CREATE DICTIONARY direct_dictionary +( + id UInt64, + v1 String, + v2 Nullable(String) DEFAULT NULL, + v3 Nullable(UInt64) +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'dictionary_source_table')) +LAYOUT(DIRECT()); + +SELECT 'Direct dictionary'; +SELECT dictGetOrDefault('direct_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) +FROM dictionary_source_table; +SELECT dictGetOrDefault('direct_dictionary', 'v2', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +SELECT dictGetOrDefault('direct_dictionary', 'v3', id+1, intDiv(NULL, id)) +FROM dictionary_source_table; +DROP DICTIONARY direct_dictionary; + + +DROP TABLE dictionary_source_table; + + +DROP TABLE IF EXISTS ip_dictionary_source_table; +CREATE TABLE ip_dictionary_source_table +( + id UInt64, + prefix String, + asn UInt32, + cca2 String +) ENGINE=TinyLog; + +INSERT INTO ip_dictionary_source_table VALUES (0, '202.79.32.0/20', 17501, 'NP'), (1, '2620:0:870::/48', 3856, 'US'), (2, 
'2a02:6b8:1::/48', 13238, 'RU'); + +DROP DICTIONARY IF EXISTS ip_dictionary; +CREATE DICTIONARY ip_dictionary +( + id UInt64, + prefix String, + asn UInt32, + cca2 String +) +PRIMARY KEY prefix +SOURCE(CLICKHOUSE(TABLE 'ip_dictionary_source_table')) +LAYOUT(IP_TRIE) +LIFETIME(3600); + +SELECT 'IP TRIE dictionary'; +SELECT dictGetOrDefault('ip_dictionary', 'cca2', toIPv4('202.79.32.10'), intDiv(0, id)) +FROM ip_dictionary_source_table; +SELECT dictGetOrDefault('ip_dictionary', ('asn', 'cca2'), IPv6StringToNum('2a02:6b8:1::1'), +(intDiv(1, id), intDiv(1, id))) FROM ip_dictionary_source_table; +DROP DICTIONARY ip_dictionary; + + +DROP TABLE IF EXISTS polygon_dictionary_source_table; +CREATE TABLE polygon_dictionary_source_table +( + key Array(Array(Array(Tuple(Float64, Float64)))), + name Nullable(String) +) ENGINE=TinyLog; + +INSERT INTO polygon_dictionary_source_table VALUES([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'East'), ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'West'); + +DROP DICTIONARY IF EXISTS polygon_dictionary; +CREATE DICTIONARY polygon_dictionary +( + key Array(Array(Array(Tuple(Float64, Float64)))), + name Nullable(String) +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(TABLE 'polygon_dictionary_source_table')) +LIFETIME(0) +LAYOUT(POLYGON()); + +DROP TABLE IF EXISTS points; +CREATE TABLE points (x Float64, y Float64) ENGINE=TinyLog; +INSERT INTO points VALUES (0.5, 0), (-0.5, 0), (10,10); + +SELECT 'POLYGON dictionary'; +SELECT tuple(x, y) as key, dictGetOrDefault('polygon_dictionary', 'name', key, intDiv(1, y)) +FROM points; + +DROP TABLE points; +DROP DICTIONARY polygon_dictionary; +DROP TABLE polygon_dictionary_source_table; + + +DROP TABLE IF EXISTS regexp_dictionary_source_table; +CREATE TABLE regexp_dictionary_source_table +( + id UInt64, + parent_id UInt64, + regexp String, + keys Array(String), + values Array(String), +) ENGINE=TinyLog; + +INSERT INTO regexp_dictionary_source_table VALUES (1, 0, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']) +INSERT INTO regexp_dictionary_source_table VALUES (2, 0, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']) +INSERT INTO regexp_dictionary_source_table VALUES (3, 2, '33/tclwebkit', ['version'], ['13']) +INSERT INTO regexp_dictionary_source_table VALUES (4, 2, '3[12]/tclwebkit', ['version'], ['12']) +INSERT INTO regexp_dictionary_source_table VALUES (5, 2, '3[12]/tclwebkit', ['version'], ['11']) +INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']) + +DROP DICTIONARY IF EXISTS regexp_dict; +create dictionary regexp_dict +( + regexp String, + name String, + version Nullable(UInt64), + comment String default 'nothing' +) +PRIMARY KEY(regexp) +SOURCE(CLICKHOUSE(TABLE 'regexp_dictionary_source_table')) +LIFETIME(0) +LAYOUT(regexp_tree); + +SELECT 'Regular Expression Tree dictionary'; +SELECT dictGetOrDefault('regexp_dict', 'name', concat(toString(number), '/tclwebkit', toString(number)), +intDiv(1,number)) FROM numbers(2); +DROP DICTIONARY regexp_dict; +DROP TABLE regexp_dictionary_source_table; diff --git a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.reference b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.reference new file mode 100644 index 00000000000..246d6f59274 --- /dev/null +++ b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.reference @@ -0,0 +1,6 @@ +('zero','zero') +('zero','zero') +\N +\N +1 +\N diff --git 
a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh new file mode 100755 index 00000000000..a02bdd0a1d2 --- /dev/null +++ b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +$CLICKHOUSE_CLIENT -n --query=" + DROP DATABASE IF EXISTS 02950_database_for_ssd_cache_dictionary; + CREATE DATABASE 02950_database_for_ssd_cache_dictionary; + + CREATE TABLE 02950_database_for_ssd_cache_dictionary.source_table + ( + id UInt64, + v1 String, + v2 Nullable(String), + v3 Nullable(UInt64) + ) + ENGINE = TinyLog; + + INSERT INTO 02950_database_for_ssd_cache_dictionary.source_table VALUES (0, 'zero', 'zero', 0), (1, 'one', NULL, 1); + + CREATE DICTIONARY 02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary + ( + id UInt64, + v1 String, + v2 Nullable(String) DEFAULT NULL, + v3 Nullable(UInt64) + ) + PRIMARY KEY id + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'source_table')) + LIFETIME(MIN 1 MAX 1000) + LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 8192 PATH '$USER_FILES_PATH/0d')); + + SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) FROM 02950_database_for_ssd_cache_dictionary.source_table; + SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', 'v2', id+1, intDiv(NULL, id)) FROM 02950_database_for_ssd_cache_dictionary.source_table; + SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', 'v3', id+1, intDiv(NULL, id)) FROM 02950_database_for_ssd_cache_dictionary.source_table; + + DROP DICTIONARY 02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary; + DROP TABLE 02950_database_for_ssd_cache_dictionary.source_table; + DROP DATABASE 02950_database_for_ssd_cache_dictionary;" diff --git a/tests/queries/0_stateless/02956_clickhouse_local_system_parts.reference b/tests/queries/0_stateless/02956_clickhouse_local_system_parts.reference index 30365d83930..b33c7b90245 100644 --- a/tests/queries/0_stateless/02956_clickhouse_local_system_parts.reference +++ b/tests/queries/0_stateless/02956_clickhouse_local_system_parts.reference @@ -1 +1,2 @@ test all_1_1_0 1 +test2 all_1_1_0 1 diff --git a/tests/queries/0_stateless/02956_clickhouse_local_system_parts.sh b/tests/queries/0_stateless/02956_clickhouse_local_system_parts.sh index e9d8eb081fb..dac0cc2b865 100755 --- a/tests/queries/0_stateless/02956_clickhouse_local_system_parts.sh +++ b/tests/queries/0_stateless/02956_clickhouse_local_system_parts.sh @@ -5,4 +5,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --multiquery "CREATE TABLE test (x UInt8) ENGINE = MergeTree ORDER BY (); INSERT INTO test SELECT 1; SELECT table, name, rows FROM system.parts WHERE database = currentDatabase();" +$CLICKHOUSE_LOCAL --multiquery " + CREATE TABLE test (x UInt8) ENGINE = MergeTree ORDER BY (); + INSERT INTO test SELECT 1; + + CREATE TABLE test2 (x UInt8) ENGINE = MergeTree ORDER BY (); + INSERT INTO test2 SELECT 1; + + SELECT table, name, rows FROM system.parts WHERE database = currentDatabase(); +" diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference index d3a002c4fd4..413c79828c7 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -2,3 +2,6 @@ 2023-10-09 10:11:12.001 2023-10-09 10:11:12.000 2023-10-09 10:11:12.000 +2023-10-09 00:00:00.000000 +2023-10-09 00:00:00.000 +2023-10-09 00:00:00 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql index 178f21a9e63..15753d4532c 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql @@ -1,4 +1,7 @@ SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(1)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000500', 6), toIntervalMillisecond(1)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000499', 6), toIntervalMillisecond(1)); -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(10)); \ No newline at end of file +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(10)); +select toStartOfInterval(toDateTime64('2023-10-09 00:01:34', 9), toIntervalMicrosecond(100000000)); +select toStartOfInterval(toDateTime64('2023-10-09 00:01:34', 9), toIntervalMillisecond(100000)); +select toStartOfInterval(toDateTime64('2023-10-09 00:01:34', 9), toIntervalSecond(100)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02961_storage_config_volume_priority.reference b/tests/queries/0_stateless/02961_storage_config_volume_priority.reference new file mode 100644 index 00000000000..ba48e75ae25 --- /dev/null +++ b/tests/queries/0_stateless/02961_storage_config_volume_priority.reference @@ -0,0 +1,9 @@ +vol2_02961 1 +vol1_02961 2 +vol_untagged2_02961 3 +vol_untagged1_02961 4 +check non-unique values dont work +1 +check no gaps in range allowed +1 +restore valid config diff --git a/tests/queries/0_stateless/02961_storage_config_volume_priority.sh b/tests/queries/0_stateless/02961_storage_config_volume_priority.sh new file mode 100755 index 00000000000..4e085541a8d --- /dev/null +++ b/tests/queries/0_stateless/02961_storage_config_volume_priority.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT --query " +SELECT + volume_name, + volume_priority +FROM system.storage_policies +WHERE policy_name = 'policy_02961' +ORDER BY volume_priority ASC; +" + +config_path=/etc/clickhouse-server/config.d/storage_conf_02961.xml +config_path_tmp=$config_path.tmp + +echo 'check non-unique values dont work' +cat $config_path \ +| sed "s|2<\/volume_priority>|1<\/volume_priority>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='error'; +SYSTEM RELOAD CONFIG" 2>&1 | grep -c 'volume_priority values must be unique across the policy' + +#first, restore original values +cat $config_path \ +| sed '0,/1<\/volume_priority>/s//2<\/volume_priority>/' \ +> $config_path_tmp +mv $config_path_tmp $config_path + +echo 'check no gaps in range allowed' +cat $config_path \ +| sed '0,/1<\/volume_priority>/s//3<\/volume_priority>/' \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='error'; +SYSTEM RELOAD CONFIG" 2>&1 | grep -c 'volume_priority values must cover the range from 1 to N (lowest priority specified) without gaps' + +echo 'restore valid config' +cat $config_path \ +| sed '0,/3<\/volume_priority>/s//1<\/volume_priority>/' \ +> $config_path_tmp +mv $config_path_tmp $config_path diff --git a/tests/queries/0_stateless/02963_single_value_destructor.reference b/tests/queries/0_stateless/02963_single_value_destructor.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02963_single_value_destructor.sql b/tests/queries/0_stateless/02963_single_value_destructor.sql new file mode 100644 index 00000000000..ee8f9164aef --- /dev/null +++ b/tests/queries/0_stateless/02963_single_value_destructor.sql @@ -0,0 +1,8 @@ +-- When we use SingleValueDataBaseMemoryBlock we must ensure we call the class destructor on destroy + +Select argMax((number, number), (number, number)) FROM numbers(100000) format Null; +Select argMin((number, number), (number, number)) FROM numbers(100000) format Null; +Select anyHeavy((number, number)) FROM numbers(100000) format Null; +Select singleValueOrNull(number::Date32) FROM numbers(100000) format Null; +Select anyArgMax(number, (number, number)) FROM numbers(100000) format Null; +Select anyArgMin(number, (number, number)) FROM numbers(100000) format Null; diff --git a/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql b/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql deleted file mode 100644 index c1ca0b4bcd5..00000000000 --- a/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql +++ /dev/null @@ -1,6 +0,0 @@ --- Tags: no-fasttest - -select * from s3('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } -select * from deltaLake('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } -select * from hudi('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } -select * from iceberg('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } diff --git a/tests/queries/0_stateless/02966_topk_counts_approx_count_sum.reference b/tests/queries/0_stateless/02966_topk_counts_approx_count_sum.reference new file mode 100644 index 00000000000..0474f8e3fc9 --- /dev/null +++ b/tests/queries/0_stateless/02966_topk_counts_approx_count_sum.reference @@ -0,0 +1,2 @@ +[('6_0',476),('6_1',207),('5_0',154),('5_1',63),('4_0',47)] 
[('6_0',313208),('6_1',136206),('5_0',32032),('5_1',13104),('4_0',3026)] [('6_0',476,5),('6_1',207,15),('5_0',154,2)] [('6_0',313208,0),('6_1',136206,0),('5_0',32032,0)] [('6_0',476,5),('6_1',207,15),('5_0',154,2)] [('6_0',476,5),('6_1',207,15),('5_0',154,2)] [('6_0',313208,0),('6_1',136206,0),('5_0',32032,0)] +[(6,683,0),(5,217,0),(4,68,0),(3,22,0)] diff --git a/tests/queries/0_stateless/02966_topk_counts_approx_count_sum.sql b/tests/queries/0_stateless/02966_topk_counts_approx_count_sum.sql new file mode 100644 index 00000000000..9ed4baba171 --- /dev/null +++ b/tests/queries/0_stateless/02966_topk_counts_approx_count_sum.sql @@ -0,0 +1,26 @@ +WITH + arraySlice(arrayReverseSort(x -> (x.2, x.1), arrayZip(untuple(sumMap(([k], [1]))))), 1, 5) AS topKExact, + arraySlice(arrayReverseSort(x -> (x.2, x.1), arrayZip(untuple(sumMap(([k], [w]))))), 1, 5) AS topKWeightedExact +SELECT + topKExact, + topKWeightedExact, + topK(3, 2, 'counts')(k) AS topK_counts, + topKWeighted(3, 2, 'counts')(k, w) AS topKWeighted_counts, + approx_top_count(3, 6)(k) AS approx_top_count, + approx_top_k(3, 6)(k) AS approx_top_k, + approx_top_sum(3, 6)(k, w) AS approx_top_sum +FROM +( + SELECT + concat(countDigits(number * number), '_', intDiv((number % 10), 7)) AS k, + number AS w + FROM numbers(1000) +); + +SELECT topKMerge(4, 2, 'counts')(state) FROM ( SELECT topKState(4, 2, 'counts')(countDigits(number * number)) AS state FROM numbers(1000)); + +SELECT topKMerge(4, 3, 'counts')(state) FROM ( SELECT topKState(4, 2, 'counts')(countDigits(number * number)) AS state FROM numbers(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT topKMerge(4, 2)(state) FROM ( SELECT topKState(4, 2, 'counts')(countDigits(number * number)) AS state FROM numbers(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT topKMerge(state) FROM ( SELECT topKState(4, 2, 'counts')(countDigits(number * number)) AS state FROM numbers(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } \ No newline at end of file diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference index 6b1fdfd42a2..100e4e500cd 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference @@ -237,7 +237,7 @@ sub2 as (select y, z from tab2 where y != 4), sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), sub4 as (select z, a from tab3 where z != 8), sub5 as (select z, a, x, y, r.y, ll.z from sub4 rr any right join sub3 ll on ll.z = rr.z) -select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1;-- { echoOn } +select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; Expression Sorting Expression @@ -250,6 +250,93 @@ Expression ReadFromRemoteParallelReplicas Expression ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number 
from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +Expression + Sorting + Expression + ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0;-- { echoOn } +Expression + Sorting + Expression + Join + Expression + Join + Expression + CreatingSets + Expression + Expression + ReadFromMergeTree + CreatingSet + 
Expression + Filter + ReadFromSystemNumbers + Expression + ReadFromRemoteParallelReplicas + Expression + ReadFromRemoteParallelReplicas set parallel_replicas_prefer_local_join = 1; -- A query with only INNER/LEFT joins is fully send to replicas. JOIN is executed in GLOBAL mode. select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; @@ -500,3 +587,90 @@ Expression ReadFromRemoteParallelReplicas Expression ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +Expression + Sorting + Expression + ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 
0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; +Expression + Sorting + Expression + Join + Expression + Join + Expression + CreatingSets + Expression + Expression + ReadFromMergeTree + CreatingSet + Expression + Filter + ReadFromSystemNumbers + Expression + ReadFromRemoteParallelReplicas + Expression + ReadFromRemoteParallelReplicas diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 index 7d2766d52f8..54505b147a3 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 @@ -126,4 +126,42 @@ sub4 as (select z, a from tab3 where z != 8), sub5 as (select z, a, x, y, r.y, ll.z from sub4 rr any right join sub3 ll on ll.z = rr.z) select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; + +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; + +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from 
numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; + +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; + {%- endfor %} diff --git a/tests/queries/0_stateless/02968_mysql_prefer_column_name_to_alias.reference b/tests/queries/0_stateless/02968_mysql_prefer_column_name_to_alias.reference new file mode 100644 index 00000000000..2491b55b493 --- /dev/null +++ b/tests/queries/0_stateless/02968_mysql_prefer_column_name_to_alias.reference @@ -0,0 +1,2 @@ +b count() +2 1 diff --git a/tests/queries/0_stateless/02968_mysql_prefer_column_name_to_alias.sh b/tests/queries/0_stateless/02968_mysql_prefer_column_name_to_alias.sh new file mode 100755 index 00000000000..4457aafb8b2 --- /dev/null +++ b/tests/queries/0_stateless/02968_mysql_prefer_column_name_to_alias.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: requires mysql client + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Some BI tools which connect to ClickHouse's MySQL port, run queries which succeed only with (the analyzer enabled) +# or (without analyzer and setting prefer_column_name_to_alias = 1). Since the setting is too impactful to enable it +# globally, it is enabled only by the MySQL handler internally as a workaround. Run a query from Bug 56173 to verify. +# +# When the analyzer is the new default, the test and the workaround can be deleted. 
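+#
+# For reference only (an illustrative sketch, not executed by this test): roughly the same query
+# over the native protocol would need the setting passed explicitly, e.g.
+#   clickhouse-client --prefer_column_name_to_alias=1 \
+#     --query "select a + b as b, count() from (select 1 as a, 1 as b) group by a + b"
+# whereas the MySQL handler applies the workaround on its own.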
+${MYSQL_CLIENT} --execute "select a + b as b, count() from (select 1 as a, 1 as b) group by a + b"; diff --git a/tests/queries/0_stateless/02968_url_args.reference b/tests/queries/0_stateless/02968_url_args.reference index 1c3693e4a66..e7e9e2c0d94 100644 --- a/tests/queries/0_stateless/02968_url_args.reference +++ b/tests/queries/0_stateless/02968_url_args.reference @@ -2,7 +2,7 @@ CREATE TABLE default.a\n(\n `x` Int64\n)\nENGINE = URL(\'https://example.com/ CREATE TABLE default.b\n(\n `x` Int64\n)\nENGINE = URL(\'https://example.com/\', \'CSV\', headers()) CREATE TABLE default.c\n(\n `x` Int64\n)\nENGINE = S3(\'https://example.s3.amazonaws.com/a.csv\', \'NOSIGN\', \'CSV\', headers(\'foo\' = \'[HIDDEN]\')) CREATE TABLE default.d\n(\n `x` Int64\n)\nENGINE = S3(\'https://example.s3.amazonaws.com/a.csv\', \'NOSIGN\', headers(\'foo\' = \'[HIDDEN]\')) -CREATE VIEW default.e\n(\n `x` Int64\n) AS\nSELECT count()\nFROM url(\'https://example.com/\', CSV, headers(\'foo\' = \'[HIDDEN]\', \'a\' = \'[HIDDEN]\')) -CREATE VIEW default.f\n(\n `x` Int64\n) AS\nSELECT count()\nFROM url(\'https://example.com/\', CSV, headers()) -CREATE VIEW default.g\n(\n `x` Int64\n) AS\nSELECT count()\nFROM s3(\'https://example.s3.amazonaws.com/a.csv\', CSV, headers(\'foo\' = \'[HIDDEN]\')) -CREATE VIEW default.h\n(\n `x` Int64\n) AS\nSELECT count()\nFROM s3(\'https://example.s3.amazonaws.com/a.csv\', headers(\'foo\' = \'[HIDDEN]\')) +CREATE VIEW default.e\n(\n `x` Int64\n)\nAS SELECT count()\nFROM url(\'https://example.com/\', CSV, headers(\'foo\' = \'[HIDDEN]\', \'a\' = \'[HIDDEN]\')) +CREATE VIEW default.f\n(\n `x` Int64\n)\nAS SELECT count()\nFROM url(\'https://example.com/\', CSV, headers()) +CREATE VIEW default.g\n(\n `x` Int64\n)\nAS SELECT count()\nFROM s3(\'https://example.s3.amazonaws.com/a.csv\', CSV, headers(\'foo\' = \'[HIDDEN]\')) +CREATE VIEW default.h\n(\n `x` Int64\n)\nAS SELECT count()\nFROM s3(\'https://example.s3.amazonaws.com/a.csv\', headers(\'foo\' = \'[HIDDEN]\')) diff --git a/tests/queries/0_stateless/02969_auto_format_detection.reference b/tests/queries/0_stateless/02969_auto_format_detection.reference new file mode 100644 index 00000000000..4b86be04996 --- /dev/null +++ b/tests/queries/0_stateless/02969_auto_format_detection.reference @@ -0,0 +1,123 @@ +Parquet +a Nullable(UInt64) +b Nullable(String) +c Array(Nullable(UInt64)) +d Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +ORC +a Nullable(Int64) +b Nullable(String) +c Array(Nullable(Int64)) +d Tuple(\n a Nullable(Int64),\n b Nullable(String)) +Arrow +a Nullable(UInt64) +b Nullable(String) +c Array(Nullable(UInt64)) +d Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +ArrowStream +a Nullable(UInt64) +b Nullable(String) +c Array(Nullable(UInt64)) +d Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +Avro +a Int64 +b String +c Array(Int64) +d Tuple(\n a Int64,\n b String) +Native +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +BSONEachRow +a Nullable(Int64) +b Nullable(String) +c Array(Nullable(Int64)) +d Tuple(\n a Nullable(Int64),\n b Nullable(String)) +JSONCompact +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +Values +c1 Nullable(UInt64) +c2 Nullable(String) +c3 Array(Nullable(UInt64)) +c4 Tuple(Nullable(UInt64), Nullable(String)) +TSKV +a Nullable(String) +b Nullable(String) +c Array(Nullable(UInt64)) +d Nullable(String) +JSONObjectEachRow +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +JSONColumns +a 
Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +JSONCompactColumns +c1 Nullable(String) +c2 Nullable(String) +c3 Array(Nullable(String)) +c4 Tuple(\n a Nullable(String),\n b Nullable(String)) +JSONCompact +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +JSON +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +TSV +c1 Nullable(UInt64) +c2 Nullable(String) +c3 Array(Nullable(UInt64)) +c4 Tuple(Nullable(UInt64), Nullable(String)) +CSV +c1 Nullable(UInt64) +c2 Nullable(String) +c3 Array(Nullable(UInt64)) +c4 Nullable(UInt64) +c5 Nullable(String) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a UInt64 +b String +c Array(UInt64) +d Tuple(\n a UInt64,\n b String) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +1 +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) +a Nullable(String) +b Nullable(String) +c Array(Nullable(String)) +d Tuple(\n a Nullable(String),\n b Nullable(String)) diff --git a/tests/queries/0_stateless/02969_auto_format_detection.sh b/tests/queries/0_stateless/02969_auto_format_detection.sh new file mode 100755 index 00000000000..88d6575e499 --- /dev/null +++ b/tests/queries/0_stateless/02969_auto_format_detection.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.data + +for format in Parquet ORC Arrow ArrowStream Avro Native BSONEachRow JSONCompact Values TSKV JSONObjectEachRow JSONColumns JSONCompactColumns JSONCompact JSON TSV CSV +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE')" +done + +rm $DATA_FILE + +$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)', 42) limit 10 format JSONEachRow" > $DATA_FILE.jsonl +$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE*')" + + +$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)', 42) limit 10 format JSONEachRow" > $DATA_FILE + +$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE', auto, 'a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)')" + +$CLICKHOUSE_LOCAL -nmq " +desc file('$DATA_FILE'); +desc file('$DATA_FILE'); +" + +$CLICKHOUSE_LOCAL -nmq " +desc file('$DATA_FILE', JSONEachRow); +desc file('$DATA_FILE'); +" + +touch $DATA_FILE.1 +$CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, b String, c Array(UInt64), d Tuple(a UInt64, b String)', 42) limit 10 format JSONEachRow" > $DATA_FILE.2 +$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE.{1,2}')" +$CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE.{1,2}') settings schema_inference_mode='union'" 2>&1 | grep -c "CANNOT_DETECT_FORMAT" + +$CLICKHOUSE_LOCAL -nmq " +desc file('$DATA_FILE.2'); +desc file('$DATA_FILE.{1,2}'); +" + +rm $DATA_FILE* diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference index 449fe3d34e3..bbb5a960463 100644 --- a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference @@ -1,3 +1,6 @@ 990000 990000 10 +990000 +1 +1000000 diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql index 3702184e336..c9ab83ff9ad 100644 --- a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql @@ -1,5 +1,6 @@ DROP TABLE IF EXISTS pr_1; DROP TABLE IF EXISTS pr_2; +DROP TABLE IF EXISTS numbers_1e6; CREATE TABLE pr_1 (`a` UInt32) ENGINE = MergeTree ORDER BY a PARTITION BY a % 10 AS SELECT 10 * intDiv(number, 10) + 1 FROM numbers(1_000_000); @@ -19,9 +20,62 @@ WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) SELECT count() FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a SETTINGS allow_experimental_analyzer = 0, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; -- { serverError SUPPORT_IS_DISABLED } +-- Disabled for any value of allow_experimental_parallel_reading_from_replicas != 1, not just 2 +WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) +SELECT count() FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a +SETTINGS allow_experimental_analyzer = 0, allow_experimental_parallel_reading_from_replicas = 512, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', 
max_parallel_replicas = 3; -- { serverError SUPPORT_IS_DISABLED } + -- Sanitizer SELECT count() FROM pr_2 JOIN numbers(10) as pr_1 ON pr_2.a = pr_1.number SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; +-- Parallel replicas detection should work inside subqueries +SELECT * +FROM +( + WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) + SELECT count() FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + +-- Subquery + subquery +SELECT count() +FROM +( + SELECT c + 1 + FROM + ( + WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) + SELECT count() as c FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a + ) +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + +CREATE TABLE numbers_1e6 +( + `n` UInt64 +) +ENGINE = MergeTree +ORDER BY n +AS SELECT * FROM numbers(1_000_000); + +-- Same but nested CTE's +WITH + cte1 AS + ( + SELECT n + FROM numbers_1e6 + ), + cte2 AS + ( + SELECT n + FROM numbers_1e6 + WHERE n IN (cte1) + ) +SELECT count() +FROM cte2 +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + +DROP TABLE IF EXISTS numbers_1e6; DROP TABLE IF EXISTS pr_1; DROP TABLE IF EXISTS pr_2; diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference new file mode 100644 index 00000000000..5540734ae4c --- /dev/null +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference @@ -0,0 +1,68 @@ +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +1 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +1 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql new file mode 100644 index 00000000000..0911a481251 --- /dev/null +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql @@ -0,0 +1,70 @@ +--intDiv-- +SELECT intDiv(4,2); +SELECT intDiv(toDecimal32(4.4, 2), 2); +SELECT intDiv(4, toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), 2); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 3), 2); +SELECT intDiv(toDecimal64(4.4, 3), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 4), 2); +SELECT intDiv(toDecimal128(4.4, 4), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 5), 2); +SELECT intDiv(toDecimal256(4.4, 5), toDecimal32(2.2, 2)); +SELECT intDiv(4, toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(4, toDecimal128(2.2, 3)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(4, toDecimal256(2.2, 4)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal64(2.2, 2)); 
+SELECT intDiv(toDecimal128(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(4.2, toDecimal32(2.2, 2)); +SELECT intDiv(4.2, toDecimal64(2.2, 2)); +SELECT intDiv(4.2, toDecimal128(2.2, 2)); +SELECT intDiv(4.2, toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), 2.2); +SELECT intDiv(toDecimal64(4.4, 2), 2.2); +SELECT intDiv(toDecimal128(4.4, 2), 2.2); +SELECT intDiv(toDecimal256(4.4, 2), 2.2); +--intDivOrZero-- +SELECT intDivOrZero(4,2); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2); +SELECT intDivOrZero(4, toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 3), 2); +SELECT intDivOrZero(toDecimal64(4.4, 3), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 4), 2); +SELECT intDivOrZero(toDecimal128(4.4, 4), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 5), 2); +SELECT intDivOrZero(toDecimal256(4.4, 5), toDecimal32(2.2, 2)); +SELECT intDivOrZero(4, toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(4, toDecimal128(2.2, 3)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(4, toDecimal256(2.2, 4)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal32(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal64(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal128(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal64(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal128(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal256(4.4, 2), 2.2); diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh index 386c29704b6..12d08159012 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -3,6 +3,9 @@ # Tag no-fasttest: requires S3 # Tag no-random-settings, no-random-merge-tree-settings: to avoid creating extra files like serialization.json, this test too exocit anyway +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh index bf20247c7aa..b079e67a000 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -3,6 +3,9 @@ # Tag no-fasttest: requires S3 # Tag no-random-settings, no-random-merge-tree-settings: to avoid creating extra files like serialization.json, this test too exocit anyway +# Creation of a database with Ordinary engine emits a warning. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.reference b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.reference new file mode 100644 index 00000000000..1c6235170d5 --- /dev/null +++ b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.reference @@ -0,0 +1,6 @@ +inserting into a remote table from local with concurrency equal to max_insert_threads +9 +inserting into a remote table from remote with concurrency max_insert_threads +9 +inserting into a remote table from remote (reading with parallel replicas) with concurrency max_insert_threads +9 diff --git a/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh new file mode 100755 index 00000000000..e65c9654c9c --- /dev/null +++ b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-random-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +${CLICKHOUSE_CLIENT} -nq """ + CREATE TABLE t1_local + ( + n UInt64, + ) + ENGINE = MergeTree + ORDER BY n; + + CREATE TABLE t3_dist + ( + n UInt64, + ) + ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 't1_local', rand()); + + CREATE TABLE t4_pr + ( + n UInt64, + ) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02981_insert_select', '1') + ORDER BY n; + + SYSTEM STOP MERGES t1_local; + + INSERT INTO t1_local SELECT * FROM numbers_mt(1e6); +""" + +max_insert_threads=9 + +echo "inserting into a remote table from local with concurrency equal to max_insert_threads" +${CLICKHOUSE_CLIENT} --max_insert_threads "$max_insert_threads" -q """ + EXPLAIN PIPELINE + INSERT INTO t3_dist + SELECT * FROM t1_local; +""" | grep -v EmptySink | grep -c Sink + +echo "inserting into a remote table from remote with concurrency max_insert_threads" +${CLICKHOUSE_CLIENT} --max_insert_threads "$max_insert_threads" --parallel_distributed_insert_select 0 -q """ + EXPLAIN PIPELINE + INSERT INTO t3_dist + SELECT * FROM t3_dist; +""" | grep -v EmptySink | grep -c Sink + +echo "inserting into a remote table from remote (reading with parallel replicas) with concurrency max_insert_threads" +${CLICKHOUSE_CLIENT} --max_insert_threads "$max_insert_threads" --allow_experimental_parallel_reading_from_replicas 2 --cluster_for_parallel_replicas 'parallel_replicas' --max_parallel_replicas 3 -q """ + EXPLAIN PIPELINE + INSERT INTO t3_dist + SELECT * FROM t4_pr; +""" | grep -v EmptySink | grep -c Sink diff --git a/tests/queries/0_stateless/02981_nested_bad_types.reference b/tests/queries/0_stateless/02981_nested_bad_types.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02981_nested_bad_types.sql b/tests/queries/0_stateless/02981_nested_bad_types.sql new file mode 100644 index 00000000000..1620eca590d --- /dev/null +++ b/tests/queries/0_stateless/02981_nested_bad_types.sql @@ -0,0 +1,59 @@ +set allow_suspicious_low_cardinality_types=0; +set allow_suspicious_fixed_string_types=0; +set allow_experimental_variant_type=0; + + +select [42]::Array(LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select [[[42]]]::Array(Array(Array(LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select map('a', 42)::Map(String, LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select map('a', map('b', [42]))::Map(String, Map(String, Array(LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select tuple('a', 42)::Tuple(String, LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + +create table test (x Array(LowCardinality(UInt64))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Array(Array(LowCardinality(UInt64)))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Map(String, LowCardinality(UInt64))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Map(String, Map(String, LowCardinality(UInt64)))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Tuple(String, LowCardinality(UInt64))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table 
test (x Tuple(String, Array(Map(String, LowCardinality(UInt64))))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + + +select ['42']::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select ['42']::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select [[['42']]]::Array(Array(Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select map('a', '42')::Map(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select map('a', map('b', ['42']))::Map(String, Map(String, Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select tuple('a', '42')::Tuple(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', '42')])::Tuple(String, Array(Map(String, FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} + +create table test (x Array(FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Array(Array(FixedString(1000000)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Map(String, FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Map(String, Map(String, FixedString(1000000)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, Array(Map(String, FixedString(1000000))))) engine=Memory; -- {serverError ILLEGAL_COLUMN} + +select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select [[[42]]]::Array(Array(Array(Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} +select map('a', 42)::Map(String, Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select map('a', map('b', [42]))::Map(String, Map(String, Array(Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} +select tuple('a', 42)::Tuple(String, Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} + +create table test (x Array(Variant(String, UInt64))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Array(Array(Variant(String, UInt64)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Map(String, Variant(String, UInt64))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Map(String, Map(String, Variant(String, UInt64)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, Variant(String, UInt64))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, Array(Map(String, Variant(String, UInt64))))) engine=Memory; -- {serverError ILLEGAL_COLUMN} + +set allow_experimental_variant_type=1; +select 42::Variant(String, LowCardinality(UInt64)) settings allow_experimental_variant_type=1; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, Variant(LowCardinality(UInt64), String)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Variant(LowCardinality(UInt64), String)) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Tuple(String, Array(Map(String, Variant(LowCardinality(UInt64), String))))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + +select '42'::Variant(UInt64, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', 
'42')])::Tuple(String, Array(Map(String, Variant(UInt32, FixedString(1000000))))); -- {serverError ILLEGAL_COLUMN} +create table test (x Variant(UInt64, FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, Array(Map(String, FixedString(1000000))))) engine=Memory; -- {serverError ILLEGAL_COLUMN} diff --git a/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql index 6ca594ebc7d..b784e734457 100644 --- a/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql +++ b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-random-merge-tree-settings DROP TABLE IF EXISTS t_vertical_merge_memory; @@ -14,7 +14,9 @@ SETTINGS merge_max_block_size_bytes = '10M'; INSERT INTO t_vertical_merge_memory SELECT number, arrayMap(x -> repeat('a', 50), range(1000)) FROM numbers(3000); -INSERT INTO t_vertical_merge_memory SELECT number, arrayMap(x -> repeat('a', 50), range(1000)) FROM numbers(3000); +-- Why 3001? - Deduplication, which is off with normal MergeTree by default but on for ReplicatedMergeTree and SharedMergeTree. +-- We automatically replace MergeTree with SharedMergeTree in ClickHouse Cloud. +INSERT INTO t_vertical_merge_memory SELECT number, arrayMap(x -> repeat('a', 50), range(1000)) FROM numbers(3001); OPTIMIZE TABLE t_vertical_merge_memory FINAL; diff --git a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference new file mode 100644 index 00000000000..47e9b86237a --- /dev/null +++ b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference @@ -0,0 +1,3 @@ +c1 Nullable(String) +c1 Nullable(Float64) +x Nullable(Float64) diff --git a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql new file mode 100644 index 00000000000..4f78855f5ce --- /dev/null +++ b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql @@ -0,0 +1,5 @@ +DESC format(CSV, '1E20\n1.1E20') settings input_format_try_infer_exponent_floats = 0; +DESC format(CSV, '1E20\n1.1E20') settings input_format_try_infer_exponent_floats = 1; +-- This setting should not take affect on JSON formats +DESC format(JSONEachRow, '{"x" : 1.1e20}') settings input_format_try_infer_exponent_floats = 0; + diff --git a/tests/queries/0_stateless/02982_unambiguous_alter_commands.reference b/tests/queries/0_stateless/02982_unambiguous_alter_commands.reference new file mode 100644 index 00000000000..74d4b631074 --- /dev/null +++ b/tests/queries/0_stateless/02982_unambiguous_alter_commands.reference @@ -0,0 +1,6 @@ +--- Alter commands in parens +ALTER TABLE a\n MODIFY TTL expr GROUP BY some_key,\n ADD COLUMN `a` Int64 +ALTER TABLE a\n MODIFY TTL expr TO VOLUME \'vol1\', expr2 + toIntervalYear(2) TO VOLUME \'vol2\',\n DROP COLUMN c +--- Check only consistent parens around alter commands are accepted +ALTER TABLE a\n DROP COLUMN b,\n DROP COLUMN c +ALTER TABLE a\n DROP COLUMN b,\n DROP COLUMN c diff --git a/tests/queries/0_stateless/02982_unambiguous_alter_commands.sql b/tests/queries/0_stateless/02982_unambiguous_alter_commands.sql new file mode 100644 index 00000000000..d25bccb65c3 --- /dev/null +++ b/tests/queries/0_stateless/02982_unambiguous_alter_commands.sql @@ -0,0 +1,9 @@ +SELECT '--- Alter commands in parens'; +SELECT formatQuery('ALTER TABLE a (MODIFY TTL expr GROUP BY 
some_key), (ADD COLUMN a Int64)'); +SELECT formatQuery('ALTER TABLE a (MODIFY TTL expr TO VOLUME \'vol1\', expr2 + INTERVAL 2 YEAR TO VOLUME \'vol2\'), (DROP COLUMN c)'); + +SELECT '--- Check only consistent parens around alter commands are accepted'; +SELECT formatQuery('ALTER TABLE a (DROP COLUMN b), DROP COLUMN c'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a DROP COLUMN b, (DROP COLUMN c)'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a (DROP COLUMN b), (DROP COLUMN c)'); +SELECT formatQuery('ALTER TABLE a DROP COLUMN b, DROP COLUMN c'); -- Make sure it is backward compatible diff --git a/tests/queries/0_stateless/02983_empty_map_hasToken.reference b/tests/queries/0_stateless/02983_empty_map_hasToken.reference new file mode 100644 index 00000000000..75378377541 --- /dev/null +++ b/tests/queries/0_stateless/02983_empty_map_hasToken.reference @@ -0,0 +1,10 @@ +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/02983_empty_map_hasToken.sql b/tests/queries/0_stateless/02983_empty_map_hasToken.sql new file mode 100644 index 00000000000..6d146150ac8 --- /dev/null +++ b/tests/queries/0_stateless/02983_empty_map_hasToken.sql @@ -0,0 +1,27 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/60223 + +CREATE TABLE test +( + t String, + id String, + h Map(String, String) +) +ENGINE = MergeTree +ORDER BY (t, id) SETTINGS index_granularity = 4096 ; + +insert into test values ('xxx', 'x', {'content-type':'text/plain','user-agent':'bulk-tests'}); +insert into test values ('xxx', 'y', {'content-type':'application/json','user-agent':'bulk-tests'}); +insert into test select 'xxx', number, map('content-type', 'x' ) FROM numbers(1e2); + +optimize table test final; + +SELECT count() FROM test PREWHERE hasToken(h['user-agent'], 'bulk') WHERE hasToken(h['user-agent'], 'tests') and t = 'xxx'; +SELECT count() FROM test PREWHERE hasToken(h['user-agent'], 'tests') WHERE hasToken(h['user-agent'], 'bulk') and t = 'xxx'; +SELECT count() FROM test WHERE hasToken(h['user-agent'], 'bulk') and hasToken(h['user-agent'], 'tests') and t = 'xxx'; +SELECT count() FROM test PREWHERE hasToken(h['user-agent'], 'bulk') and hasToken(h['user-agent'], 'tests') and t = 'xxx'; +SELECT count() FROM test PREWHERE hasToken(h['user-agent'], 'bulk') and hasToken(h['user-agent'], 'tests') WHERE t = 'xxx'; +SELECT count() FROM test PREWHERE hasToken(h['user-agent'], 'tests') and hasToken(h['user-agent'], 'bulk') WHERE t = 'xxx'; +SELECT count() FROM test PREWHERE hasToken(h['user-agent'], 'tests') and hasToken(h['user-agent'], 'bulk'); +SELECT count() FROM test PREWHERE hasToken(h['user-agent'], 'bulk') and hasToken(h['user-agent'], 'tests'); +SELECT count() FROM test WHERE hasToken(h['user-agent'], 'tests') and hasToken(h['user-agent'], 'bulk'); +SELECT count() FROM test WHERE hasToken(h['user-agent'], 'bulk') and hasToken(h['user-agent'], 'tests'); diff --git a/tests/queries/0_stateless/02985_dialects_with_distributed_tables.reference b/tests/queries/0_stateless/02985_dialects_with_distributed_tables.reference new file mode 100644 index 00000000000..f22e294ce86 --- /dev/null +++ b/tests/queries/0_stateless/02985_dialects_with_distributed_tables.reference @@ -0,0 +1,9 @@ +123 +234 +315 +123 +234 +315 +123 +234 +315 diff --git a/tests/queries/0_stateless/02985_dialects_with_distributed_tables.sql b/tests/queries/0_stateless/02985_dialects_with_distributed_tables.sql new file mode 100644 index 00000000000..6ac36cf5835 --- /dev/null +++ 
b/tests/queries/0_stateless/02985_dialects_with_distributed_tables.sql @@ -0,0 +1,30 @@ +-- Tags: no-fasttest, distributed + +DROP TABLE IF EXISTS shared_test_table; +DROP TABLE IF EXISTS distributed_test_table; + +CREATE TABLE shared_test_table (id UInt64) +ENGINE = MergeTree +ORDER BY (id); + +CREATE TABLE distributed_test_table +ENGINE = Distributed(test_cluster_two_shard_three_replicas_localhost, currentDatabase(), shared_test_table); + +INSERT INTO shared_test_table VALUES (123), (651), (446), (315), (234), (764); + +SELECT id FROM distributed_test_table LIMIT 3; + +SET dialect = 'kusto'; + +distributed_test_table | take 3; + +SET dialect = 'prql'; + +from distributed_test_table +select {id} +take 1..3; + +SET dialect = 'clickhouse'; + +DROP TABLE distributed_test_table; +DROP TABLE shared_test_table; diff --git a/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference b/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference new file mode 100644 index 00000000000..1dfad945ee2 --- /dev/null +++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference @@ -0,0 +1,12 @@ +49500 +49500 +49500 +49500 +49500 +49500 +450000 +450000 +450000 +450000 +450000 +450000 diff --git a/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql b/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql new file mode 100644 index 00000000000..0295a64a092 --- /dev/null +++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql @@ -0,0 +1,14 @@ +select sumIf(number::Int128, number % 10 == 0) from numbers(1000); +select sumIf(number::UInt128, number % 10 == 0) from numbers(1000); +select sumIf(number::Int256, number % 10 == 0) from numbers(1000); +select sumIf(number::UInt256, number % 10 == 0) from numbers(1000); +select sumIf(number::Decimal128(3), number % 10 == 0) from numbers(1000); +select sumIf(number::Decimal256(3), number % 10 == 0) from numbers(1000); + +-- Test when the condition is neither 0 nor 1 +select sumIf(number::Int128, number % 10) from numbers(1000); +select sumIf(number::UInt128, number % 10) from numbers(1000); +select sumIf(number::Int256, number % 10) from numbers(1000); +select sumIf(number::UInt256, number % 10) from numbers(1000); +select sumIf(number::Decimal128(3), number % 10) from numbers(1000); +select sumIf(number::Decimal256(3), number % 10) from numbers(1000); diff --git a/tests/queries/0_stateless/02985_parser_check_stack_size.reference b/tests/queries/0_stateless/02985_parser_check_stack_size.reference new file mode 100644 index 00000000000..f83e0818db2 --- /dev/null +++ b/tests/queries/0_stateless/02985_parser_check_stack_size.reference @@ -0,0 +1 @@ +TOO_DEEP diff --git a/tests/queries/0_stateless/02985_parser_check_stack_size.sh b/tests/queries/0_stateless/02985_parser_check_stack_size.sh new file mode 100755 index 00000000000..c91a0a3eacc --- /dev/null +++ b/tests/queries/0_stateless/02985_parser_check_stack_size.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "select 'create table test (x ' || repeat('Array(', 10000) || 'UInt64' || repeat(')', 10000) || ') engine=Memory' format TSVRaw" | $CLICKHOUSE_CURL "${CLICKHOUSE_URL}&max_parser_depth=100000" --data-binary @- | grep -o -F 'TOO_DEEP' diff --git a/tests/queries/0_stateless/02987_group_array_intersect.reference b/tests/queries/0_stateless/02987_group_array_intersect.reference new file mode 100644 index 00000000000..7ec64a889f5 --- /dev/null +++ b/tests/queries/0_stateless/02987_group_array_intersect.reference @@ -0,0 +1,21 @@ +[] +[] +[NULL] +[NULL] +[] +[[1,2,4,5]] +[] +[1,4,5] +[] +[] +1000000 +999999 +[9] +['a','c'] +1000000 +999999 +['1'] +[] +['2023-01-01 00:00:00'] +['2023-01-01'] +['2023-01-01'] diff --git a/tests/queries/0_stateless/02987_group_array_intersect.sql b/tests/queries/0_stateless/02987_group_array_intersect.sql new file mode 100644 index 00000000000..703914e464d --- /dev/null +++ b/tests/queries/0_stateless/02987_group_array_intersect.sql @@ -0,0 +1,91 @@ +DROP TABLE IF EXISTS test_empty; +CREATE TABLE test_empty (a Array(Int64)) engine=MergeTree ORDER BY a; +INSERT INTO test_empty VALUES ([]); +SELECT groupArrayIntersect(*) FROM test_empty; +INSERT INTO test_empty VALUES ([1]); +SELECT groupArrayIntersect(*) FROM test_empty; +DROP TABLE test_empty; + +DROP TABLE IF EXISTS test_null; +CREATE TABLE test_null (a Array(Nullable(Int64))) engine=MergeTree ORDER BY a SETTINGS allow_nullable_key=1; +INSERT INTO test_null VALUES ([NULL, NULL]); +SELECT groupArrayIntersect(*) FROM test_null; +INSERT INTO test_null VALUES ([NULL]); +SELECT groupArrayIntersect(*) FROM test_null; +INSERT INTO test_null VALUES ([1,2]); +SELECT groupArrayIntersect(*) FROM test_null; +DROP TABLE test_null; + +DROP TABLE IF EXISTS test_nested_arrays; +CREATE TABLE test_nested_arrays (a Array(Array(Int64))) engine=MergeTree ORDER BY a; +INSERT INTO test_nested_arrays VALUES ([[1,2,3,4,5,6], [1,2,4,5]]); +INSERT INTO test_nested_arrays VALUES ([[1,2,4,5]]); +SELECT groupArrayIntersect(*) FROM test_nested_arrays; +INSERT INTO test_nested_arrays VALUES ([[1,4,3,0,5,5,5]]); +SELECT groupArrayIntersect(*) FROM test_nested_arrays; +DROP TABLE test_nested_arrays; + +DROP TABLE IF EXISTS test_numbers; +CREATE TABLE test_numbers (a Array(Int64)) engine=MergeTree ORDER BY a; +INSERT INTO test_numbers VALUES ([1,2,3,4,5,6]); +INSERT INTO test_numbers VALUES ([1,2,4,5]); +INSERT INTO test_numbers VALUES ([1,4,3,0,5,5,5]); +SELECT groupArrayIntersect(*) FROM test_numbers; +INSERT INTO test_numbers VALUES ([9]); +SELECT groupArrayIntersect(*) FROM test_numbers; +DROP TABLE test_numbers; + +DROP TABLE IF EXISTS test_big_numbers_sep; +CREATE TABLE test_big_numbers_sep (a Array(Int64)) engine=MergeTree ORDER BY a; +INSERT INTO test_big_numbers_sep SELECT array(number) FROM numbers_mt(1000000); +SELECT groupArrayIntersect(*) FROM test_big_numbers_sep; +DROP TABLE test_big_numbers_sep; + +DROP TABLE IF EXISTS test_big_numbers; +CREATE TABLE test_big_numbers (a Array(Int64)) engine=MergeTree ORDER BY a; +INSERT INTO test_big_numbers SELECT range(1000000); +SELECT length(groupArrayIntersect(*)) FROM test_big_numbers; +INSERT INTO test_big_numbers SELECT range(999999); +SELECT length(groupArrayIntersect(*)) FROM test_big_numbers; +INSERT INTO test_big_numbers VALUES ([9]); +SELECT groupArrayIntersect(*) FROM test_big_numbers; +DROP TABLE test_big_numbers; + +DROP TABLE IF EXISTS test_string; +CREATE TABLE test_string (a Array(String)) engine=MergeTree ORDER BY a; 
+INSERT INTO test_string VALUES (['a', 'b', 'c', 'd', 'e', 'f']); +INSERT INTO test_string VALUES (['a', 'aa', 'b', 'bb', 'c', 'cc', 'd', 'dd', 'f', 'ff']); +INSERT INTO test_string VALUES (['ae', 'ab', 'a', 'bb', 'c']); +SELECT groupArrayIntersect(*) FROM test_string; +DROP TABLE test_string; + +DROP TABLE IF EXISTS test_big_string; +CREATE TABLE test_big_string (a Array(String)) engine=MergeTree ORDER BY a; +INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(1000000); +SELECT length(groupArrayIntersect(*)) FROM test_big_string; +INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(999999); +SELECT length(groupArrayIntersect(*)) FROM test_big_string; +INSERT INTO test_big_string VALUES (['1']); +SELECT groupArrayIntersect(*) FROM test_big_string; +INSERT INTO test_big_string VALUES (['a']); +SELECT groupArrayIntersect(*) FROM test_big_string; +DROP TABLE test_big_string; + +DROP TABLE IF EXISTS test_datetime; +CREATE TABLE test_datetime (a Array(DateTime)) engine=MergeTree ORDER BY a; +INSERT INTO test_datetime VALUES ([toDateTime('2023-01-01 00:00:00'), toDateTime('2023-01-01 01:02:03'), toDateTime('2023-01-01 02:03:04')]); +INSERT INTO test_datetime VALUES ([toDateTime('2023-01-01 00:00:00'), toDateTime('2023-01-01 01:02:04'), toDateTime('2023-01-01 02:03:05')]); +SELECT groupArrayIntersect(*) from test_datetime; +DROP TABLE test_datetime; + +DROP TABLE IF EXISTS test_date32; +CREATE TABLE test_date32 (a Array(Date32)) engine=MergeTree ORDER BY a; +INSERT INTO test_date32 VALUES ([toDate32('2023-01-01 00:00:00'), toDate32('2023-01-01 00:00:01')]); +SELECT groupArrayIntersect(*) from test_date32; +DROP TABLE test_date32; + +DROP TABLE IF EXISTS test_date; +CREATE TABLE test_date (a Array(Date)) engine=MergeTree ORDER BY a; +INSERT INTO test_date VALUES ([toDate('2023-01-01 00:00:00'), toDate('2023-01-01 00:00:01')]); +SELECT groupArrayIntersect(*) from test_date; +DROP TABLE test_date; diff --git a/tests/queries/0_stateless/02988_ordinary_database_warning.reference b/tests/queries/0_stateless/02988_ordinary_database_warning.reference new file mode 100644 index 00000000000..587579af915 --- /dev/null +++ b/tests/queries/0_stateless/02988_ordinary_database_warning.reference @@ -0,0 +1 @@ +Ok. diff --git a/tests/queries/0_stateless/02988_ordinary_database_warning.sql b/tests/queries/0_stateless/02988_ordinary_database_warning.sql new file mode 100644 index 00000000000..2a9756d6f66 --- /dev/null +++ b/tests/queries/0_stateless/02988_ordinary_database_warning.sql @@ -0,0 +1,10 @@ +DROP DATABASE IF EXISTS 02988_ordinary; + +SET send_logs_level = 'fatal'; +SET allow_deprecated_database_ordinary = 1; +-- Creation of a database with Ordinary engine emits a warning. +CREATE DATABASE 02988_ordinary ENGINE=Ordinary; + +SELECT DISTINCT 'Ok.' 
FROM system.warnings WHERE message ILIKE '%Ordinary%' and message ILIKE '%deprecated%'; + +DROP DATABASE IF EXISTS 02988_ordinary; diff --git a/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql b/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql index 3e37f368fd8..15633586aa8 100644 --- a/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql +++ b/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql @@ -10,7 +10,7 @@ CREATE TABLE test_table_replicated ALTER TABLE test_table_replicated ADD COLUMN insert_time DateTime; SELECT name, version FROM system.zookeeper -WHERE path = '/clickhouse/tables/' || currentDatabase() ||'/test_table_replicated/' +WHERE path = (SELECT zookeeper_path FROM system.replicas WHERE database = currentDatabase() AND table = 'test_table_replicated') AND name = 'metadata' FORMAT Vertical; DROP TABLE IF EXISTS test_table_replicated_second; @@ -26,7 +26,7 @@ DROP TABLE test_table_replicated; SELECT '--'; SELECT name, value FROM system.zookeeper -WHERE path = '/clickhouse/tables/' || currentDatabase() ||'/test_table_replicated/replicas/2_replica' +WHERE path = (SELECT replica_path FROM system.replicas WHERE database = currentDatabase() AND table = 'test_table_replicated_second') AND name = 'metadata_version' FORMAT Vertical; SYSTEM RESTART REPLICA test_table_replicated_second; diff --git a/tests/queries/0_stateless/02989_system_tables_metadata_version.reference b/tests/queries/0_stateless/02989_system_tables_metadata_version.reference new file mode 100644 index 00000000000..73f6a1ad346 --- /dev/null +++ b/tests/queries/0_stateless/02989_system_tables_metadata_version.reference @@ -0,0 +1,9 @@ +test_temporary_table_02989 0 +-- +test_table 0 +-- +test_table_replicated 0 +-- +test_table_replicated 1 +-- +test_table_replicated 2 diff --git a/tests/queries/0_stateless/02989_system_tables_metadata_version.sql b/tests/queries/0_stateless/02989_system_tables_metadata_version.sql new file mode 100644 index 00000000000..9534b1f2e82 --- /dev/null +++ b/tests/queries/0_stateless/02989_system_tables_metadata_version.sql @@ -0,0 +1,50 @@ +-- Tags: zookeeper, no-parallel + +DROP TABLE IF EXISTS test_temporary_table_02989; +CREATE TEMPORARY TABLE test_temporary_table_02989 +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +SELECT name, metadata_version FROM system.tables WHERE name = 'test_temporary_table_02989' AND is_temporary; + +DROP TABLE test_temporary_table_02989; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +SELECT '--'; + +SELECT name, metadata_version FROM system.tables WHERE database = currentDatabase() AND name = 'test_table'; + +DROP TABLE test_table; + +DROP TABLE IF EXISTS test_table_replicated; +CREATE TABLE test_table_replicated +( + id UInt64, + value String +) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_table_replicated', '1_replica') ORDER BY id; + +SELECT '--'; + +SELECT name, metadata_version FROM system.tables WHERE database = currentDatabase() AND name = 'test_table_replicated'; + +ALTER TABLE test_table_replicated ADD COLUMN insert_time DateTime; + +SELECT '--'; + +SELECT name, metadata_version FROM system.tables WHERE database = currentDatabase() AND name = 'test_table_replicated'; + +ALTER TABLE test_table_replicated ADD COLUMN insert_time_updated DateTime; + +SELECT '--'; + +SELECT name, metadata_version FROM system.tables 
WHERE database = currentDatabase() AND name = 'test_table_replicated'; + +DROP TABLE test_table_replicated; diff --git a/tests/queries/0_stateless/02989_variant_comparison.reference b/tests/queries/0_stateless/02989_variant_comparison.reference new file mode 100644 index 00000000000..df192c6fe8c --- /dev/null +++ b/tests/queries/0_stateless/02989_variant_comparison.reference @@ -0,0 +1,299 @@ +order by v1 nulls first +\N +\N +\N +\N +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,4] +abc +abc +abc +abc +abc +abd +42 +42 +42 +42 +42 +43 +order by v1 nulls last +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,4] +abc +abc +abc +abc +abc +abd +42 +42 +42 +42 +42 +43 +\N +\N +\N +\N +order by v2 nulls first +\N +\N +\N +\N +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,4] +abc +abc +abc +abc +abc +abd +42 +42 +42 +42 +42 +43 +order by v2 nulls last +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,3] +[1,2,4] +abc +abc +abc +abc +abc +abd +42 +42 +42 +42 +42 +43 +\N +\N +\N +\N +order by v1, v2 nulls first +[1,2,3] \N +[1,2,3] [1,2,3] +[1,2,3] [1,2,4] +[1,2,3] abc +[1,2,3] 42 +[1,2,4] [1,2,3] +abc \N +abc [1,2,3] +abc abc +abc abd +abc 42 +abd abc +42 \N +42 [1,2,3] +42 abc +42 42 +42 43 +43 42 +\N \N +\N [1,2,3] +\N abc +\N 42 +order by v1, v2 nulls last +[1,2,3] [1,2,3] +[1,2,3] [1,2,4] +[1,2,3] abc +[1,2,3] 42 +[1,2,3] \N +[1,2,4] [1,2,3] +abc [1,2,3] +abc abc +abc abd +abc 42 +abc \N +abd abc +42 [1,2,3] +42 abc +42 42 +42 43 +42 \N +43 42 +\N [1,2,3] +\N abc +\N 42 +\N \N +order by v2, v1 nulls first +\N [1,2,3] +[1,2,3] [1,2,3] +[1,2,4] [1,2,3] +abc [1,2,3] +42 [1,2,3] +[1,2,3] [1,2,4] +\N abc +[1,2,3] abc +abc abc +abd abc +42 abc +abc abd +\N 42 +[1,2,3] 42 +abc 42 +42 42 +43 42 +42 43 +\N \N +[1,2,3] \N +abc \N +42 \N +order by v2, v1 nulls last +[1,2,3] [1,2,3] +[1,2,4] [1,2,3] +abc [1,2,3] +42 [1,2,3] +\N [1,2,3] +[1,2,3] [1,2,4] +[1,2,3] abc +abc abc +abd abc +42 abc +\N abc +abc abd +[1,2,3] 42 +abc 42 +42 42 +43 42 +\N 42 +42 43 +[1,2,3] \N +abc \N +42 \N +\N \N +v1 = v2 +[1,2,3] [1,2,3] 1 +[1,2,3] [1,2,4] 0 +[1,2,3] abc 0 +[1,2,3] 42 0 +[1,2,3] \N 0 +[1,2,4] [1,2,3] 0 +abc [1,2,3] 0 +abc abc 1 +abc abd 0 +abc 42 0 +abc \N 0 +abd abc 0 +42 [1,2,3] 0 +42 abc 0 +42 42 1 +42 43 0 +42 \N 0 +43 42 0 +\N [1,2,3] 0 +\N abc 0 +\N 42 0 +\N \N 1 +v1 < v2 +[1,2,3] [1,2,3] 0 +[1,2,3] [1,2,4] 1 +[1,2,3] abc 1 +[1,2,3] 42 1 +[1,2,3] \N 1 +[1,2,4] [1,2,3] 0 +abc [1,2,3] 0 +abc abc 0 +abc abd 1 +abc 42 1 +abc \N 1 +abd abc 0 +42 [1,2,3] 0 +42 abc 0 +42 42 0 +42 43 1 +42 \N 1 +43 42 0 +\N [1,2,3] 0 +\N abc 0 +\N 42 0 +\N \N 0 +v1 <= v2 +[1,2,3] [1,2,3] 1 +[1,2,3] [1,2,4] 1 +[1,2,3] abc 1 +[1,2,3] 42 1 +[1,2,3] \N 1 +[1,2,4] [1,2,3] 0 +abc [1,2,3] 0 +abc abc 1 +abc abd 1 +abc 42 1 +abc \N 1 +abd abc 0 +42 [1,2,3] 0 +42 abc 0 +42 42 1 +42 43 1 +42 \N 1 +43 42 0 +\N [1,2,3] 0 +\N abc 0 +\N 42 0 +\N \N 1 +v1 > v2 +[1,2,3] [1,2,3] 0 +[1,2,3] [1,2,4] 0 +[1,2,3] abc 0 +[1,2,3] 42 0 +[1,2,3] \N 0 +[1,2,4] [1,2,3] 1 +abc [1,2,3] 1 +abc abc 0 +abc abd 0 +abc 42 0 +abc \N 0 +abd abc 1 +42 [1,2,3] 1 +42 abc 1 +42 42 0 +42 43 0 +42 \N 0 +43 42 1 +\N [1,2,3] 1 +\N abc 1 +\N 42 1 +\N \N 0 +v1 >= v2 +[1,2,3] [1,2,3] 1 +[1,2,3] [1,2,4] 1 +[1,2,3] abc 1 +[1,2,3] 42 1 +[1,2,3] \N 1 +[1,2,4] [1,2,3] 1 +abc [1,2,3] 1 +abc abc 1 +abc abd 1 +abc 42 1 +abc \N 1 +abd abc 1 +42 [1,2,3] 1 +42 abc 1 +42 42 1 +42 43 1 +42 \N 1 +43 42 1 +\N [1,2,3] 1 +\N abc 1 +\N 42 1 +\N \N 1 diff --git a/tests/queries/0_stateless/02989_variant_comparison.sql b/tests/queries/0_stateless/02989_variant_comparison.sql new file mode 100644 
index 00000000000..e0dcbc97c27 --- /dev/null +++ b/tests/queries/0_stateless/02989_variant_comparison.sql @@ -0,0 +1,79 @@ +set allow_experimental_variant_type=1; + +create table test (v1 Variant(String, UInt64, Array(UInt32)), v2 Variant(String, UInt64, Array(UInt32))) engine=Memory; + +insert into test values (42, 42); +insert into test values (42, 43); +insert into test values (43, 42); + +insert into test values ('abc', 'abc'); +insert into test values ('abc', 'abd'); +insert into test values ('abd', 'abc'); + +insert into test values ([1,2,3], [1,2,3]); +insert into test values ([1,2,3], [1,2,4]); +insert into test values ([1,2,4], [1,2,3]); + +insert into test values (NULL, NULL); + +insert into test values (42, 'abc'); +insert into test values ('abc', 42); + +insert into test values (42, [1,2,3]); +insert into test values ([1,2,3], 42); + +insert into test values (42, NULL); +insert into test values (NULL, 42); + +insert into test values ('abc', [1,2,3]); +insert into test values ([1,2,3], 'abc'); + +insert into test values ('abc', NULL); +insert into test values (NULL, 'abc'); + +insert into test values ([1,2,3], NULL); +insert into test values (NULL, [1,2,3]); + + +select 'order by v1 nulls first'; +select v1 from test order by v1 nulls first; + +select 'order by v1 nulls last'; +select v1 from test order by v1 nulls last; + +select 'order by v2 nulls first'; +select v2 from test order by v2 nulls first; + +select 'order by v2 nulls last'; +select v2 from test order by v2 nulls last; + + +select 'order by v1, v2 nulls first'; +select * from test order by v1, v2 nulls first; + +select 'order by v1, v2 nulls last'; +select * from test order by v1, v2 nulls last; + +select 'order by v2, v1 nulls first'; +select * from test order by v2, v1 nulls first; + +select 'order by v2, v1 nulls last'; +select * from test order by v2, v1 nulls last; + +select 'v1 = v2'; +select v1, v2, v1 = v2 from test order by v1, v2; + +select 'v1 < v2'; +select v1, v2, v1 < v2 from test order by v1, v2; + +select 'v1 <= v2'; +select v1, v2, v1 <= v2 from test order by v1, v2; + +select 'v1 > v2'; +select v1, v2, v1 > v2 from test order by v1, v2; + +select 'v1 >= v2'; +select v1, v2, v2 >= v2 from test order by v1, v2; + +drop table test; + diff --git a/tests/queries/0_stateless/02990_arrayFold_nullable_lc.reference b/tests/queries/0_stateless/02990_arrayFold_nullable_lc.reference new file mode 100644 index 00000000000..5bd5d7bbd90 --- /dev/null +++ b/tests/queries/0_stateless/02990_arrayFold_nullable_lc.reference @@ -0,0 +1,16 @@ +23 +23 +23 +23 +3 +3 +\N +1 +\N +\N +\N +23 +23 +23 +\N +\N diff --git a/tests/queries/0_stateless/02990_arrayFold_nullable_lc.sql b/tests/queries/0_stateless/02990_arrayFold_nullable_lc.sql new file mode 100644 index 00000000000..280defdfbb4 --- /dev/null +++ b/tests/queries/0_stateless/02990_arrayFold_nullable_lc.sql @@ -0,0 +1,35 @@ +SET allow_suspicious_low_cardinality_types=1; + +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4], toInt64(3)); +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4], toInt64(toNullable(3))); +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4], materialize(toInt64(toNullable(3)))); + +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4]::Array(Nullable(Int64)), toInt64(3)); -- { serverError TYPE_MISMATCH } +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4]::Array(Nullable(Int64)), toInt64(toNullable(3))); + +SELECT arrayFold((acc, x) -> (acc + (x * 2)), []::Array(Int64), toInt64(3)); +SELECT arrayFold((acc, x) 
-> (acc + (x * 2)), []::Array(Nullable(Int64)), toInt64(toNullable(3))); +SELECT arrayFold((acc, x) -> (acc + (x * 2)), []::Array(Nullable(Int64)), toInt64(NULL)); + +SELECT arrayFold((acc, x) -> x, materialize(CAST('[0, 1]', 'Array(Nullable(UInt8))')), toUInt8(toNullable(0))); +SELECT arrayFold((acc, x) -> x, materialize(CAST([NULL], 'Array(Nullable(UInt8))')), toUInt8(toNullable(0))); +SELECT arrayFold((acc, x) -> acc + x, materialize(CAST([NULL], 'Array(Nullable(UInt8))')), toUInt64(toNullable(0))); +SELECT arrayFold((acc, x) -> acc + x, materialize(CAST([1, 2, NULL], 'Array(Nullable(UInt8))')), toUInt64(toNullable(0))); + +SELECT arrayFold((acc, x) -> toNullable(acc + (x * 2)), [1, 2, 3, 4], toInt64(3)); -- { serverError TYPE_MISMATCH } +SELECT arrayFold((acc, x) -> toNullable(acc + (x * 2)), [1, 2, 3, 4], toNullable(toInt64(3))); + +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4], toLowCardinality(toInt64(3))); -- { serverError TYPE_MISMATCH } +SELECT arrayFold((acc, x) -> toLowCardinality(acc + (x * 2)), [1, 2, 3, 4], toLowCardinality(toInt64(3))); +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4]::Array(LowCardinality(Int64)), toInt64(toLowCardinality(3))); -- { serverError TYPE_MISMATCH } +SELECT arrayFold((acc, x) -> toLowCardinality(acc + (x * 2)), [1, 2, 3, 4]::Array(LowCardinality(Int64)), toInt64(toLowCardinality(3))); + +SELECT arrayFold((acc, x) -> acc + (x * 2), [1, 2, 3, 4]::Array(Nullable(Int64)), toInt64(toLowCardinality(3))); -- { serverError TYPE_MISMATCH } +SELECT arrayFold((acc, x) -> toLowCardinality(acc + (x * 2)), [1, 2, 3, 4]::Array(Nullable(Int64)), toInt64(toLowCardinality(3))); -- { serverError TYPE_MISMATCH } +SELECT arrayFold((acc, x) -> toLowCardinality(acc + (x * 2)), [1, 2, 3, 4]::Array(Nullable(Int64)), toInt64(toNullable(3))); -- { serverError TYPE_MISMATCH } + +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4], NULL); +-- It's debatable which one of the following 2 queries should work, but considering the return type must match the +-- accumulator type it makes sense to be the second one +SELECT arrayFold((acc, x) -> (acc + (x * 2)), [1, 2, 3, 4], NULL::LowCardinality(Nullable(Int64))); -- { serverError TYPE_MISMATCH } +SELECT arrayFold((acc, x) -> (acc + (x * 2))::LowCardinality(Nullable(Int64)), [1, 2, 3, 4], NULL::LowCardinality(Nullable(Int64))); diff --git a/tests/queries/0_stateless/02990_format_lambdas.reference b/tests/queries/0_stateless/02990_format_lambdas.reference new file mode 100644 index 00000000000..f898d6ffa0e --- /dev/null +++ b/tests/queries/0_stateless/02990_format_lambdas.reference @@ -0,0 +1,10 @@ +SELECT lambda(1, 1) +SELECT lambda(1, 1) +SELECT x -> 1 +SELECT x -> 1 +SELECT (x, y) -> 1 +SELECT (x, y) -> 1 +SELECT lambda(f(1), 1) +SELECT lambda(f(1), 1) +SELECT lambda(f(x), 1) +SELECT lambda(f(x), 1) diff --git a/tests/queries/0_stateless/02990_format_lambdas.sh b/tests/queries/0_stateless/02990_format_lambdas.sh new file mode 100755 index 00000000000..9dc5e0f0461 --- /dev/null +++ b/tests/queries/0_stateless/02990_format_lambdas.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +QUERY="SELECT lambda(1, 1)"; QUERY2=$(${CLICKHOUSE_FORMAT} --query "$QUERY"); echo "$QUERY2"; QUERY3=$(${CLICKHOUSE_FORMAT} --query "$QUERY2"); echo "$QUERY3"; +QUERY="SELECT lambda(x, 1)"; QUERY2=$(${CLICKHOUSE_FORMAT} --query "$QUERY"); echo "$QUERY2"; QUERY3=$(${CLICKHOUSE_FORMAT} --query "$QUERY2"); echo "$QUERY3"; +QUERY="SELECT lambda((x, y), 1)"; QUERY2=$(${CLICKHOUSE_FORMAT} --query "$QUERY"); echo "$QUERY2"; QUERY3=$(${CLICKHOUSE_FORMAT} --query "$QUERY2"); echo "$QUERY3"; +QUERY="SELECT lambda(f(1), 1)"; QUERY2=$(${CLICKHOUSE_FORMAT} --query "$QUERY"); echo "$QUERY2"; QUERY3=$(${CLICKHOUSE_FORMAT} --query "$QUERY2"); echo "$QUERY3"; +QUERY="SELECT lambda(f(x), 1)"; QUERY2=$(${CLICKHOUSE_FORMAT} --query "$QUERY"); echo "$QUERY2"; QUERY3=$(${CLICKHOUSE_FORMAT} --query "$QUERY2"); echo "$QUERY3"; diff --git a/tests/queries/0_stateless/02990_format_not_precedence.reference b/tests/queries/0_stateless/02990_format_not_precedence.reference new file mode 100644 index 00000000000..f44cf2fdb52 --- /dev/null +++ b/tests/queries/0_stateless/02990_format_not_precedence.reference @@ -0,0 +1,13 @@ +-- { echoOn } +SELECT NOT 0 + NOT 0; +0 +SELECT NOT (0 + (NOT 0)); +0 +SELECT (NOT 0) + (NOT 0); +2 +SELECT formatQuery('SELECT NOT 0 + NOT 0'); +SELECT NOT (0 + (NOT 0)) +SELECT formatQuery('SELECT NOT (0 + (NOT 0))'); +SELECT NOT (0 + (NOT 0)) +SELECT formatQuery('SELECT (NOT 0) + (NOT 0)'); +SELECT (NOT 0) + (NOT 0) diff --git a/tests/queries/0_stateless/02990_format_not_precedence.sql b/tests/queries/0_stateless/02990_format_not_precedence.sql new file mode 100644 index 00000000000..98ef2c9e781 --- /dev/null +++ b/tests/queries/0_stateless/02990_format_not_precedence.sql @@ -0,0 +1,7 @@ +-- { echoOn } +SELECT NOT 0 + NOT 0; +SELECT NOT (0 + (NOT 0)); +SELECT (NOT 0) + (NOT 0); +SELECT formatQuery('SELECT NOT 0 + NOT 0'); +SELECT formatQuery('SELECT NOT (0 + (NOT 0))'); +SELECT formatQuery('SELECT (NOT 0) + (NOT 0)'); diff --git a/tests/queries/0_stateless/02990_format_select_from_explain.reference b/tests/queries/0_stateless/02990_format_select_from_explain.reference new file mode 100644 index 00000000000..7c8dcef3824 --- /dev/null +++ b/tests/queries/0_stateless/02990_format_select_from_explain.reference @@ -0,0 +1,9 @@ +SELECT explain +FROM +( + SELECT * + FROM viewExplain('EXPLAIN AST', '', ( + SELECT * + FROM system.numbers + )) +) diff --git a/tests/queries/0_stateless/02990_format_select_from_explain.sh b/tests/queries/0_stateless/02990_format_select_from_explain.sh new file mode 100755 index 00000000000..4955b733788 --- /dev/null +++ b/tests/queries/0_stateless/02990_format_select_from_explain.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_FORMAT} --query "SELECT explain FROM (EXPLAIN AST SELECT * FROM system.numbers)" diff --git a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql new file mode 100644 index 00000000000..5ba0be39991 --- /dev/null +++ b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql @@ -0,0 +1,37 @@ +--https://github.com/ClickHouse/ClickHouse/issues/59999 +DROP TABLE IF EXISTS tags; +CREATE TABLE tags (dev_tag String) ENGINE = Memory AS SELECT '1'; + +SELECT * +FROM +( + SELECT countDistinct(dev_tag) AS total_devtags + FROM + ( + SELECT dev_tag + FROM + ( + SELECT * + FROM tags + ) AS t + GROUP BY dev_tag + ) AS t +) SETTINGS optimize_uniq_to_count=0; + +SELECT * +FROM +( + SELECT countDistinct(dev_tag) AS total_devtags + FROM + ( + SELECT dev_tag + FROM + ( + SELECT * + FROM tags + ) AS t + GROUP BY dev_tag + ) AS t +) SETTINGS optimize_uniq_to_count=1; + +DROP TABLE IF EXISTS tags; diff --git a/tests/queries/0_stateless/02990_parts_splitter_invalid_ranges.reference b/tests/queries/0_stateless/02990_parts_splitter_invalid_ranges.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02990_parts_splitter_invalid_ranges.sql b/tests/queries/0_stateless/02990_parts_splitter_invalid_ranges.sql new file mode 100644 index 00000000000..e19c23acc2e --- /dev/null +++ b/tests/queries/0_stateless/02990_parts_splitter_invalid_ranges.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + `eventType` String, + `timestamp` UInt64, + `key` UInt64 +) +ENGINE = ReplacingMergeTree +PRIMARY KEY (eventType, timestamp) +ORDER BY (eventType, timestamp, key) +SETTINGS index_granularity = 1; + +SYSTEM STOP MERGES test_table; + +INSERT INTO test_table VALUES ('1', 1704472004759, 1), ('3', 1704153600000, 2), ('3', 1704153600000, 3), ('5', 1700161822134, 4); + +INSERT INTO test_table VALUES ('1', 1704468357009, 1), ('3', 1704153600000, 2), ('3', 1704153600000, 3), ('5', 1701458520878, 4); + +INSERT INTO test_table VALUES ('1', 1704470704762, 1), ('3', 1704153600000, 2), ('3', 1704153600000, 3), ('5', 1702609856302, 4); + +SELECT eventType, timestamp, key FROM test_table +WHERE (eventType IN ('2', '4')) AND + ((timestamp >= max2(toInt64('1698938519999'), toUnixTimestamp64Milli(now64() - toIntervalDay(90)))) AND + (timestamp <= (toInt64('1707143315452') - 1))); + +SELECT eventType, timestamp, key FROM test_table FINAL +WHERE (eventType IN ('2', '4')) AND + ((timestamp >= max2(toInt64('1698938519999'), toUnixTimestamp64Milli(now64() - toIntervalDay(90)))) AND + (timestamp <= (toInt64('1707143315452') - 1))); + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02990_rmt_replica_path_uuid.reference b/tests/queries/0_stateless/02990_rmt_replica_path_uuid.reference new file mode 100644 index 00000000000..5521c015fcf --- /dev/null +++ b/tests/queries/0_stateless/02990_rmt_replica_path_uuid.reference @@ -0,0 +1,4 @@ +aaaaaaaa-1111-2222-3333-aaaaaaaaaaaa +/tables/default/aaaaaaaa-1111-2222-3333-aaaaaaaaaaaa/replicas/r1 +aaaaaaaa-1111-2222-3333-aaaaaaaaaaaa 
+/tables/default/aaaaaaaa-1111-2222-3333-aaaaaaaaaaaa/replicas/r1
diff --git a/tests/queries/0_stateless/02990_rmt_replica_path_uuid.sql b/tests/queries/0_stateless/02990_rmt_replica_path_uuid.sql
new file mode 100644
index 00000000000..4fcdff2910f
--- /dev/null
+++ b/tests/queries/0_stateless/02990_rmt_replica_path_uuid.sql
@@ -0,0 +1,23 @@
+-- Tags: no-parallel, no-ordinary-database, no-replicated-database
+-- Tag no-parallel: static UUID
+-- Tag no-ordinary-database: requires UUID
+-- Tag no-replicated-database: executes with ON CLUSTER anyway
+
+-- Ignore "ATTACH TABLE query with full table definition is not recommended"
+-- Ignore BAD_ARGUMENTS
+SET send_logs_level='fatal';
+
+DROP TABLE IF EXISTS x;
+
+ATTACH TABLE x UUID 'aaaaaaaa-1111-2222-3333-aaaaaaaaaaaa' (key Int) ENGINE = ReplicatedMergeTree('/tables/{database}/{uuid}', 'r1') ORDER BY tuple();
+SELECT uuid FROM system.tables WHERE database = currentDatabase() and table = 'x';
+SELECT replica_path FROM system.replicas WHERE database = currentDatabase() and table = 'x';
+DROP TABLE x;
+
+-- {uuid} macro forbidden for CREATE TABLE without explicit UUID
+CREATE TABLE x (key Int) ENGINE = ReplicatedMergeTree('/tables/{database}/{uuid}', 'r1') ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
+
+CREATE TABLE x UUID 'aaaaaaaa-1111-2222-3333-aaaaaaaaaaaa' (key Int) ENGINE = ReplicatedMergeTree('/tables/{database}/{uuid}', 'r1') ORDER BY tuple();
+SELECT uuid FROM system.tables WHERE database = currentDatabase() and table = 'x';
+SELECT replica_path FROM system.replicas WHERE database = currentDatabase() and table = 'x';
+DROP TABLE x;
diff --git a/tests/queries/0_stateless/02990_variant_where_cond.reference b/tests/queries/0_stateless/02990_variant_where_cond.reference
new file mode 100644
index 00000000000..73b397be12d
--- /dev/null
+++ b/tests/queries/0_stateless/02990_variant_where_cond.reference
@@ -0,0 +1,2 @@
+Hello
+42
diff --git a/tests/queries/0_stateless/02990_variant_where_cond.sql b/tests/queries/0_stateless/02990_variant_where_cond.sql
new file mode 100644
index 00000000000..8149988f1f2
--- /dev/null
+++ b/tests/queries/0_stateless/02990_variant_where_cond.sql
@@ -0,0 +1,11 @@
+set allow_experimental_variant_type=1;
+
+create table test (v Variant(String, UInt64)) engine=Memory;
+insert into test values (42), ('Hello'), (NULL);
+
+select * from test where v = 'Hello';
+select * from test where v = 42; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
+select * from test where v = 42::UInt64::Variant(String, UInt64);
+
+drop table test;
+
diff --git a/tests/queries/0_stateless/02991_count_rewrite_analyzer.reference b/tests/queries/0_stateless/02991_count_rewrite_analyzer.reference
new file mode 100644
index 00000000000..ccb266fc2b5
--- /dev/null
+++ b/tests/queries/0_stateless/02991_count_rewrite_analyzer.reference
@@ -0,0 +1,4 @@
+Nullable(UInt64)
+UInt64
+Nullable(UInt64)
+UInt64
diff --git a/tests/queries/0_stateless/02991_count_rewrite_analyzer.sql b/tests/queries/0_stateless/02991_count_rewrite_analyzer.sql
new file mode 100644
index 00000000000..b11aeedd225
--- /dev/null
+++ b/tests/queries/0_stateless/02991_count_rewrite_analyzer.sql
@@ -0,0 +1,7 @@
+-- Regression test for https://github.com/ClickHouse/ClickHouse/issues/59919
+SET allow_experimental_analyzer=1;
+
+SELECT toTypeName(sum(toNullable('a') IN toNullable('a'))) AS x;
+SELECT toTypeName(count(toNullable('a') IN toNullable('a'))) AS x;
+SELECT toTypeName(sum(toFixedString('a', toLowCardinality(toNullable(1))) IN toFixedString('a', 1))) AS x;
+SELECT toTypeName(count(toFixedString('a', toLowCardinality(toNullable(1))) IN toFixedString('a', 1))) AS x;
diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.reference b/tests/queries/0_stateless/02992_all_columns_should_have_comment.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql
new file mode 100644
index 00000000000..b984c12e285
--- /dev/null
+++ b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql
@@ -0,0 +1,4 @@
+SYSTEM FLUSH LOGS;
+SELECT 'Column ' || name || ' from table ' || concat(database, '.', table) || ' should have a comment'
+FROM system.columns
+WHERE (database = 'system') AND (comment = '') AND (table NOT ILIKE '%_log_%') AND (table NOT IN ('numbers', 'numbers_mt', 'one')) AND (default_kind != 'ALIAS');
diff --git a/tests/queries/0_stateless/02992_settings_overflow.reference b/tests/queries/0_stateless/02992_settings_overflow.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02992_settings_overflow.sql b/tests/queries/0_stateless/02992_settings_overflow.sql
new file mode 100644
index 00000000000..d120c3400e5
--- /dev/null
+++ b/tests/queries/0_stateless/02992_settings_overflow.sql
@@ -0,0 +1 @@
+SET max_threads = -1; -- { serverError CANNOT_CONVERT_TYPE }
diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference
new file mode 100644
index 00000000000..5bc329ae4eb
--- /dev/null
+++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference
@@ -0,0 +1,4 @@
+100000000 140000000
+0 0
+1
+100000000 100000000
diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.sql b/tests/queries/0_stateless/02993_lazy_index_loading.sql
new file mode 100644
index 00000000000..7de4af9ef0e
--- /dev/null
+++ b/tests/queries/0_stateless/02993_lazy_index_loading.sql
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (s String) ENGINE = MergeTree ORDER BY s SETTINGS index_granularity = 1;
+
+INSERT INTO test SELECT randomString(1000) FROM numbers(100000);
+SELECT round(primary_key_bytes_in_memory, -7), round(primary_key_bytes_in_memory_allocated, -7) FROM system.parts WHERE database = currentDatabase() AND table = 'test';
+
+DETACH TABLE test;
+SET max_memory_usage = '50M';
+ATTACH TABLE test;
+
+SELECT primary_key_bytes_in_memory, primary_key_bytes_in_memory_allocated FROM system.parts WHERE database = currentDatabase() AND table = 'test';
+
+SET max_memory_usage = '200M';
+SELECT s != '' FROM test LIMIT 1;
+
+SELECT round(primary_key_bytes_in_memory, -7), round(primary_key_bytes_in_memory_allocated, -7) FROM system.parts WHERE database = currentDatabase() AND table = 'test';
+
+DROP TABLE test;
diff --git a/tests/queries/0_stateless/02993_values_escape_quote.reference b/tests/queries/0_stateless/02993_values_escape_quote.reference
new file mode 100644
index 00000000000..29d6a133fec
--- /dev/null
+++ b/tests/queries/0_stateless/02993_values_escape_quote.reference
@@ -0,0 +1,3 @@
+('foo')('foo\'bar')('foo\'\'bar')
+output_format_values_escape_quote_with_quote=1
+('foo')('foo''bar')('foo''''bar')
diff --git a/tests/queries/0_stateless/02993_values_escape_quote.sql b/tests/queries/0_stateless/02993_values_escape_quote.sql
new file mode 100644
index 00000000000..e6fc5f1b280
--- /dev/null
+++ b/tests/queries/0_stateless/02993_values_escape_quote.sql
@@ -0,0 +1,12 @@
+select 'foo' format Values;
+select 'foo\'bar' format Values;
+select 'foo\'\'bar' format Values;
+
+select '\noutput_format_values_escape_quote_with_quote=1' format LineAsString;
+set output_format_values_escape_quote_with_quote=1;
+
+select 'foo' format Values;
+select 'foo\'bar' format Values;
+select 'foo\'\'bar' format Values;
+-- fix no newline at end of file
+select '' format LineAsString;
diff --git a/tests/queries/0_stateless/02994_cosineDistanceNullable.reference b/tests/queries/0_stateless/02994_cosineDistanceNullable.reference
new file mode 100644
index 00000000000..e4fe1f97e7e
--- /dev/null
+++ b/tests/queries/0_stateless/02994_cosineDistanceNullable.reference
@@ -0,0 +1,11 @@
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
diff --git a/tests/queries/0_stateless/02994_cosineDistanceNullable.sql b/tests/queries/0_stateless/02994_cosineDistanceNullable.sql
new file mode 100644
index 00000000000..a62216982f3
--- /dev/null
+++ b/tests/queries/0_stateless/02994_cosineDistanceNullable.sql
@@ -0,0 +1,3 @@
+-- https://github.com/ClickHouse/ClickHouse/issues/59596
+SELECT cosineDistance((1, 1), (toNullable(0.5), 0.1));
+SELECT cosineDistance((1, 1), (toNullable(0.5), 0.1)) from numbers(10);
diff --git a/tests/queries/0_stateless/02994_inconsistent_formatting.reference b/tests/queries/0_stateless/02994_inconsistent_formatting.reference
new file mode 100644
index 00000000000..b9db04e880c
--- /dev/null
+++ b/tests/queries/0_stateless/02994_inconsistent_formatting.reference
@@ -0,0 +1,4 @@
+1
+2
+3
+(1)
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02994_inconsistent_formatting.sql b/tests/queries/0_stateless/02994_inconsistent_formatting.sql
new file mode 100644
index 00000000000..f22f81513f6
--- /dev/null
+++ b/tests/queries/0_stateless/02994_inconsistent_formatting.sql
@@ -0,0 +1,10 @@
+CREATE TEMPORARY TABLE table (x UInt8);
+INSERT INTO `table` FORMAT Values (1);
+INSERT INTO TABLE `table` FORMAT Values (2);
+INSERT INTO TABLE table FORMAT Values (3);
+SELECT * FROM table ORDER BY x;
+DROP TABLE table;
+
+CREATE TEMPORARY TABLE FORMAT (x UInt8);
+INSERT INTO table FORMAT Values (1);
+SELECT * FROM FORMAT FORMAT Values;
diff --git a/tests/queries/0_stateless/02994_libarchive_compression.reference b/tests/queries/0_stateless/02994_libarchive_compression.reference
new file mode 100644
index 00000000000..4e7159c4842
--- /dev/null
+++ b/tests/queries/0_stateless/02994_libarchive_compression.reference
@@ -0,0 +1,14 @@
+1
+4
+1
+4
+1
+4
+1
+4
+1
+4
+1
+4
+Unrecognized archive format
+Unrecognized archive format
diff --git a/tests/queries/0_stateless/02994_libarchive_compression.sh b/tests/queries/0_stateless/02994_libarchive_compression.sh
new file mode 100755
index 00000000000..056647d2c7a
--- /dev/null
+++ b/tests/queries/0_stateless/02994_libarchive_compression.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+.
"$CURDIR"/../shell_config.sh + +EPOCHTIME=$(date "+%s") + +# Test tar format +$CLICKHOUSE_CLIENT --query "INSERT INTO TABLE FUNCTION file('02994_file_$EPOCHTIME/test.tar', 'TabSeparatedRaw', 'column1 String') VALUES (char(116,101,115,116,46,99,115,118,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,48,48,48,54,54,52,0,48,48,48,49,55,53,48,0,48,48,48,49,55,53,48,0,48,48,48,48,48,48,48,48,48,49,52,0,49,52,53,54,53,53,50,50,54,49,55,0,48,49,50,50,49,50,0,32,48,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,117,115,116,97,114,32,32,0,117,98,117,110,116,117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,117,98,117,110,116,117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49,44,50,44,51,10,52,44,53,44,54,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));" +$CLICKHOUSE_CLIENT --query "SELECT c1 FROM file('02994_file_$EPOCHTIME/test.tar :: test.csv', CSV);" + + +# Test tar.xz format +$CLICKHOUSE_CLIENT --query "INSERT INTO TABLE FUNCTION file('02994_file_$EPOCHTIME/test.tar.xz', 'TabSeparatedRaw', 'column1 String') VALUES 
(char(66,90,104,57,49,65,89,38,83,89,108,215,67,197,0,0,78,123,144,210,16,64,128,64,5,127,128,0,64,107,33,159,0,4,0,2,8,32,0,116,34,122,36,122,153,26,13,3,64,105,160,148,130,122,129,160,50,0,3,109,59,133,95,33,76,224,38,15,14,85,232,12,73,69,196,36,202,7,43,99,38,175,117,168,43,2,37,44,198,225,8,84,168,207,149,250,54,189,175,113,182,18,176,221,17,144,10,26,106,176,122,60,177,140,206,103,202,75,191,1,0,142,156,253,249,94,202,93,68,193,140,252,93,201,20,225,66,65,179,93,15,20));" +$CLICKHOUSE_CLIENT --query "SELECT c1 FROM file('02994_file_$EPOCHTIME/test.tar.xz :: test.csv', CSV);" + +# Test tar.gz format +$CLICKHOUSE_CLIENT --query "INSERT INTO TABLE FUNCTION file('02994_file_$EPOCHTIME/test.tar.gz', 'TabSeparatedRaw', 'column1 String') VALUES (char(31,139,8,0,230,112,212,101,0,3,237,207,193,10,131,48,12,6,96,207,62,69,30,160,140,164,77,235,243,148,178,194,46,10,166,219,243,91,45,122,146,221,220,16,242,93,126,2,33,252,41,79,41,143,36,159,238,66,136,24,152,97,205,33,248,45,209,182,185,33,6,98,31,60,58,75,132,80,103,55,112,7,120,101,169,221,91,74,156,107,21,137,99,153,227,152,94,146,166,179,189,186,150,243,151,59,237,19,56,242,38,200,88,227,122,54,222,132,254,223,93,148,82,74,253,206,2,148,165,35,158,0,8,0,0));" +$CLICKHOUSE_CLIENT --query "SELECT c1 FROM file('02994_file_$EPOCHTIME/test.tar.gz :: test.csv', CSV);" + + +# Test tar.bz2 format +$CLICKHOUSE_CLIENT --query "INSERT INTO TABLE FUNCTION file('02994_file_$EPOCHTIME/test.tar.bz2', 'TabSeparatedRaw', 'column1 String') VALUES (char(66,90,104,57,49,65,89,38,83,89,108,215,67,197,0,0,78,123,144,210,16,64,128,64,5,127,128,0,64,107,33,159,0,4,0,2,8,32,0,116,34,122,36,122,153,26,13,3,64,105,160,148,130,122,129,160,50,0,3,109,59,133,95,33,76,224,38,15,14,85,232,12,73,69,196,36,202,7,43,99,38,175,117,168,43,2,37,44,198,225,8,84,168,207,149,250,54,189,175,113,182,18,176,221,17,144,10,26,106,176,122,60,177,140,206,103,202,75,191,1,0,142,156,253,249,94,202,93,68,193,140,252,93,201,20,225,66,65,179,93,15,20));" +$CLICKHOUSE_CLIENT --query "SELECT c1 FROM file('02994_file_$EPOCHTIME/test.tar.bz2 :: test.csv', CSV);" + + +# Test 7z format +$CLICKHOUSE_CLIENT --query "INSERT INTO TABLE FUNCTION file('02994_file_$EPOCHTIME/test.7z', 'TabSeparatedRaw', 'column1 String') VALUES (char(55,122,188,175,39,28,0,4,248,166,182,96,16,0,0,0,0,0,0,0,90,0,0,0,0,0,0,0,3,235,205,15,1,0,11,49,44,50,44,51,10,52,44,53,44,54,10,0,1,4,6,0,1,9,16,0,7,11,1,0,1,33,33,1,0,12,12,0,8,10,1,180,193,159,219,0,0,5,1,25,12,0,0,0,0,0,0,0,0,0,0,0,0,17,19,0,116,0,101,0,115,0,116,0,46,0,99,0,115,0,118,0,0,0,25,0,20,10,1,0,188,83,33,33,187,99,218,1,21,6,1,0,32,128,164,129,0,0));" +$CLICKHOUSE_CLIENT --query "SELECT c1 FROM file('02994_file_$EPOCHTIME/test.7z :: test.csv', CSV);" + + +# Test zip format +$CLICKHOUSE_CLIENT --query "INSERT INTO TABLE FUNCTION file('02994_file_$EPOCHTIME/test.zip', 'TabSeparatedRaw', 'column1 String') VALUES (char(80,75,3,4,10,0,0,0,0,0,80,129,84,88,180,193,159,219,12,0,0,0,12,0,0,0,8,0,28,0,116,101,115,116,46,99,115,118,85,84,9,0,3,72,52,212,101,74,52,212,101,117,120,11,0,1,4,245,1,0,0,4,20,0,0,0,49,44,50,44,51,10,52,44,53,44,54,10,80,75,1,2,30,3,10,0,0,0,0,0,80,129,84,88,180,193,159,219,12,0,0,0,12,0,0,0,8,0,24,0,0,0,0,0,1,0,0,0,164,129,0,0,0,0,116,101,115,116,46,99,115,118,85,84,5,0,3,72,52,212,101,117,120,11,0,1,4,245,1,0,0,4,20,0,0,0,80,75,5,6,0,0,0,0,1,0,1,0,78,0,0,0,78,0,0,0,0,0));" +$CLICKHOUSE_CLIENT --query "SELECT c1 FROM file('02994_file_$EPOCHTIME/test.zip :: test.csv', CSV);" + +# Test rar file with 7z extension - this should fail 
+$CLICKHOUSE_CLIENT --query "INSERT INTO TABLE FUNCTION file('02994_file_$EPOCHTIME/test.rar.7z', 'TabSeparatedRaw', 'column1 String') VALUES (char(82,97,114,33,26,7,1,0,51,146,181,229,10,1,5,6,0,5,1,1,128,128,0,235,180,125,92,36,2,3,11,140,0,4,140,0,32,18,67,124,209,128,0,0,8,116,101,115,116,46,99,115,118,10,3,2,18,154,60,112,217,99,218,1,49,44,50,44,51,13,10,52,44,53,44,54,29,119,86,81,3,5,4,0));" +$CLICKHOUSE_CLIENT --query "SELECT c1 FROM file('02994_file_$EPOCHTIME/test.rar.7z :: test.csv', CSV);" 2>&1 | grep -o "Unrecognized archive format" + +# If the code reach here - return 0 otherwise we might confuse the test result with grep return value. +exit 0 \ No newline at end of file diff --git a/tests/queries/0_stateless/02994_sanity_check_settings.reference b/tests/queries/0_stateless/02994_sanity_check_settings.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02994_sanity_check_settings.sql b/tests/queries/0_stateless/02994_sanity_check_settings.sql new file mode 100644 index 00000000000..073cd9749e6 --- /dev/null +++ b/tests/queries/0_stateless/02994_sanity_check_settings.sql @@ -0,0 +1,24 @@ +CREATE TABLE data_02052_1_wide0__fuzz_48 +( + `key` Nullable(Int64), + `value` UInt8 +) + ENGINE = MergeTree + ORDER BY key + SETTINGS min_bytes_for_wide_part = 0, allow_nullable_key = 1 AS +SELECT + number, + repeat(toString(number), 5) +FROM numbers(1); + +SELECT * APPLY max +FROM data_02052_1_wide0__fuzz_48 +GROUP BY toFixedString(toFixedString(toFixedString(toFixedString(toFixedString(toLowCardinality('UInt256'), toFixedString(toNullable(toNullable(2)), toFixedString(toFixedString(7), 7)), 7), 7), materialize(toNullable(7))), 7), materialize(7)) +WITH CUBE + SETTINGS max_read_buffer_size = 7, max_threads = 9223372036854775807; -- { serverError INVALID_SETTING_VALUE } + +SELECT zero + 1 AS x +FROM system.zeros + SETTINGS max_block_size = 9223372036854775806, max_rows_to_read = 20, read_overflow_mode = 'break'; -- { serverError INVALID_SETTING_VALUE } + +EXPLAIN PIPELINE SELECT zero + 1 AS x FROM system.zeros SETTINGS max_block_size = 9223372036854775806, max_rows_to_read = 20, read_overflow_mode = 'break'; -- { serverError INVALID_SETTING_VALUE } diff --git a/tests/queries/0_stateless/02995_bad_formatting_union_intersect.reference b/tests/queries/0_stateless/02995_bad_formatting_union_intersect.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02995_bad_formatting_union_intersect.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02995_bad_formatting_union_intersect.sql b/tests/queries/0_stateless/02995_bad_formatting_union_intersect.sql new file mode 100644 index 00000000000..227f407fc5c --- /dev/null +++ b/tests/queries/0_stateless/02995_bad_formatting_union_intersect.sql @@ -0,0 +1,2 @@ +create temporary table t1 engine=MergeTree() order by c as ( select 1 as c intersect (select 1 as c union all select 2 as c ) ); +SELECT * FROM t1; diff --git a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv new file mode 100644 index 00000000000..4c0c9125b46 --- /dev/null +++ b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv @@ -0,0 +1,940 @@ +add_http_cors_header 0 +additional_result_filter +additional_table_filters {} +aggregate_functions_null_for_empty 0 +aggregation_in_order_max_block_bytes 50000000 +aggregation_memory_efficient_merge_threads 0 +allow_aggregate_partitions_independently 0 
+allow_asynchronous_read_from_io_pool_for_merge_tree 0 +allow_changing_replica_until_first_data_packet 0 +allow_create_index_without_type 0 +allow_custom_error_code_in_throwif 0 +allow_ddl 1 +allow_deprecated_database_ordinary 0 +allow_deprecated_syntax_for_merge_tree 0 +allow_distributed_ddl 1 +allow_drop_detached 0 +allow_execute_multiif_columnar 1 +allow_experimental_alter_materialized_view_structure 1 +allow_experimental_analyzer 0 +allow_experimental_annoy_index 0 +allow_experimental_bigint_types 1 +allow_experimental_codecs 0 +allow_experimental_database_atomic 1 +allow_experimental_database_materialized_mysql 0 +allow_experimental_database_materialized_postgresql 0 +allow_experimental_database_replicated 0 +allow_experimental_funnel_functions 0 +allow_experimental_geo_types 1 +allow_experimental_hash_functions 0 +allow_experimental_inverted_index 0 +allow_experimental_lightweight_delete 1 +allow_experimental_live_view 0 +allow_experimental_map_type 1 +allow_experimental_materialized_postgresql_table 0 +allow_experimental_nlp_functions 0 +allow_experimental_object_type 0 +allow_experimental_parallel_reading_from_replicas 0 +allow_experimental_projection_optimization 1 +allow_experimental_query_cache 1 +allow_experimental_query_deduplication 0 +allow_experimental_refreshable_materialized_view 0 +allow_experimental_s3queue 1 +allow_experimental_shared_merge_tree 0 +allow_experimental_statistic 0 +allow_experimental_undrop_table_query 1 +allow_experimental_usearch_index 0 +allow_experimental_window_functions 1 +allow_experimental_window_view 0 +allow_hyperscan 1 +allow_introspection_functions 0 +allow_named_collection_override_by_default 1 +allow_non_metadata_alters 1 +allow_nonconst_timezone_arguments 0 +allow_nondeterministic_mutations 0 +allow_nondeterministic_optimize_skip_unused_shards 0 +allow_prefetched_read_pool_for_local_filesystem 0 +allow_prefetched_read_pool_for_remote_filesystem 1 +allow_push_predicate_when_subquery_contains_with 1 +allow_settings_after_format_in_insert 0 +allow_simdjson 1 +allow_statistic_optimize 0 +allow_suspicious_codecs 0 +allow_suspicious_fixed_string_types 0 +allow_suspicious_indices 0 +allow_suspicious_low_cardinality_types 0 +allow_suspicious_ttl_expressions 0 +allow_unrestricted_reads_from_keeper 0 +alter_move_to_space_execute_async 0 +alter_partition_verbose_result 0 +alter_sync 1 +analyze_index_with_space_filling_curves 1 +annoy_index_search_k_nodes -1 +any_join_distinct_right_table_keys 0 +apply_deleted_mask 1 +apply_mutations_on_fly 0 +asterisk_include_alias_columns 0 +asterisk_include_materialized_columns 0 +async_insert 0 +async_insert_busy_timeout_ms 200 +async_insert_cleanup_timeout_ms 1000 +async_insert_deduplicate 0 +async_insert_max_data_size 1000000 +async_insert_max_query_number 450 +async_insert_stale_timeout_ms 0 +async_insert_threads 16 +async_query_sending_for_remote 1 +async_socket_for_remote 1 +azure_create_new_file_on_insert 0 +azure_list_object_keys_size 1000 +azure_max_single_part_upload_size 104857600 +azure_max_single_read_retries 4 +azure_truncate_on_insert 0 +background_buffer_flush_schedule_pool_size 16 +background_common_pool_size 8 +background_distributed_schedule_pool_size 16 +background_fetches_pool_size 8 +background_merges_mutations_concurrency_ratio 2 +background_message_broker_schedule_pool_size 16 +background_move_pool_size 8 +background_pool_size 16 +background_schedule_pool_size 128 +backup_restore_batch_size_for_keeper_multi 1000 +backup_restore_batch_size_for_keeper_multiread 10000 
+backup_restore_keeper_fault_injection_probability 0 +backup_restore_keeper_fault_injection_seed 0 +backup_restore_keeper_max_retries 20 +backup_restore_keeper_retry_initial_backoff_ms 100 +backup_restore_keeper_retry_max_backoff_ms 5000 +backup_restore_keeper_value_max_size 1048576 +backup_threads 16 +bool_false_representation false +bool_true_representation true +cache_warmer_threads 4 +calculate_text_stack_trace 1 +cancel_http_readonly_queries_on_client_close 0 +cast_ipv4_ipv6_default_on_conversion_error 0 +cast_keep_nullable 0 +check_query_single_value_result 1 +check_referential_table_dependencies 0 +check_table_dependencies 1 +checksum_on_read 1 +cloud_mode 0 +cloud_mode_engine 1 +cluster_for_parallel_replicas +collect_hash_table_stats_during_aggregation 1 +column_names_for_schema_inference +compatibility +compatibility_ignore_auto_increment_in_create_table 0 +compatibility_ignore_collation_in_create_table 1 +compile_aggregate_expressions 1 +compile_expressions 0 +compile_sort_description 1 +connect_timeout 10 +connect_timeout_with_failover_ms 1000 +connect_timeout_with_failover_secure_ms 1000 +connection_pool_max_wait_ms 0 +connections_with_failover_max_tries 3 +convert_query_to_cnf 0 +count_distinct_implementation uniqExact +count_distinct_optimization 0 +create_index_ignore_unique 0 +create_replicated_merge_tree_fault_injection_probability 0 +create_table_empty_primary_key_by_default 0 +cross_to_inner_join_rewrite 1 +data_type_default_nullable 0 +database_atomic_wait_for_drop_and_detach_synchronously 0 +database_replicated_allow_only_replicated_engine 0 +database_replicated_allow_replicated_engine_arguments 1 +database_replicated_always_detach_permanently 0 +database_replicated_ddl_output 1 +database_replicated_enforce_synchronous_settings 0 +database_replicated_initial_query_timeout_sec 300 +date_time_input_format basic +date_time_output_format simple +date_time_overflow_behavior ignore +decimal_check_overflow 1 +deduplicate_blocks_in_dependent_materialized_views 0 +default_database_engine Atomic +default_max_bytes_in_join 1000000000 +default_table_engine None +default_temporary_table_engine Memory +describe_compact_output 0 +describe_extend_object_types 0 +describe_include_subcolumns 0 +describe_include_virtual_columns 0 +dialect clickhouse +dictionary_use_async_executor 0 +distinct_overflow_mode throw +distributed_aggregation_memory_efficient 1 +distributed_background_insert_batch 0 +distributed_background_insert_max_sleep_time_ms 30000 +distributed_background_insert_sleep_time_ms 100 +distributed_background_insert_split_batch_on_failure 0 +distributed_background_insert_timeout 0 +distributed_connections_pool_size 1024 +distributed_ddl_entry_format_version 5 +distributed_ddl_output_mode throw +distributed_ddl_task_timeout 180 +distributed_directory_monitor_batch_inserts 0 +distributed_directory_monitor_max_sleep_time_ms 30000 +distributed_directory_monitor_sleep_time_ms 100 +distributed_directory_monitor_split_batch_on_failure 0 +distributed_foreground_insert 0 +distributed_group_by_no_merge 0 +distributed_product_mode deny +distributed_push_down_limit 1 +distributed_replica_error_cap 1000 +distributed_replica_error_half_life 60 +distributed_replica_max_ignored_errors 0 +do_not_merge_across_partitions_select_final 0 +drain_timeout 3 +empty_result_for_aggregation_by_constant_keys_on_empty_set 1 +empty_result_for_aggregation_by_empty_set 0 +enable_debug_queries 0 +enable_deflate_qpl_codec 0 +enable_early_constant_folding 1 +enable_extended_results_for_datetime_functions 0 
+enable_filesystem_cache 1 +enable_filesystem_cache_log 0 +enable_filesystem_cache_on_write_operations 0 +enable_filesystem_read_prefetches_log 0 +enable_global_with_statement 1 +enable_http_compression 0 +enable_job_stack_trace 0 +enable_lightweight_delete 1 +enable_memory_bound_merging_of_aggregation_results 1 +enable_multiple_prewhere_read_steps 1 +enable_optimize_predicate_expression 1 +enable_optimize_predicate_expression_to_final_subquery 1 +enable_order_by_all 1 +enable_positional_arguments 1 +enable_reads_from_query_cache 1 +enable_s3_requests_logging 0 +enable_scalar_subquery_optimization 1 +enable_sharing_sets_for_mutations 1 +enable_software_prefetch_in_aggregation 1 +enable_unaligned_array_join 0 +enable_url_encoding 1 +enable_writes_to_query_cache 1 +engine_file_allow_create_multiple_files 0 +engine_file_empty_if_not_exists 0 +engine_file_skip_empty_files 0 +engine_file_truncate_on_insert 0 +engine_url_skip_empty_files 0 +errors_output_format CSV +exact_rows_before_limit 0 +except_default_mode ALL +external_storage_connect_timeout_sec 10 +external_storage_max_read_bytes 0 +external_storage_max_read_rows 0 +external_storage_rw_timeout_sec 300 +external_table_functions_use_nulls 1 +external_table_strict_query 0 +extract_kvp_max_pairs_per_row 1000 +extremes 0 +fallback_to_stale_replicas_for_distributed_queries 1 +filesystem_cache_max_download_size 137438953472 +filesystem_cache_segments_batch_size 20 +filesystem_prefetch_max_memory_usage 1073741824 +filesystem_prefetch_min_bytes_for_single_read_task 2097152 +filesystem_prefetch_step_bytes 0 +filesystem_prefetch_step_marks 0 +filesystem_prefetches_limit 200 +final 0 +flatten_nested 1 +force_aggregate_partitions_independently 0 +force_aggregation_in_order 0 +force_data_skipping_indices +force_grouping_standard_compatibility 1 +force_index_by_date 0 +force_optimize_projection 0 +force_optimize_projection_name +force_optimize_skip_unused_shards 0 +force_optimize_skip_unused_shards_nesting 0 +force_primary_key 0 +force_remove_data_recursively_on_drop 0 +format_avro_schema_registry_url +format_binary_max_array_size 1073741824 +format_binary_max_string_size 1073741824 +format_capn_proto_enum_comparising_mode by_values +format_capn_proto_use_autogenerated_schema 1 +format_csv_allow_double_quotes 1 +format_csv_allow_single_quotes 0 +format_csv_delimiter , +format_csv_null_representation \\N +format_custom_escaping_rule Escaped +format_custom_field_delimiter \t +format_custom_result_after_delimiter +format_custom_result_before_delimiter +format_custom_row_after_delimiter \n +format_custom_row_before_delimiter +format_custom_row_between_delimiter +format_display_secrets_in_show_and_select 0 +format_json_object_each_row_column_for_object_name +format_protobuf_use_autogenerated_schema 1 +format_regexp +format_regexp_escaping_rule Raw +format_regexp_skip_unmatched 0 +format_schema +format_template_resultset +format_template_row +format_template_rows_between_delimiter \n +format_tsv_null_representation \\N +formatdatetime_f_prints_single_zero 0 +formatdatetime_format_without_leading_zeros 0 +formatdatetime_parsedatetime_m_is_month_name 1 +fsync_metadata 1 +function_implementation +function_json_value_return_type_allow_complex 0 +function_json_value_return_type_allow_nullable 0 +function_range_max_elements_in_block 500000000 +function_sleep_max_microseconds_per_block 3000000 +glob_expansion_max_elements 1000 +grace_hash_join_initial_buckets 1 +grace_hash_join_max_buckets 1024 +group_by_overflow_mode throw +group_by_two_level_threshold 100000 
+group_by_two_level_threshold_bytes 50000000 +group_by_use_nulls 0 +handle_kafka_error_mode default +handshake_timeout_ms 10000 +hdfs_create_new_file_on_insert 0 +hdfs_replication 0 +hdfs_skip_empty_files 0 +hdfs_truncate_on_insert 0 +hedged_connection_timeout_ms 50 +hsts_max_age 0 +http_connection_timeout 1 +http_headers_progress_interval_ms 100 +http_make_head_request 1 +http_max_chunk_size 107374182400 +http_max_field_name_size 131072 +http_max_field_value_size 131072 +http_max_fields 1000000 +http_max_multipart_form_data_size 1073741824 +http_max_request_param_data_size 10485760 +http_max_tries 10 +http_max_uri_size 1048576 +http_native_compression_disable_checksumming_on_decompress 0 +http_receive_timeout 30 +http_response_buffer_size 0 +http_retry_initial_backoff_ms 100 +http_retry_max_backoff_ms 10000 +http_send_timeout 30 +http_skip_not_found_url_for_globs 1 +http_wait_end_of_query 0 +http_write_exception_in_output_format 1 +http_zlib_compression_level 3 +idle_connection_timeout 3600 +ignore_cold_parts_seconds 0 +ignore_data_skipping_indices +ignore_on_cluster_for_replicated_access_entities_queries 0 +ignore_on_cluster_for_replicated_udf_queries 0 +implicit_transaction 0 +input_format_allow_errors_num 0 +input_format_allow_errors_ratio 0 +input_format_allow_seeks 1 +input_format_arrow_allow_missing_columns 1 +input_format_arrow_case_insensitive_column_matching 0 +input_format_arrow_import_nested 0 +input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference 0 +input_format_avro_allow_missing_fields 0 +input_format_avro_null_as_default 0 +input_format_bson_skip_fields_with_unsupported_types_in_schema_inference 0 +input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference 0 +input_format_csv_allow_cr_end_of_line 0 +input_format_csv_allow_variable_number_of_columns 0 +input_format_csv_allow_whitespace_or_tab_as_delimiter 0 +input_format_csv_arrays_as_nested_csv 0 +input_format_csv_detect_header 1 +input_format_csv_empty_as_default 1 +input_format_csv_enum_as_number 0 +input_format_csv_skip_first_lines 0 +input_format_csv_skip_trailing_empty_lines 0 +input_format_csv_trim_whitespaces 1 +input_format_csv_try_infer_numbers_from_strings 0 +input_format_csv_use_best_effort_in_schema_inference 1 +input_format_csv_use_default_on_bad_values 0 +input_format_custom_allow_variable_number_of_columns 0 +input_format_custom_detect_header 1 +input_format_custom_skip_trailing_empty_lines 0 +input_format_defaults_for_omitted_fields 1 +input_format_hive_text_collection_items_delimiter  +input_format_hive_text_fields_delimiter  +input_format_hive_text_map_keys_delimiter  +input_format_import_nested_json 0 +input_format_ipv4_default_on_conversion_error 0 +input_format_ipv6_default_on_conversion_error 0 +input_format_json_compact_allow_variable_number_of_columns 0 +input_format_json_defaults_for_missing_elements_in_named_tuple 1 +input_format_json_ignore_unknown_keys_in_named_tuple 1 +input_format_json_infer_incomplete_types_as_strings 1 +input_format_json_named_tuples_as_objects 1 +input_format_json_read_arrays_as_strings 1 +input_format_json_read_bools_as_numbers 1 +input_format_json_read_numbers_as_strings 1 +input_format_json_read_objects_as_strings 1 +input_format_json_try_infer_named_tuples_from_objects 1 +input_format_json_try_infer_numbers_from_strings 0 +input_format_json_validate_types_from_metadata 1 +input_format_max_bytes_to_read_for_schema_inference 33554432 +input_format_max_rows_to_read_for_schema_inference 25000 +input_format_msgpack_number_of_columns 0 
+input_format_mysql_dump_map_column_names 1 +input_format_mysql_dump_table_name +input_format_native_allow_types_conversion 1 +input_format_null_as_default 1 +input_format_orc_allow_missing_columns 1 +input_format_orc_case_insensitive_column_matching 0 +input_format_orc_filter_push_down 1 +input_format_orc_import_nested 0 +input_format_orc_row_batch_size 100000 +input_format_orc_skip_columns_with_unsupported_types_in_schema_inference 0 +input_format_orc_use_fast_decoder 1 +input_format_parallel_parsing 1 +input_format_parquet_allow_missing_columns 1 +input_format_parquet_case_insensitive_column_matching 0 +input_format_parquet_filter_push_down 1 +input_format_parquet_import_nested 0 +input_format_parquet_local_file_min_bytes_for_seek 8192 +input_format_parquet_max_block_size 8192 +input_format_parquet_preserve_order 0 +input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference 0 +input_format_protobuf_flatten_google_wrappers 0 +input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference 0 +input_format_record_errors_file_path +input_format_skip_unknown_fields 1 +input_format_try_infer_dates 1 +input_format_try_infer_datetimes 1 +input_format_try_infer_integers 1 +input_format_tsv_allow_variable_number_of_columns 0 +input_format_tsv_detect_header 1 +input_format_tsv_empty_as_default 0 +input_format_tsv_enum_as_number 0 +input_format_tsv_skip_first_lines 0 +input_format_tsv_skip_trailing_empty_lines 0 +input_format_tsv_use_best_effort_in_schema_inference 1 +input_format_values_accurate_types_of_literals 1 +input_format_values_allow_data_after_semicolon 0 +input_format_values_deduce_templates_of_expressions 1 +input_format_values_interpret_expressions 1 +input_format_with_names_use_header 1 +input_format_with_types_use_header 1 +insert_allow_materialized_columns 0 +insert_deduplicate 1 +insert_deduplication_token +insert_distributed_one_random_shard 0 +insert_distributed_sync 0 +insert_distributed_timeout 0 +insert_keeper_fault_injection_probability 0 +insert_keeper_fault_injection_seed 0 +insert_keeper_max_retries 20 +insert_keeper_retry_initial_backoff_ms 100 +insert_keeper_retry_max_backoff_ms 10000 +insert_null_as_default 1 +insert_quorum 0 +insert_quorum_parallel 1 +insert_quorum_timeout 600000 +insert_shard_id 0 +interactive_delay 100000 +intersect_default_mode ALL +interval_output_format numeric +join_algorithm default +join_any_take_last_row 0 +join_default_strictness ALL +join_on_disk_max_files_to_merge 64 +join_overflow_mode throw +join_use_nulls 0 +joined_subquery_requires_alias 1 +kafka_disable_num_consumers_limit 0 +kafka_max_wait_ms 5000 +keeper_map_strict_mode 0 +legacy_column_name_of_tuple_literal 0 +limit 0 +live_view_heartbeat_interval 15 +load_balancing random +load_balancing_first_offset 0 +load_marks_asynchronously 0 +local_filesystem_read_method pread_threadpool +local_filesystem_read_prefetch 0 +lock_acquire_timeout 120 +log_comment +log_formatted_queries 0 +log_processors_profiles 0 +log_profile_events 1 +log_queries 1 +log_queries_cut_to_length 100000 +log_queries_min_query_duration_ms 0 +log_queries_min_type QUERY_START +log_queries_probability 1 +log_query_settings 1 +log_query_threads 0 +log_query_views 1 +low_cardinality_allow_in_native_format 1 +low_cardinality_max_dictionary_size 8192 +low_cardinality_use_single_dictionary_for_part 0 +materialize_ttl_after_modify 1 +materialized_views_ignore_errors 0 +max_alter_threads \'auto(16)\' +max_analyze_depth 5000 +max_ast_depth 1000 +max_ast_elements 50000 +max_backup_bandwidth 0 
+max_block_size 65409 +max_bytes_before_external_group_by 0 +max_bytes_before_external_sort 0 +max_bytes_before_remerge_sort 1000000000 +max_bytes_in_distinct 0 +max_bytes_in_join 0 +max_bytes_in_set 0 +max_bytes_to_read 0 +max_bytes_to_read_leaf 0 +max_bytes_to_sort 0 +max_bytes_to_transfer 0 +max_columns_to_read 0 +max_compress_block_size 1048576 +max_concurrent_queries_for_all_users 0 +max_concurrent_queries_for_user 0 +max_distributed_connections 1024 +max_distributed_depth 5 +max_download_buffer_size 10485760 +max_download_threads 4 +max_entries_for_hash_table_stats 10000 +max_execution_speed 0 +max_execution_speed_bytes 0 +max_execution_time 0 +max_execution_time_leaf 0 +max_expanded_ast_elements 500000 +max_fetch_partition_retries_count 5 +max_final_threads \'auto(16)\' +max_http_get_redirects 0 +max_hyperscan_regexp_length 0 +max_hyperscan_regexp_total_length 0 +max_insert_block_size 1048449 +max_insert_delayed_streams_for_parallel_write 0 +max_insert_threads 0 +max_joined_block_size_rows 65409 +max_limit_for_ann_queries 1000000 +max_live_view_insert_blocks_before_refresh 64 +max_local_read_bandwidth 0 +max_local_write_bandwidth 0 +max_memory_usage 0 +max_memory_usage_for_all_queries 0 +max_memory_usage_for_user 0 +max_network_bandwidth 0 +max_network_bandwidth_for_all_users 0 +max_network_bandwidth_for_user 0 +max_network_bytes 0 +max_number_of_partitions_for_independent_aggregation 128 +max_parallel_replicas 1 +max_parser_depth 1000 +max_partition_size_to_drop 50000000000 +max_partitions_per_insert_block 100 +max_partitions_to_read -1 +max_pipeline_depth 0 +max_query_size 262144 +max_read_buffer_size 1048576 +max_read_buffer_size_local_fs 131072 +max_read_buffer_size_remote_fs 0 +max_remote_read_network_bandwidth 0 +max_remote_read_network_bandwidth_for_server 0 +max_remote_write_network_bandwidth 0 +max_remote_write_network_bandwidth_for_server 0 +max_replica_delay_for_distributed_queries 300 +max_replicated_fetches_network_bandwidth_for_server 0 +max_replicated_sends_network_bandwidth_for_server 0 +max_result_bytes 0 +max_result_rows 0 +max_rows_in_distinct 0 +max_rows_in_join 0 +max_rows_in_set 0 +max_rows_in_set_to_optimize_join 100000 +max_rows_to_group_by 0 +max_rows_to_read 0 +max_rows_to_read_leaf 0 +max_rows_to_sort 0 +max_rows_to_transfer 0 +max_sessions_for_user 0 +max_size_to_preallocate_for_aggregation 100000000 +max_streams_for_merge_tree_reading 0 +max_streams_multiplier_for_merge_tables 5 +max_streams_to_max_threads_ratio 1 +max_subquery_depth 100 +max_table_size_to_drop 50000000000 +max_temporary_columns 0 +max_temporary_data_on_disk_size_for_query 0 +max_temporary_data_on_disk_size_for_user 0 +max_temporary_non_const_columns 0 +max_threads \'auto(16)\' +max_threads_for_annoy_index_creation 4 +max_threads_for_indexes 0 +max_untracked_memory 4194304 +memory_overcommit_ratio_denominator 1073741824 +memory_overcommit_ratio_denominator_for_user 1073741824 +memory_profiler_sample_max_allocation_size 0 +memory_profiler_sample_min_allocation_size 0 +memory_profiler_sample_probability 0 +memory_profiler_step 4194304 +memory_tracker_fault_probability 0 +memory_usage_overcommit_max_wait_microseconds 5000000 +merge_tree_clear_old_parts_interval_seconds 1 +merge_tree_clear_old_temporary_directories_interval_seconds 60 +merge_tree_coarse_index_granularity 8 +merge_tree_compact_parts_min_granules_to_multibuffer_read 16 +merge_tree_determine_task_size_by_prewhere_columns 1 +merge_tree_max_bytes_to_use_cache 2013265920 +merge_tree_max_rows_to_use_cache 1048576 
+merge_tree_min_bytes_for_concurrent_read 251658240 +merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem 251658240 +merge_tree_min_bytes_for_seek 0 +merge_tree_min_bytes_per_task_for_remote_reading 4194304 +merge_tree_min_rows_for_concurrent_read 163840 +merge_tree_min_rows_for_concurrent_read_for_remote_filesystem 163840 +merge_tree_min_rows_for_seek 0 +merge_tree_use_const_size_tasks_for_remote_reading 1 +metrics_perf_events_enabled 0 +metrics_perf_events_list +min_bytes_to_use_direct_io 0 +min_bytes_to_use_mmap_io 0 +min_chunk_bytes_for_parallel_parsing 10485760 +min_compress_block_size 65536 +min_count_to_compile_aggregate_expression 3 +min_count_to_compile_expression 3 +min_count_to_compile_sort_description 3 +min_execution_speed 0 +min_execution_speed_bytes 0 +min_free_disk_space_for_temporary_data 0 +min_hit_rate_to_use_consecutive_keys_optimization 0.5 +min_insert_block_size_bytes 268402944 +min_insert_block_size_bytes_for_materialized_views 0 +min_insert_block_size_rows 1048449 +min_insert_block_size_rows_for_materialized_views 0 +move_all_conditions_to_prewhere 1 +move_primary_key_columns_to_end_of_prewhere 1 +multiple_joins_rewriter_version 0 +multiple_joins_try_to_keep_original_names 0 +mutations_execute_nondeterministic_on_initiator 0 +mutations_execute_subqueries_on_initiator 0 +mutations_max_literal_size_to_replace 16384 +mutations_sync 0 +mysql_datatypes_support_level +mysql_map_fixed_string_to_text_in_show_columns 0 +mysql_map_string_to_text_in_show_columns 0 +mysql_max_rows_to_insert 65536 +network_compression_method LZ4 +network_zstd_compression_level 1 +normalize_function_names 1 +number_of_mutations_to_delay 0 +number_of_mutations_to_throw 0 +odbc_bridge_connection_pool_size 16 +odbc_bridge_use_connection_pooling 1 +odbc_max_field_size 0 +offset 0 +opentelemetry_start_trace_probability 0 +opentelemetry_trace_processors 0 +optimize_aggregation_in_order 0 +optimize_aggregators_of_group_by_keys 1 +optimize_append_index 0 +optimize_arithmetic_operations_in_aggregate_functions 1 +optimize_count_from_files 1 +optimize_distinct_in_order 1 +optimize_distributed_group_by_sharding_key 1 +optimize_duplicate_order_by_and_distinct 0 +optimize_functions_to_subcolumns 0 +optimize_fuse_sum_count_avg 0 +optimize_group_by_constant_keys 1 +optimize_group_by_function_keys 1 +optimize_if_chain_to_multiif 0 +optimize_if_transform_strings_to_enum 0 +optimize_injective_functions_inside_uniq 1 +optimize_min_equality_disjunction_chain_length 3 +optimize_min_inequality_conjunction_chain_length 3 +optimize_monotonous_functions_in_order_by 0 +optimize_move_functions_out_of_any 0 +optimize_move_to_prewhere 1 +optimize_move_to_prewhere_if_final 0 +optimize_multiif_to_if 1 +optimize_normalize_count_variants 1 +optimize_on_insert 1 +optimize_or_like_chain 0 +optimize_read_in_order 1 +optimize_read_in_window_order 1 +optimize_redundant_functions_in_order_by 1 +optimize_respect_aliases 1 +optimize_rewrite_aggregate_function_with_if 1 +optimize_rewrite_array_exists_to_has 0 +optimize_rewrite_sum_if_to_count_if 0 +optimize_skip_merged_partitions 0 +optimize_skip_unused_shards 0 +optimize_skip_unused_shards_limit 1000 +optimize_skip_unused_shards_nesting 0 +optimize_skip_unused_shards_rewrite_in 1 +optimize_sorting_by_input_stream_properties 1 +optimize_substitute_columns 0 +optimize_syntax_fuse_functions 0 +optimize_throw_if_noop 0 +optimize_trivial_approximate_count_query 0 +optimize_trivial_count_query 1 +optimize_trivial_insert_select 1 +optimize_uniq_to_count 1 
+optimize_use_implicit_projections 1 +optimize_use_projections 1 +optimize_using_constraints 0 +os_thread_priority 0 +output_format_arrow_compression_method lz4_frame +output_format_arrow_fixed_string_as_fixed_byte_array 1 +output_format_arrow_low_cardinality_as_dictionary 0 +output_format_arrow_string_as_string 0 +output_format_avro_codec +output_format_avro_rows_in_file 1 +output_format_avro_string_column_pattern +output_format_avro_sync_interval 16384 +output_format_bson_string_as_string 0 +output_format_csv_crlf_end_of_line 0 +output_format_decimal_trailing_zeros 0 +output_format_enable_streaming 0 +output_format_json_array_of_rows 0 +output_format_json_escape_forward_slashes 1 +output_format_json_named_tuples_as_objects 1 +output_format_json_quote_64bit_floats 0 +output_format_json_quote_64bit_integers 1 +output_format_json_quote_decimals 0 +output_format_json_quote_denormals 0 +output_format_json_skip_null_value_in_named_tuples 0 +output_format_json_validate_utf8 0 +output_format_markdown_escape_special_characters 0 +output_format_msgpack_uuid_representation ext +output_format_orc_compression_method lz4 +output_format_orc_row_index_stride 10000 +output_format_orc_string_as_string 0 +output_format_parallel_formatting 1 +output_format_parquet_batch_size 1024 +output_format_parquet_compliant_nested_types 1 +output_format_parquet_compression_method lz4 +output_format_parquet_data_page_size 1048576 +output_format_parquet_fixed_string_as_fixed_byte_array 1 +output_format_parquet_parallel_encoding 1 +output_format_parquet_row_group_size 1000000 +output_format_parquet_row_group_size_bytes 536870912 +output_format_parquet_string_as_string 0 +output_format_parquet_use_custom_encoder 0 +output_format_parquet_version 2.latest +output_format_pretty_color 1 +output_format_pretty_grid_charset UTF-8 +output_format_pretty_max_column_pad_width 250 +output_format_pretty_max_rows 10000 +output_format_pretty_max_value_width 10000 +output_format_pretty_row_numbers 0 +output_format_protobuf_nullables_with_google_wrappers 0 +output_format_schema +output_format_sql_insert_include_column_names 1 +output_format_sql_insert_max_batch_size 65409 +output_format_sql_insert_quote_names 1 +output_format_sql_insert_table_name table +output_format_sql_insert_use_replace 0 +output_format_tsv_crlf_end_of_line 0 +output_format_write_statistics 1 +parallel_distributed_insert_select 0 +parallel_replica_offset 0 +parallel_replicas_count 0 +parallel_replicas_custom_key +parallel_replicas_custom_key_filter_type default +parallel_replicas_for_non_replicated_merge_tree 0 +parallel_replicas_min_number_of_granules_to_enable 0 +parallel_replicas_min_number_of_rows_per_replica 0 +parallel_replicas_single_task_marks_count_multiplier 2 +parallel_view_processing 0 +parallelize_output_from_storages 1 +parsedatetime_parse_without_leading_zeros 1 +partial_merge_join_left_table_buffer_bytes 0 +partial_merge_join_optimizations 0 +partial_merge_join_rows_in_right_blocks 65536 +partial_result_on_first_cancel 0 +parts_to_delay_insert 0 +parts_to_throw_insert 0 +periodic_live_view_refresh 60 +poll_interval 10 +postgresql_connection_pool_auto_close_connection 0 +postgresql_connection_pool_size 16 +postgresql_connection_pool_wait_timeout 5000 +precise_float_parsing 0 +prefer_column_name_to_alias 0 +prefer_global_in_and_join 0 +prefer_localhost_replica 1 +prefer_warmed_unmerged_parts_seconds 0 +preferred_block_size_bytes 1000000 +preferred_max_column_in_block_size_bytes 0 +preferred_optimize_projection_name +prefetch_buffer_size 1048576 
+print_pretty_type_names 0 +priority 0 +query_cache_compress_entries 1 +query_cache_max_entries 0 +query_cache_max_size_in_bytes 0 +query_cache_min_query_duration 0 +query_cache_min_query_runs 0 +query_cache_nondeterministic_function_handling throw +query_cache_share_between_users 0 +query_cache_squash_partial_results 1 +query_cache_store_results_of_queries_with_nondeterministic_functions 0 +query_cache_ttl 60 +query_plan_aggregation_in_order 1 +query_plan_enable_multithreading_after_window_functions 1 +query_plan_enable_optimizations 1 +query_plan_execute_functions_after_sorting 1 +query_plan_filter_push_down 1 +query_plan_lift_up_array_join 1 +query_plan_lift_up_union 1 +query_plan_max_optimizations_to_apply 10000 +query_plan_merge_expressions 1 +query_plan_optimize_primary_key 1 +query_plan_optimize_projection 1 +query_plan_push_down_limit 1 +query_plan_read_in_order 1 +query_plan_remove_redundant_distinct 1 +query_plan_remove_redundant_sorting 1 +query_plan_reuse_storage_ordering_for_window_functions 1 +query_plan_split_filter 1 +query_profiler_cpu_time_period_ns 1000000000 +query_profiler_real_time_period_ns 1000000000 +queue_max_wait_ms 0 +rabbitmq_max_wait_ms 5000 +read_backoff_max_throughput 1048576 +read_backoff_min_concurrency 1 +read_backoff_min_events 2 +read_backoff_min_interval_between_events_ms 1000 +read_backoff_min_latency_ms 1000 +read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 +read_in_order_two_level_merge_threshold 100 +read_overflow_mode throw +read_overflow_mode_leaf throw +read_priority 0 +readonly 0 +receive_data_timeout_ms 2000 +receive_timeout 300 +regexp_dict_allow_hyperscan 1 +regexp_dict_flag_case_insensitive 0 +regexp_dict_flag_dotall 0 +regexp_max_matches_per_row 1000 +reject_expensive_hyperscan_regexps 1 +remerge_sort_lowered_memory_bytes_ratio 2 +remote_filesystem_read_method threadpool +remote_filesystem_read_prefetch 1 +remote_fs_read_backoff_max_tries 5 +remote_fs_read_max_backoff_ms 10000 +remote_read_min_bytes_for_seek 4194304 +rename_files_after_processing +replace_running_query 0 +replace_running_query_max_wait_ms 5000 +replication_alter_columns_timeout 60 +replication_alter_partitions_sync 1 +replication_wait_for_inactive_replica_timeout 120 +restore_threads 16 +result_overflow_mode throw +rewrite_count_distinct_if_with_count_distinct_implementation 0 +s3_allow_parallel_part_upload 1 +s3_check_objects_after_upload 0 +s3_create_new_file_on_insert 0 +s3_disable_checksum 0 +s3_http_connection_pool_size 1000 +s3_list_object_keys_size 1000 +s3_max_connections 1024 +s3_max_get_burst 0 +s3_max_get_rps 0 +s3_max_inflight_parts_for_one_file 20 +s3_max_put_burst 0 +s3_max_put_rps 0 +s3_max_redirects 10 +s3_max_single_part_upload_size 33554432 +s3_max_single_read_retries 4 +s3_max_unexpected_write_error_retries 4 +s3_max_upload_part_size 5368709120 +s3_min_upload_part_size 16777216 +s3_request_timeout_ms 30000 +s3_retry_attempts 100 +s3_skip_empty_files 0 +s3_strict_upload_part_size 0 +s3_throw_on_zero_files_match 0 +s3_truncate_on_insert 0 +s3_upload_part_size_multiply_factor 2 +s3_upload_part_size_multiply_parts_count_threshold 500 +s3_use_adaptive_timeouts 1 +s3queue_default_zookeeper_path /clickhouse/s3queue/ +s3queue_enable_logging_to_s3queue_log 0 +schema_inference_cache_require_modification_time_for_url 1 +schema_inference_hints +schema_inference_make_columns_nullable 1 +schema_inference_mode default +schema_inference_use_cache_for_azure 1 +schema_inference_use_cache_for_file 1 +schema_inference_use_cache_for_hdfs 1 
+schema_inference_use_cache_for_s3 1 +schema_inference_use_cache_for_url 1 +select_sequential_consistency 0 +send_logs_level fatal +send_logs_source_regexp +send_progress_in_http_headers 0 +send_timeout 300 +session_timezone +set_overflow_mode throw +short_circuit_function_evaluation enable +show_table_uuid_in_table_create_query_if_not_nil 0 +single_join_prefer_left_table 1 +skip_download_if_exceeds_query_cache 1 +skip_unavailable_shards 0 +sleep_after_receiving_query_ms 0 +sleep_in_send_data_ms 0 +sleep_in_send_tables_status_ms 0 +sort_overflow_mode throw +splitby_max_substrings_includes_remaining_string 0 +stop_refreshable_materialized_views_on_startup 0 +storage_file_read_method pread +storage_system_stack_trace_pipe_read_timeout_ms 100 +stream_flush_interval_ms 7500 +stream_like_engine_allow_direct_select 0 +stream_like_engine_insert_queue +stream_poll_timeout_ms 500 +system_events_show_zero_values 0 +table_function_remote_max_addresses 1000 +tcp_keep_alive_timeout 290 +temporary_files_codec LZ4 +temporary_live_view_timeout 1 +throw_if_no_data_to_insert 1 +throw_on_error_from_cache_on_write_operations 0 +throw_on_max_partitions_per_insert_block 1 +throw_on_unsupported_query_inside_transaction 1 +timeout_before_checking_execution_speed 10 +timeout_overflow_mode throw +timeout_overflow_mode_leaf throw +totals_auto_threshold 0.5 +totals_mode after_having_exclusive +trace_profile_events 0 +transfer_overflow_mode throw +transform_null_in 0 +union_default_mode +unknown_packet_in_send_data 0 +use_cache_for_count_from_files 1 +use_client_time_zone 0 +use_compact_format_in_distributed_parts_names 1 +use_concurrency_control 1 +use_hedged_requests 1 +use_index_for_in_with_subqueries 1 +use_index_for_in_with_subqueries_max_values 0 +use_local_cache_for_remote_storage 1 +use_mysql_types_in_show_columns 0 +use_query_cache 0 +use_skip_indexes 1 +use_skip_indexes_if_final 0 +use_structure_from_insertion_table_in_table_functions 2 +use_uncompressed_cache 0 +use_with_fill_by_sorting_prefix 1 +validate_polygons 1 +wait_changes_become_visible_after_commit_mode wait_unknown +wait_for_async_insert 1 +wait_for_async_insert_timeout 120 +wait_for_window_view_fire_signal_timeout 10 +window_view_clean_interval 60 +window_view_heartbeat_interval 15 +workload default +zstd_window_log_max 0 diff --git a/tests/queries/0_stateless/02995_forget_partition.reference b/tests/queries/0_stateless/02995_forget_partition.reference new file mode 100644 index 00000000000..b2bc15b7bcb --- /dev/null +++ b/tests/queries/0_stateless/02995_forget_partition.reference @@ -0,0 +1,21 @@ +---before--- +20240101 +20240102 +20240103 +20240104 +20240105 +20240106 +20240107 +20240108 +20240109 +20240110 +---after--- +20240102 +20240103 +20240104 +20240105 +20240106 +20240107 +20240108 +20240109 +20240110 diff --git a/tests/queries/0_stateless/02995_forget_partition.sh b/tests/queries/0_stateless/02995_forget_partition.sh new file mode 100755 index 00000000000..8ece8d3ddb3 --- /dev/null +++ b/tests/queries/0_stateless/02995_forget_partition.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-replicated-database + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +drop table if exists forget_partition; + +create table forget_partition +( + k UInt64, + d Date, + v String +) +engine = ReplicatedMergeTree('/test/02995/{database}/rmt', '1') +order by (k, d) +partition by toYYYYMMDD(d); + +insert into forget_partition select number, '2024-01-01' + interval number day, randomString(20) from system.numbers limit 10; + +alter table forget_partition drop partition '20240101'; +alter table forget_partition drop partition '20240102'; +""" + +# DROP PARTITION do not wait for a part to be removed from memory due to possible concurrent SELECTs, so we have to do wait manually here +while [[ $(${CLICKHOUSE_CLIENT} -q "select count() from system.parts where database=currentDatabase() and table='forget_partition' and partition='20240101'") != 0 ]]; do sleep 0.1; done + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +set allow_unrestricted_reads_from_keeper=1; + +select '---before---'; +select name from system.zookeeper where path = '/test/02995/' || currentDatabase() || '/rmt/block_numbers' order by name; + +alter table forget_partition forget partition '20240103'; -- {serverError CANNOT_FORGET_PARTITION} +alter table forget_partition forget partition '20240203'; -- {serverError CANNOT_FORGET_PARTITION} +alter table forget_partition forget partition '20240101'; + + +select '---after---'; +select name from system.zookeeper where path = '/test/02995/' || currentDatabase() || '/rmt/block_numbers' order by name; + +drop table forget_partition; +""" diff --git a/tests/queries/0_stateless/02995_index.reference b/tests/queries/0_stateless/02995_index.reference new file mode 100644 index 00000000000..1e8639caa88 --- /dev/null +++ b/tests/queries/0_stateless/02995_index.reference @@ -0,0 +1,126 @@ +12 4 21722 2209341 4 1415 2333 4 61 64 3 +21 1134 11363 58484 1106 1458 1592 136 26 62 32 +22 210 4504 5729 196 291 767 124 47 54 8 +26 196 1327684 5221 195 4140 5661 161 28 49 19 +28 5 2034378 7102 5 325 3255 2 53 60 4 +29 53 45041 45189 45 1580 211 31 55 84 18 +38 424 1600675 4653 424 562 5944 244 60 65 6 +45 17 62743 674873 17 6239 6494 17 65 76 8 +72 1862 1210073 6200 1677 2498 528 859 51 61 11 +79 2 2255228 2255293 2 5495 7057 2 65 65 1 +85 459 1051571 1829638 459 6402 7131 334 32 61 25 +86 10 1748130 1754217 10 4374 7003 10 56 59 4 +91 165 5718 5802 75 282 7113 112 41 63 22 +94 20 1231916 2050003 20 4802 4917 19 53 59 7 +99 2 3665 36667 2 497 697 2 70 71 2 +103 1 2446615 2446615 1 2498 2498 1 58 58 1 +106 72 6149 6699 67 527 826 40 61 61 1 +111 43 2273186 5272 43 492 4923 4 54 72 15 +120 3129 45117 6735 2868 1030 1625 561 59 64 6 +138 2 49243 49374 2 1428 1519 2 47 48 2 +143 100 23321 63639 100 1115 1624 88 51 51 1 +145 1 2447976 2447976 1 6173 6173 1 44 44 1 +153 16 13748 16881 16 1506 1636 16 54 68 9 +159 19952 1525336 7131 12957 1280 6163 2668 24 66 39 +171 5 15042 16698 5 1302 1608 5 65 65 1 +179 6264 1362341 2686 6244 2554 7132 2705 61 67 7 +192 1 1639623 1639623 1 3406 3406 1 32 32 1 +193 1 1429969 1429969 1 7131 7131 1 45 45 1 +207 12 23057 32500 12 1491 1726 12 32 46 7 +221 5081 1366870 6649 3432 4527 5226 687 24 69 39 +228 73 12281 17929 71 1328 2034 63 49 71 18 +229 2 1617478 1723791 2 4590 5578 2 41 42 2 +230 3916 1332729 6949 3668 1330 4703 845 62 65 4 +238 25 2624456 2625673 24 2535 6465 25 58 75 14 +241 154 2554929 2616444 154 2626 7131 148 34 57 17 +248 276 15529 30987 274 1040 1222 136 37 79 27 +254 3018 33966 6635 2837 1057 1622 539 24 60 33 +255 20 1581774 1811334 
20 6068 6301 18 33 57 10 +256 5 5145 6841 5 367 376 5 58 58 1 +270 2 2195579 2262119 2 7102 7123 2 33 34 2 +281 32 2379460 616215 32 6042 6086 23 53 64 12 +282 7 1292651 24244 7 1607 2455 6 46 55 5 +286 123 1521935 5269 123 3793 3940 81 40 66 22 +291 21 2419080 3567 21 297 4731 21 54 55 2 +316 4 5221 5616 4 505 558 4 32 35 3 +319 232 56480 63033 230 1599 313 50 33 64 26 +327 15 51647 51894 14 1292 1585 14 47 57 7 +332 24 23484 54948 24 1609 1726 16 32 49 11 +333 1 14189 14189 1 1550 1550 1 63 63 1 +342 49 2579220 2622432 49 4626 6933 48 34 54 14 +344 1 6486 6486 1 509 509 1 24 24 1 +346 1987 53016 6735 1823 1334 174 294 26 62 32 +358 45 59058 60844 44 6746 722 40 57 84 15 +363 1198 1260033 2568811 1196 5710 5790 82 55 80 26 +384 150 2361175 476024 150 7008 7123 81 38 64 22 +387 277 5200 6553 252 243 521 130 65 65 1 +392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13 +396 8181 1380803 6186 7920 545 798 1743 24 67 39 +398 3 5183 5213 2 291 352 3 53 59 3 +399 62 51494 59203 61 7073 754 42 55 78 18 +412 2141 1360120 2189792 2136 2491 5658 1371 71 75 5 +413 2 2036037 2064917 2 3963 4666 2 43 45 2 +431 33 2302331 2348449 33 4425 6516 32 69 69 1 +447 59 25125 33094 59 1176 1817 56 53 58 6 +456 1 53157 53157 1 1556 1556 1 26 26 1 +462 5 5456 6280 5 348 4337 5 28 40 5 +472 1 1443716 1443716 1 6122 6122 1 42 42 1 +491 34 1066102 1183673 34 6606 6822 32 46 67 15 +498 896 2230163 3054 895 537 7131 714 24 59 28 +504 108 12281 25180 108 1318 1784 94 55 66 12 +515 22 1588883 2640809 22 6554 6571 15 46 59 12 +518 1 37743 37743 1 1558 1558 1 72 72 1 +530 1 3033 3033 1 561 561 1 59 59 1 +532 26 5721 6355 25 549 665 14 44 50 7 +546 156 2577874 48517 156 1105 324 133 44 51 8 +554 12 1665194 2640066 12 1817 2951 12 57 57 1 +564 3865 2028049 2083433 3722 1115 985 2203 44 84 41 +566 4432 50605 57509 3217 1191 267 459 26 72 39 +567 8 5221 5893 7 333 558 8 27 35 4 +582 1172 1320619 2019743 1172 5819 7131 757 26 63 30 +584 43100 2500 5594 22561 134 4573 1660 48 84 37 +589 28 6046 6068 19 345 564 27 55 62 8 +595 139 1585165 1683606 138 2231 3598 132 54 84 28 +615 3 1056081 1116230 3 5794 5796 2 59 62 3 +619 7 1543114 5241 7 2442 3105 7 41 45 3 +634 2722 1221058 4999 2686 2426 7131 1735 54 60 7 +635 237 2119333 4667 237 561 5999 176 49 60 12 +644 5 1774169 2056171 5 5591 6091 4 33 39 3 +647 8 51632 64403 8 1457 1624 8 26 34 5 +651 1325 1620565 6281 1301 528 792 815 62 63 2 +665 13 4598 4789 13 511 558 11 39 46 7 +679 1560 1613200 25940 1552 1569 3118 781 49 84 35 +704 2 14226 15594 2 1086 1116 2 65 71 2 +715 25 1199352 3490 25 5036 5112 23 34 55 13 +716 1253 61989 6735 1050 1203 1625 397 52 65 14 +730 2584 5560 6170 634 2421 627 293 56 69 14 +736 8 1433153 4941 8 339 4594 8 28 36 5 +749 2 1326176 1339862 2 4339 6213 2 49 50 2 +753 1 53157 53157 1 1556 1556 1 26 26 1 +761 63 1443230 6881 63 3154 3204 26 56 73 14 +762 49 1449596 1968154 49 2437 3753 48 54 62 9 +775 35107 5330 769436 2471 447 6607 656 70 81 12 +789 1 1552458 1552458 1 2441 2441 1 62 62 1 +794 158 5585 6585 155 495 929 67 24 50 20 +839 9 29223 46530 9 1336 1465 9 52 52 1 +844 5 2377545 2377635 5 5129 6321 5 53 69 5 +846 50 2172273 2589295 50 1582 3053 48 64 68 5 +847 2577 56656 63658 1582 1444 838 474 26 63 33 +861 1333 5570 6909 839 457 489 37 33 70 34 +873 2360 1519811 50487 2248 1310 1784 316 60 68 9 +879 228 6704 6785 79 279 507 121 35 66 24 +889 5130 2070007 39692 5040 1151 6791 2606 44 66 23 +896 4 511246 859452 4 6554 6561 4 67 71 4 +912 146 1322641 2238040 146 1366 6354 143 59 59 1 +913 82 5495 6870 78 350 565 67 24 43 15 +921 763 1580790 
416881 763 6191 7131 509 63 64 2 +925 318 2500952 5025 309 476 6114 182 32 56 21 +931 12 4277 4809 12 238 256 9 63 83 9 +942 954 1331 2228193 952 1121 5047 788 65 70 6 +948 14 1785593 2600431 14 6550 6598 13 34 49 9 +956 5 5755 6023 5 359 411 5 43 48 4 +963 4 3812 3835 4 444 537 4 47 53 4 +978 5 51632 58212 5 1127 1556 5 24 32 5 +980 53 47201 59744 53 1537 1625 36 41 49 9 +987 6033 2020131 763444 4306 256 792 1832 60 64 5 +993 4 1615159 1718339 4 1570 3093 4 62 63 2 diff --git a/tests/queries/0_stateless/02995_index.sh b/tests/queries/0_stateless/02995_index.sh new file mode 100755 index 00000000000..5125d03904e --- /dev/null +++ b/tests/queries/0_stateless/02995_index.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {1..1000} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_new_settings_history.reference b/tests/queries/0_stateless/02995_new_settings_history.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02995_new_settings_history.sh b/tests/queries/0_stateless/02995_new_settings_history.sh new file mode 100755 index 00000000000..8de98c55b6a --- /dev/null +++ b/tests/queries/0_stateless/02995_new_settings_history.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings +# Some settings can be different for builds with sanitizers or aarch64 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Note that this is a broad check. 
A per version check is done in the upgrade test +# Baseline generated with 23.12.1 +# clickhouse local --query "select name, default from system.settings order by name format TSV" > 02995_baseline_23_12_1.tsv +$CLICKHOUSE_LOCAL --query " + WITH old_settings AS + ( + SELECT * FROM file('${CUR_DIR}/02995_baseline_23_12_1.tsv', 'TSV', 'name String, default String') + ), + new_settings AS + ( + -- Ignore settings that depend on the machine config (max_threads and similar) + SELECT name, default FROM system.settings WHERE default NOT LIKE '%auto(%' + ) + SELECT * FROM + ( + SELECT 'PLEASE ADD THE NEW SETTING TO SettingsChangesHistory.h: ' || name || ' WAS ADDED', + FROM new_settings + WHERE (name NOT IN ( + SELECT name + FROM old_settings + )) AND (name NOT IN ( + SELECT arrayJoin(tupleElement(changes, 'name')) + FROM system.settings_changes + WHERE splitByChar('.', version())[1] >= '24' + )) + UNION ALL + ( + SELECT 'PLEASE ADD THE SETTING VALUE CHANGE TO SettingsChangesHistory.h: ' || name || ' WAS CHANGED FROM ' || old_settings.default || ' TO ' || new_settings.default, + FROM new_settings + LEFT JOIN old_settings ON new_settings.name = old_settings.name + WHERE (new_settings.default != old_settings.default) AND (name NOT IN ( + SELECT arrayJoin(tupleElement(changes, 'name')) + FROM system.settings_changes + WHERE splitByChar('.', version())[1] >= '24' + )) + ) + ) +" diff --git a/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns.reference b/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns.sql b/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns.sql new file mode 100644 index 00000000000..060f16f8945 --- /dev/null +++ b/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns.sql @@ -0,0 +1,6 @@ +-- It is special because actions cannot be reused for SimpleAggregateFunction (see https://github.com/ClickHouse/ClickHouse/pull/54436) +drop table if exists data; +create table data (key Int) engine=AggregatingMergeTree() order by tuple(); +insert into data values (0); +select * from data final prewhere indexHint(_partition_id = 'all') or indexHint(_partition_id = 'all'); +select * from data final prewhere indexHint(_partition_id = 'all') or indexHint(_partition_id = 'all') or indexHint(_partition_id = 'all'); diff --git a/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns_SimpleAggregateFunction.reference b/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns_SimpleAggregateFunction.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns_SimpleAggregateFunction.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns_SimpleAggregateFunction.sql b/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns_SimpleAggregateFunction.sql new file mode 100644 index 00000000000..97df883fa48 --- /dev/null +++ b/tests/queries/0_stateless/02995_preliminary_filters_duplicated_columns_SimpleAggregateFunction.sql @@ -0,0 +1,5 @@ +-- It is special because actions cannot be reused for SimpleAggregateFunction (see https://github.com/ClickHouse/ClickHouse/pull/54436) +drop table 
if exists data; +create table data (key SimpleAggregateFunction(max, Int)) engine=AggregatingMergeTree() order by tuple(); +insert into data values (0); +select * from data final prewhere indexHint(_partition_id = 'all') and key >= -1 where key >= 0; diff --git a/tests/queries/0_stateless/02996_analyzer_prewhere_projection.reference b/tests/queries/0_stateless/02996_analyzer_prewhere_projection.reference new file mode 100644 index 00000000000..72749c905a3 --- /dev/null +++ b/tests/queries/0_stateless/02996_analyzer_prewhere_projection.reference @@ -0,0 +1 @@ +1 1 1 diff --git a/tests/queries/0_stateless/02996_analyzer_prewhere_projection.sql b/tests/queries/0_stateless/02996_analyzer_prewhere_projection.sql new file mode 100644 index 00000000000..9d676001010 --- /dev/null +++ b/tests/queries/0_stateless/02996_analyzer_prewhere_projection.sql @@ -0,0 +1,7 @@ +SET allow_suspicious_low_cardinality_types=1; + +CREATE TABLE t__fuzz_0 (`i` LowCardinality(Int32), `j` Int32, `k` Int32, PROJECTION p (SELECT * ORDER BY j)) ENGINE = MergeTree ORDER BY i SETTINGS index_granularity = 1; +INSERT INTO t__fuzz_0 Select number, number, number FROM numbers(100); + +SELECT * FROM t__fuzz_0 PREWHERE 7 AND (i < 2147483647) AND (j IN (2147483646, -2, 1)) +SETTINGS allow_experimental_analyzer = true; diff --git a/tests/queries/0_stateless/02996_index_compaction_counterexample.reference b/tests/queries/0_stateless/02996_index_compaction_counterexample.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02996_index_compaction_counterexample.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02996_index_compaction_counterexample.sql b/tests/queries/0_stateless/02996_index_compaction_counterexample.sql new file mode 100644 index 00000000000..1545f83c79f --- /dev/null +++ b/tests/queries/0_stateless/02996_index_compaction_counterexample.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS b; +create table b (x Int64, y String) engine MergeTree order by (x, y) settings index_granularity=2; +insert into b values (0, 'a'), (1, 'b'), (1, 'c'); +select count() from b where x = 1 and y = 'b'; +detach table b; +attach table b; +select count() from b where x = 1 and y = 'b'; +DROP TABLE b; diff --git a/tests/queries/0_stateless/02996_nullable_arrayReduce.reference b/tests/queries/0_stateless/02996_nullable_arrayReduce.reference new file mode 100644 index 00000000000..96afb8546ef --- /dev/null +++ b/tests/queries/0_stateless/02996_nullable_arrayReduce.reference @@ -0,0 +1,15 @@ +-- { echoOn } +SELECT arrayReduce('sum', []::Array(UInt8)) as a, toTypeName(a); +0 UInt64 +SELECT arrayReduce('sumOrNull', []::Array(UInt8)) as a, toTypeName(a); +\N Nullable(UInt64) +SELECT arrayReduce('sum', [NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); +\N Nullable(UInt64) +SELECT arrayReduce('sum', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); +10 Nullable(UInt64) +SELECT arrayReduce('any_respect_nulls', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); +\N Nullable(UInt8) +SELECT arrayReduce('any_respect_nulls', [10, NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); +10 Nullable(UInt8) +SELECT arrayReduce('median', [toLowCardinality(toNullable(8))]) as t, toTypeName(t); +8 Nullable(Float64) diff --git a/tests/queries/0_stateless/02996_nullable_arrayReduce.sql b/tests/queries/0_stateless/02996_nullable_arrayReduce.sql new file mode 100644 index 00000000000..8f69296dbe5 --- /dev/null +++ b/tests/queries/0_stateless/02996_nullable_arrayReduce.sql @@ -0,0 
+1,17 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/59600 +SELECT arrayReduce(toNullable('stddevSampOrNull'), [1]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayReduce(toNullable('median'), [toDecimal32OrNull(toFixedString('1', 1), 2), 8]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toFixedString('--- Int Empty ---', toLowCardinality(17)), arrayReduce(toNullable('avgOrNull'), [1]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayReduce('any', toNullable(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayReduce(toLowCardinality('median'), [toLowCardinality(toNullable(8))]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- { echoOn } +SELECT arrayReduce('sum', []::Array(UInt8)) as a, toTypeName(a); +SELECT arrayReduce('sumOrNull', []::Array(UInt8)) as a, toTypeName(a); +SELECT arrayReduce('sum', [NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); +SELECT arrayReduce('sum', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); +SELECT arrayReduce('any_respect_nulls', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); +SELECT arrayReduce('any_respect_nulls', [10, NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); + +SELECT arrayReduce('median', [toLowCardinality(toNullable(8))]) as t, toTypeName(t); +-- { echoOff } diff --git a/tests/queries/0_stateless/02997_fix_datetime64_scale_conversion.reference b/tests/queries/0_stateless/02997_fix_datetime64_scale_conversion.reference new file mode 100644 index 00000000000..c4ade2ace13 --- /dev/null +++ b/tests/queries/0_stateless/02997_fix_datetime64_scale_conversion.reference @@ -0,0 +1,100 @@ +2023-01-01 00:00:00 +2023-01-01 00:00:00 +2023-01-01 01:01:01 +2023-01-01 01:01:01 +2023-01-02 02:02:02 +2023-01-02 02:02:02 +2023-01-03 03:03:03 +2023-01-03 03:03:03 +2023-01-04 04:04:04 +2023-01-04 04:04:04 +2023-01-05 05:05:05 +2023-01-05 05:05:05 +2023-01-06 06:06:06 +2023-01-06 06:06:06 +2023-01-07 07:07:07 +2023-01-07 07:07:07 +2023-01-08 08:08:08 +2023-01-08 08:08:08 +2023-01-09 09:09:09 +2023-01-09 09:09:09 +2023-01-01 00:00:00.00 +2023-01-01 00:00:00.00 +2023-01-01 01:01:01.00 +2023-01-01 01:01:01.10 +2023-01-02 02:02:02.00 +2023-01-02 02:02:02.12 +2023-01-03 03:03:03.00 +2023-01-03 03:03:03.12 +2023-01-04 04:04:04.00 +2023-01-04 04:04:04.12 +2023-01-05 05:05:05.00 +2023-01-05 05:05:05.12 +2023-01-06 06:06:06.00 +2023-01-06 06:06:06.12 +2023-01-07 07:07:07.00 +2023-01-07 07:07:07.12 +2023-01-08 08:08:08.00 +2023-01-08 08:08:08.12 +2023-01-09 09:09:09.00 +2023-01-09 09:09:09.12 +2023-01-01 00:00:00.000 +2023-01-01 00:00:00.000 +2023-01-01 01:01:01.000 +2023-01-01 01:01:01.100 +2023-01-02 02:02:02.000 +2023-01-02 02:02:02.120 +2023-01-03 03:03:03.000 +2023-01-03 03:03:03.123 +2023-01-04 04:04:04.000 +2023-01-04 04:04:04.123 +2023-01-05 05:05:05.000 +2023-01-05 05:05:05.123 +2023-01-06 06:06:06.000 +2023-01-06 06:06:06.123 +2023-01-07 07:07:07.000 +2023-01-07 07:07:07.123 +2023-01-08 08:08:08.000 +2023-01-08 08:08:08.123 +2023-01-09 09:09:09.000 +2023-01-09 09:09:09.123 +2023-01-01 00:00:00.000000 +2023-01-01 00:00:00.000000 +2023-01-01 01:01:01.000000 +2023-01-01 01:01:01.100000 +2023-01-02 02:02:02.000000 +2023-01-02 02:02:02.120000 +2023-01-03 03:03:03.000000 +2023-01-03 03:03:03.123000 +2023-01-04 04:04:04.000000 +2023-01-04 04:04:04.123400 +2023-01-05 05:05:05.000000 +2023-01-05 05:05:05.123450 +2023-01-06 06:06:06.000000 +2023-01-06 06:06:06.123456 +2023-01-07 07:07:07.000000 +2023-01-07 07:07:07.123456 +2023-01-08 08:08:08.000000 +2023-01-08 08:08:08.123456 +2023-01-09 09:09:09.000000 
+2023-01-09 09:09:09.123456 +2023-01-01 00:00:00.000000 +2023-01-01 00:00:00.000000 +2023-01-01 01:01:01.000000 +2023-01-01 01:01:01.100000 +2023-01-02 02:02:02.000000 +2023-01-02 02:02:02.120000 +2023-01-03 03:03:03.000000 +2023-01-03 03:03:03.123000 +2023-01-04 04:04:04.000000 +2023-01-04 04:04:04.123400 +2023-01-05 05:05:05.000000 +2023-01-05 05:05:05.123450 +2023-01-06 06:06:06.000000 +2023-01-06 06:06:06.123456 +2023-01-07 07:07:07.000000 +2023-01-07 07:07:07.123456 +2023-01-08 08:08:08.000000 +2023-01-08 08:08:08.123456 +2023-01-09 09:09:09.000000 +2023-01-09 09:09:09.123456 diff --git a/tests/queries/0_stateless/02997_fix_datetime64_scale_conversion.sql b/tests/queries/0_stateless/02997_fix_datetime64_scale_conversion.sql new file mode 100644 index 00000000000..b905ef2b972 --- /dev/null +++ b/tests/queries/0_stateless/02997_fix_datetime64_scale_conversion.sql @@ -0,0 +1,124 @@ +DROP TABLE IF EXISTS test_0; +CREATE TABLE IF NOT EXISTS test_0 (a DateTime64(0)) engine = MergeTree order by a; +INSERT INTO test_0 VALUES (toDateTime64('2023-01-01 00:00:00', 0)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-01 00:00:00.123456789', 0)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-01 01:01:01', 1)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-01 01:01:01.123456789', 1)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-02 02:02:02', 2)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-02 02:02:02.123456789', 2)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-03 03:03:03', 3)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-03 03:03:03.123456789', 3)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-04 04:04:04', 4)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-04 04:04:04.123456789', 4)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-05 05:05:05', 5)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-05 05:05:05.123456789', 5)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-06 06:06:06', 6)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-06 06:06:06.123456789', 6)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-07 07:07:07', 7)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-07 07:07:07.123456789', 7)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-08 08:08:08', 8)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-08 08:08:08.123456789', 8)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-09 09:09:09', 9)); +INSERT INTO test_0 VALUES (toDateTime64('2023-01-09 09:09:09.123456789', 9)); +SELECT * FROM test_0 ORDER BY a; +DROP TABLE test_0; + +DROP TABLE IF EXISTS test_2; +CREATE TABLE IF NOT EXISTS test_2 (a DateTime64(2)) engine = MergeTree order by a; +INSERT INTO test_2 VALUES (toDateTime64('2023-01-01 00:00:00', 0)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-01 00:00:00.123456789', 0)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-01 01:01:01', 1)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-01 01:01:01.123456789', 1)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-02 02:02:02', 2)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-02 02:02:02.123456789', 2)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-03 03:03:03', 3)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-03 03:03:03.123456789', 3)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-04 04:04:04', 4)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-04 04:04:04.123456789', 4)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-05 05:05:05', 5)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-05 05:05:05.123456789', 5)); 
+INSERT INTO test_2 VALUES (toDateTime64('2023-01-06 06:06:06', 6)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-06 06:06:06.123456789', 6)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-07 07:07:07', 7)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-07 07:07:07.123456789', 7)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-08 08:08:08', 8)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-08 08:08:08.123456789', 8)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-09 09:09:09', 9)); +INSERT INTO test_2 VALUES (toDateTime64('2023-01-09 09:09:09.123456789', 9)); +SELECT * FROM test_2 ORDER BY a; +DROP TABLE test_2; + +DROP TABLE IF EXISTS test_3; +CREATE TABLE IF NOT EXISTS test_3 (a DateTime64(3)) engine = MergeTree order by a; +INSERT INTO test_3 VALUES (toDateTime64('2023-01-01 00:00:00', 0)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-01 00:00:00.123456789', 0)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-01 01:01:01', 1)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-01 01:01:01.123456789', 1)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-02 02:02:02', 2)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-02 02:02:02.123456789', 2)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-03 03:03:03', 3)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-03 03:03:03.123456789', 3)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-04 04:04:04', 4)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-04 04:04:04.123456789', 4)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-05 05:05:05', 5)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-05 05:05:05.123456789', 5)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-06 06:06:06', 6)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-06 06:06:06.123456789', 6)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-07 07:07:07', 7)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-07 07:07:07.123456789', 7)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-08 08:08:08', 8)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-08 08:08:08.123456789', 8)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-09 09:09:09', 9)); +INSERT INTO test_3 VALUES (toDateTime64('2023-01-09 09:09:09.123456789', 9)); +SELECT * FROM test_3 ORDER BY a; +DROP TABLE test_3; + +DROP TABLE IF EXISTS test_6; +CREATE TABLE IF NOT EXISTS test_6 (a DateTime64(6)) engine = MergeTree order by a; +INSERT INTO test_6 VALUES (toDateTime64('2023-01-01 00:00:00', 0)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-01 00:00:00.123456789', 0)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-01 01:01:01', 1)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-01 01:01:01.123456789', 1)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-02 02:02:02', 2)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-02 02:02:02.123456789', 2)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-03 03:03:03', 3)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-03 03:03:03.123456789', 3)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-04 04:04:04', 4)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-04 04:04:04.123456789', 4)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-05 05:05:05', 5)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-05 05:05:05.123456789', 5)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-06 06:06:06', 6)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-06 06:06:06.123456789', 6)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-07 07:07:07', 7)); +INSERT INTO test_6 VALUES 
(toDateTime64('2023-01-07 07:07:07.123456789', 7)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-08 08:08:08', 8)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-08 08:08:08.123456789', 8)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-09 09:09:09', 9)); +INSERT INTO test_6 VALUES (toDateTime64('2023-01-09 09:09:09.123456789', 9)); +SELECT * FROM test_6 ORDER BY a; +DROP TABLE test_6; + +DROP TABLE IF EXISTS test_9; +CREATE TABLE IF NOT EXISTS test_9 (a DateTime64(6)) engine = MergeTree order by a; +INSERT INTO test_9 VALUES (toDateTime64('2023-01-01 00:00:00', 0)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-01 00:00:00.123456789', 0)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-01 01:01:01', 1)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-01 01:01:01.123456789', 1)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-02 02:02:02', 2)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-02 02:02:02.123456789', 2)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-03 03:03:03', 3)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-03 03:03:03.123456789', 3)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-04 04:04:04', 4)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-04 04:04:04.123456789', 4)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-05 05:05:05', 5)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-05 05:05:05.123456789', 5)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-06 06:06:06', 6)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-06 06:06:06.123456789', 6)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-07 07:07:07', 7)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-07 07:07:07.123456789', 7)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-08 08:08:08', 8)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-08 08:08:08.123456789', 8)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-09 09:09:09', 9)); +INSERT INTO test_9 VALUES (toDateTime64('2023-01-09 09:09:09.123456789', 9)); +SELECT * FROM test_9 ORDER BY a; +DROP TABLE test_9; diff --git a/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.reference b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.reference new file mode 100644 index 00000000000..29d6383b52c --- /dev/null +++ b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.reference @@ -0,0 +1 @@ +100 diff --git a/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql new file mode 100644 index 00000000000..2dfc8094115 --- /dev/null +++ b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql @@ -0,0 +1,16 @@ +-- Tags: disabled +-- TODO: Fix parts_to_throw_insert logic for parallel MergeTreeSink onStart calls +DROP TABLE IF EXISTS too_many_parts; + +CREATE TABLE too_many_parts (x UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS parts_to_delay_insert = 5, parts_to_throw_insert = 5; + +SYSTEM STOP MERGES too_many_parts; +SET max_block_size = 1, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_threads=100, max_insert_threads=100; + +-- exception is not thrown if threshold is exceeded when multi-block INSERT is already started. 
+INSERT INTO too_many_parts SELECT * FROM numbers_mt(100); +SELECT count() FROM too_many_parts; + +INSERT INTO too_many_parts SELECT * FROM numbers_mt(10); -- { serverError 252 } + +DROP TABLE too_many_parts; diff --git a/tests/queries/0_stateless/02997_projections_formatting.reference b/tests/queries/0_stateless/02997_projections_formatting.reference new file mode 100644 index 00000000000..6a60da1089a --- /dev/null +++ b/tests/queries/0_stateless/02997_projections_formatting.reference @@ -0,0 +1,26 @@ +CREATE TEMPORARY TABLE t_proj +( + `t` DateTime, + `id` UInt64, + PROJECTION p + ( + SELECT + id, + t + ORDER BY toStartOfDay(t) + ) +) +ENGINE = MergeTree +ORDER BY id +CREATE TEMPORARY TABLE t_proj2 +( + `a` UInt32, + `b` UInt32, + PROJECTION p + ( + SELECT a + ORDER BY b * 2 + ) +) +ENGINE = MergeTree +ORDER BY a diff --git a/tests/queries/0_stateless/02997_projections_formatting.sql b/tests/queries/0_stateless/02997_projections_formatting.sql new file mode 100644 index 00000000000..b593c2576b1 --- /dev/null +++ b/tests/queries/0_stateless/02997_projections_formatting.sql @@ -0,0 +1,5 @@ +CREATE TEMPORARY TABLE t_proj (t DateTime, id UInt64, PROJECTION p (SELECT id, t ORDER BY toStartOfDay(t))) ENGINE = MergeTree ORDER BY id; +SHOW CREATE TEMPORARY TABLE t_proj FORMAT TSVRaw; + +CREATE TEMPORARY TABLE t_proj2 (a UInt32, b UInt32, PROJECTION p (SELECT a ORDER BY b * 2)) ENGINE = MergeTree ORDER BY a; +SHOW CREATE TEMPORARY TABLE t_proj2 FORMAT TSVRaw; diff --git a/tests/queries/0_stateless/02998_analyzer_prewhere_report.reference b/tests/queries/0_stateless/02998_analyzer_prewhere_report.reference new file mode 100644 index 00000000000..3ae61a44a3f --- /dev/null +++ b/tests/queries/0_stateless/02998_analyzer_prewhere_report.reference @@ -0,0 +1 @@ +2024-01-01 [2,3] diff --git a/tests/queries/0_stateless/02998_analyzer_prewhere_report.sql b/tests/queries/0_stateless/02998_analyzer_prewhere_report.sql new file mode 100644 index 00000000000..b3027181901 --- /dev/null +++ b/tests/queries/0_stateless/02998_analyzer_prewhere_report.sql @@ -0,0 +1,18 @@ +--https://github.com/ClickHouse/ClickHouse/issues/60232 +CREATE TABLE hits +( + `date` Date, + `data` Array(UInt32) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date) +ORDER BY date; + +INSERT INTO hits values('2024-01-01', [1, 2, 3]); + +SELECT + hits.date, + arrayFilter(x -> (x IN (2, 3)), data) AS filtered +FROM hits +WHERE arrayExists(x -> (x IN (2, 3)), data) +SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference new file mode 100644 index 00000000000..ffd2f68990b --- /dev/null +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference @@ -0,0 +1,71 @@ +-- { echoOn } +SET allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +QUERY id: 0 + PROJECTION COLUMNS + encrypt(\'aes-256-ofb\', [HIDDEN id: 1], [HIDDEN id: 2]) Nullable(String) + encrypt(\'aes-256-ofb\', [HIDDEN id: 3], [HIDDEN id: 2]) Nullable(String) + PROJECTION + LIST id: 1, nodes: 2 + CONSTANT id: 2, constant_value: \'\\nãì&\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 3, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 4, nodes: 3 + CONSTANT id: 5, constant_value: 
\'aes-256-ofb\', constant_value_type: String + CONSTANT id: 6, constant_value: [HIDDEN id: 1], constant_value_type: Nullable(String) + CONSTANT id: 7, constant_value: [HIDDEN id: 2], constant_value_type: String + CONSTANT id: 8, constant_value: \'çø\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 9, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 10, nodes: 3 + CONSTANT id: 11, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 12, constant_value: [HIDDEN id: 3], constant_value_type: Nullable(String) + CONSTANT id: 13, constant_value: [HIDDEN id: 2], constant_value_type: String + JOIN TREE + TABLE id: 14, alias: __table1, table_name: system.one +SET format_display_secrets_in_show_and_select = 1; +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +QUERY id: 0 + PROJECTION COLUMNS + encrypt(\'aes-256-ofb\', _subquery_1, \'12345678901234567890123456789012\') Nullable(String) + encrypt(\'aes-256-ofb\', _subquery_2, \'12345678901234567890123456789012\') Nullable(String) + PROJECTION + LIST id: 1, nodes: 2 + CONSTANT id: 2, constant_value: \'\\nãì&\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 3, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 4, nodes: 3 + CONSTANT id: 5, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 6, constant_value: \'qwerty\', constant_value_type: Nullable(String) + EXPRESSION + QUERY id: 7, is_subquery: 1 + PROJECTION COLUMNS + \'qwerty\' String + PROJECTION + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: \'qwerty\', constant_value_type: String + JOIN TREE + TABLE id: 10, table_name: system.one + CONSTANT id: 11, constant_value: \'12345678901234567890123456789012\', constant_value_type: String + CONSTANT id: 12, constant_value: \'çø\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 13, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 14, nodes: 3 + CONSTANT id: 15, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 16, constant_value: \'asdf\', constant_value_type: Nullable(String) + EXPRESSION + QUERY id: 17, is_subquery: 1 + PROJECTION COLUMNS + \'asdf\' String + PROJECTION + LIST id: 18, nodes: 1 + CONSTANT id: 19, constant_value: \'asdf\', constant_value_type: String + JOIN TREE + TABLE id: 20, table_name: system.one + CONSTANT id: 21, constant_value: \'12345678901234567890123456789012\', constant_value_type: String + JOIN TREE + TABLE id: 22, alias: __table1, table_name: system.one diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql new file mode 100644 index 00000000000..f40b40b6c8c --- /dev/null +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql @@ -0,0 +1,12 @@ +-- Tags: no-fasttest +-- encrypt function doesn't exist in the fastest build + +-- { echoOn } +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); + +SET format_display_secrets_in_show_and_select = 1; + +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), 
'12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +-- { echoOff } diff --git a/tests/queries/0_stateless/02998_attach_partition_not_allowed_if_structure_differs_due_to_materialized_column.reference b/tests/queries/0_stateless/02998_attach_partition_not_allowed_if_structure_differs_due_to_materialized_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02998_attach_partition_not_allowed_if_structure_differs_due_to_materialized_column.sql b/tests/queries/0_stateless/02998_attach_partition_not_allowed_if_structure_differs_due_to_materialized_column.sql new file mode 100644 index 00000000000..c92d71893c4 --- /dev/null +++ b/tests/queries/0_stateless/02998_attach_partition_not_allowed_if_structure_differs_due_to_materialized_column.sql @@ -0,0 +1,21 @@ +CREATE TABLE attach_partition_t7 ( + a UInt32, + b UInt32 +) + ENGINE = MergeTree +PARTITION BY a ORDER BY a; + +ALTER TABLE attach_partition_t7 + ADD COLUMN mat_column + UInt32 MATERIALIZED a+b; + +insert into attach_partition_t7 values (1, 2); + +CREATE TABLE attach_partition_t8 ( + a UInt32, + b UInt32 +) + ENGINE = MergeTree +PARTITION BY a ORDER BY a; + +ALTER TABLE attach_partition_t8 ATTACH PARTITION ID '1' FROM attach_partition_t7; -- {serverError INCOMPATIBLE_COLUMNS}; diff --git a/tests/queries/0_stateless/02998_http_redirects.reference b/tests/queries/0_stateless/02998_http_redirects.reference new file mode 100644 index 00000000000..527e7df71c3 --- /dev/null +++ b/tests/queries/0_stateless/02998_http_redirects.reference @@ -0,0 +1,5 @@ +Ok. +HTTP/1.1 302 Found +Location: /?query=SELECT+'Pepyaka' +HTTP/1.1 404 Not Found +Pepyaka diff --git a/tests/queries/0_stateless/02998_http_redirects.sh b/tests/queries/0_stateless/02998_http_redirects.sh new file mode 100755 index 00000000000..8a8df884f9f --- /dev/null +++ b/tests/queries/0_stateless/02998_http_redirects.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}" + +# Ping handler +${CLICKHOUSE_CURL} -s -S "${URL}/" + +# A handler that is configured to return a redirect +${CLICKHOUSE_CURL} -s -S -I "${URL}/upyachka" | grep -i -P '^HTTP|Location' + +# This handler is configured to not accept any query string +${CLICKHOUSE_CURL} -s -S -I "${URL}/upyachka?hello=world" | grep -i -P '^HTTP|Location' + +# Check that actual redirect works +${CLICKHOUSE_CURL} -s -S -L "${URL}/upyachka" diff --git a/tests/queries/0_stateless/02998_ipv6_hashing.reference b/tests/queries/0_stateless/02998_ipv6_hashing.reference new file mode 100644 index 00000000000..d9753de02c3 --- /dev/null +++ b/tests/queries/0_stateless/02998_ipv6_hashing.reference @@ -0,0 +1,20 @@ +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP +fe80::62:5aff:fed1:daf0 ÷T—ºÖ÷t{Ö±P¶ö›¢ÞãTñ£K{êaƒ—;xdP diff --git a/tests/queries/0_stateless/02998_ipv6_hashing.sql b/tests/queries/0_stateless/02998_ipv6_hashing.sql new file mode 100644 index 00000000000..a836792748c --- /dev/null +++ b/tests/queries/0_stateless/02998_ipv6_hashing.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest + +SELECT toIPv6(materialize(toLowCardinality('fe80::62:5aff:fed1:daf0'))) AS ipv6, SHA256(ipv6) from numbers(10); +SELECT toIPv6(materialize('fe80::62:5aff:fed1:daf0')) AS ipv6, SHA256(ipv6) from numbers(10); + diff --git a/tests/queries/0_stateless/02998_operator_respect_nulls.reference b/tests/queries/0_stateless/02998_operator_respect_nulls.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02998_operator_respect_nulls.sql b/tests/queries/0_stateless/02998_operator_respect_nulls.sql new file mode 100644 index 00000000000..240992e1ff8 --- /dev/null +++ b/tests/queries/0_stateless/02998_operator_respect_nulls.sql @@ -0,0 +1 @@ +SELECT plus(1, 1) RESPECT NULLS; -- { serverError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.reference b/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.reference new file mode 100644 index 00000000000..496e0337209 --- /dev/null +++ b/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.reference @@ -0,0 +1,360 @@ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000000 │ +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000000 │ +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 
1000000 │ +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000000 │ +└─────────┘ +┌───────a─┠+│ 1000000 │ +└─────────┘ +┌───────a─┠+│ 1000000 │ +└─────────┘ +┌───────a─┠+│ 1000000 │ +└─────────┘ +┌───────a─┠+│ 1000000 │ +└─────────┘ + a + + 1000000 + a + + 1000000 + a + + 1000000 + a + + 1000000 +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000000 │ -- 1.00 million +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000000 │ -- 1.00 million +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000000 │ -- 1.00 million +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000000 │ -- 1.00 million +└─────────┘ +┌───────a─┠+│ 1000000 │ -- 1.00 million +└─────────┘ +┌───────a─┠+│ 1000000 │ -- 1.00 million +└─────────┘ +┌───────a─┠+│ 1000000 │ -- 1.00 million +└─────────┘ +┌───────a─┠+│ 1000000 │ -- 1.00 million +└─────────┘ + a + + 1000000 -- 1.00 million + a + + 1000000 -- 1.00 million + a + + 1000000 -- 1.00 million + a + + 1000000 -- 1.00 million +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000001 │ -- 1.00 million +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000001 │ -- 1.00 million +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000001 │ -- 1.00 million +└─────────┘ +â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”┩ +│ 1000001 │ -- 1.00 million +└─────────┘ +┌───────a─┠+│ 1000001 │ -- 1.00 million +└─────────┘ +┌───────a─┠+│ 1000001 │ -- 1.00 million +└─────────┘ +┌───────a─┠+│ 1000001 │ -- 1.00 million +└─────────┘ +┌───────a─┠+│ 1000001 │ -- 1.00 million +└─────────┘ + a + + 1000001 -- 1.00 million + a + + 1000001 -- 1.00 million + a + + 1000001 -- 1.00 million + a + + 1000001 -- 1.00 million +â”â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 1000000000 │ -- 1.00 billion +└────────────┘ +â”â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 1000000000 │ -- 1.00 billion +└────────────┘ +â”â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 1000000000 │ -- 1.00 billion +└────────────┘ +â”â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 1000000000 │ -- 1.00 billion +└────────────┘ +┌──────────a─┠+│ 1000000000 │ -- 1.00 billion +└────────────┘ +┌──────────a─┠+│ 1000000000 │ -- 1.00 billion +└────────────┘ +┌──────────a─┠+│ 1000000000 │ -- 1.00 billion +└────────────┘ +┌──────────a─┠+│ 1000000000 │ -- 1.00 billion +└────────────┘ + a + + 1000000000 -- 1.00 billion + a + + 1000000000 -- 1.00 billion + a + + 1000000000 -- 1.00 billion + a + + 1000000000 -- 1.00 billion +â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ b ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 1000000000 │ 1000000000 │ +└────────────┴────────────┘ +â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ b ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 1000000000 │ 1000000000 │ +└────────────┴────────────┘ +â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ b ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 1000000000 │ 1000000000 │ +└────────────┴────────────┘ +â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”┓ +┃ a ┃ b ┃ +┡â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”┩ +│ 1000000000 │ 1000000000 │ +└────────────┴────────────┘ +┌──────────a─┬──────────b─┠+│ 1000000000 │ 1000000000 │ +└────────────┴────────────┘ +┌──────────a─┬──────────b─┠+│ 1000000000 │ 1000000000 │ +└────────────┴────────────┘ +┌──────────a─┬──────────b─┠+│ 1000000000 │ 1000000000 │ 
+└────────────┴────────────┘ +┌──────────a─┬──────────b─┐ +│ 1000000000 │ 1000000000 │ +└────────────┴────────────┘ + a b + + 1000000000 1000000000 + a b + + 1000000000 1000000000 + a b + + 1000000000 1000000000 + a b + + 1000000000 1000000000 +┏━━━━━━━━━━━━┓ +┃ a ┃ +┡━━━━━━━━━━━━┩ +│ 1000000000 │ +├────────────┤ +│ 1000000000 │ +└────────────┘ +┏━━━━━━━━━━━━┓ +┃ a ┃ +┡━━━━━━━━━━━━┩ +│ 1000000000 │ +├────────────┤ +│ 1000000000 │ +└────────────┘ +┏━━━━━━━━━━━━┓ +┃ a ┃ +┡━━━━━━━━━━━━┩ +│ 1000000000 │ +├────────────┤ +│ 1000000000 │ +└────────────┘ +┏━━━━━━━━━━━━┓ +┃ a ┃ +┡━━━━━━━━━━━━┩ +│ 1000000000 │ +├────────────┤ +│ 1000000000 │ +└────────────┘ +┌──────────a─┐ +│ 1000000000 │ +│ 1000000000 │ +└────────────┘ +┌──────────a─┐ +│ 1000000000 │ +│ 1000000000 │ +└────────────┘ +┌──────────a─┐ +│ 1000000000 │ +│ 1000000000 │ +└────────────┘ +┌──────────a─┐ +│ 1000000000 │ +│ 1000000000 │ +└────────────┘ + a + + 1000000000 + 1000000000 + a + + 1000000000 + 1000000000 + a + + 1000000000 + 1000000000 + a + + 1000000000 + 1000000000 +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29', 'Date') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 │ +└────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29', 'Date') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 │ +└────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29', 'Date') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 │ +└────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29', 'Date') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 │ +└────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29', 'Date32') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 │ +└──────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29', 'Date32') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 │ +└──────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29', 'Date32') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 │ +└──────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29', 'Date32') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 │ +└──────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29 00:00:00', 'DateTime') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 00:00:00 │ +└─────────────────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29 00:00:00', 'DateTime') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 00:00:00 │ +└─────────────────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29 00:00:00', 'DateTime') ┃ 
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 00:00:00 │ +└─────────────────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('2024-02-29 00:00:00', 'DateTime') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 00:00:00 │ +└─────────────────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST(CAST('2024-02-29 00:00:00', 'DateTime'), 'DateTime64') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 00:00:00.000 │ +└─────────────────────────────────────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST(CAST('2024-02-29 00:00:00', 'DateTime'), 'DateTime64') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 00:00:00.000 │ +└─────────────────────────────────────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST(CAST('2024-02-29 00:00:00', 'DateTime'), 'DateTime64') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 00:00:00.000 │ +└─────────────────────────────────────────────────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST(CAST('2024-02-29 00:00:00', 'DateTime'), 'DateTime64') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ 2024-02-29 00:00:00.000 │ +└─────────────────────────────────────────────────────────────┘ diff --git a/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.sql b/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.sql new file mode 100644 index 00000000000..5dc69488cea --- /dev/null +++ b/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.sql @@ -0,0 +1,96 @@ +SELECT 1_000_000 as a FORMAT Pretty; +SELECT 1_000_000 as a FORMAT PrettyNoEscapes; +SELECT 1_000_000 as a FORMAT PrettyMonoBlock; +SELECT 1_000_000 as a FORMAT PrettyNoEscapesMonoBlock; +SELECT 1_000_000 as a FORMAT PrettyCompact; +SELECT 1_000_000 as a FORMAT PrettyCompactNoEscapes; +SELECT 1_000_000 as a FORMAT PrettyCompactMonoBlock; +SELECT 1_000_000 as a FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT 1_000_000 as a FORMAT PrettySpace; +SELECT 1_000_000 as a FORMAT PrettySpaceNoEscapes; +SELECT 1_000_000 as a FORMAT PrettySpaceMonoBlock; +SELECT 1_000_000 as a FORMAT PrettySpaceNoEscapesMonoBlock; + + +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT Pretty; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettyNoEscapes; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettyMonoBlock; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettyNoEscapesMonoBlock; 
+SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettyCompact; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettyCompactNoEscapes; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettyCompactMonoBlock; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettySpace; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettySpaceNoEscapes; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettySpaceMonoBlock; +SELECT 1_000_000 as a SETTINGS output_format_pretty_single_large_number_tip_threshold = 1000 FORMAT PrettySpaceNoEscapesMonoBlock; + +SELECT 1_000_001 as a FORMAT Pretty; +SELECT 1_000_001 as a FORMAT PrettyNoEscapes; +SELECT 1_000_001 as a FORMAT PrettyMonoBlock; +SELECT 1_000_001 as a FORMAT PrettyNoEscapesMonoBlock; +SELECT 1_000_001 as a FORMAT PrettyCompact; +SELECT 1_000_001 as a FORMAT PrettyCompactNoEscapes; +SELECT 1_000_001 as a FORMAT PrettyCompactMonoBlock; +SELECT 1_000_001 as a FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT 1_000_001 as a FORMAT PrettySpace; +SELECT 1_000_001 as a FORMAT PrettySpaceNoEscapes; +SELECT 1_000_001 as a FORMAT PrettySpaceMonoBlock; +SELECT 1_000_001 as a FORMAT PrettySpaceNoEscapesMonoBlock; + +SELECT 1_000_000_000 as a FORMAT Pretty; +SELECT 1_000_000_000 as a FORMAT PrettyNoEscapes; +SELECT 1_000_000_000 as a FORMAT PrettyMonoBlock; +SELECT 1_000_000_000 as a FORMAT PrettyNoEscapesMonoBlock; +SELECT 1_000_000_000 as a FORMAT PrettyCompact; +SELECT 1_000_000_000 as a FORMAT PrettyCompactNoEscapes; +SELECT 1_000_000_000 as a FORMAT PrettyCompactMonoBlock; +SELECT 1_000_000_000 as a FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT 1_000_000_000 as a FORMAT PrettySpace; +SELECT 1_000_000_000 as a FORMAT PrettySpaceNoEscapes; +SELECT 1_000_000_000 as a FORMAT PrettySpaceMonoBlock; +SELECT 1_000_000_000 as a FORMAT PrettySpaceNoEscapesMonoBlock; + +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT Pretty; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettyNoEscapes; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettyMonoBlock; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettyNoEscapesMonoBlock; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettyCompact; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettyCompactNoEscapes; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettyCompactMonoBlock; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettySpace; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettySpaceNoEscapes; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettySpaceMonoBlock; +SELECT 1_000_000_000 as a, 1_000_000_000 as b FORMAT PrettySpaceNoEscapesMonoBlock; + +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT Pretty; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettyNoEscapes; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettyMonoBlock; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettyNoEscapesMonoBlock; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT 
PrettyCompact; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettyCompactNoEscapes; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettyCompactMonoBlock; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettySpace; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettySpaceNoEscapes; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettySpaceMonoBlock; +SELECT 1_000_000_000 as a FROM system.numbers LIMIT 2 FORMAT PrettySpaceNoEscapesMonoBlock; + +SET output_format_pretty_single_large_number_tip_threshold=1; +SELECT '2024-02-29'::Date FORMAT Pretty; +SELECT '2024-02-29'::Date FORMAT PrettyNoEscapes; +SELECT '2024-02-29'::Date FORMAT PrettyMonoBlock; +SELECT '2024-02-29'::Date FORMAT PrettyNoEscapesMonoBlock; +SELECT '2024-02-29'::Date32 FORMAT Pretty; +SELECT '2024-02-29'::Date32 FORMAT PrettyNoEscapes; +SELECT '2024-02-29'::Date32 FORMAT PrettyMonoBlock; +SELECT '2024-02-29'::Date32 FORMAT PrettyNoEscapesMonoBlock; +SELECT '2024-02-29 00:00:00'::DateTime FORMAT Pretty; +SELECT '2024-02-29 00:00:00'::DateTime FORMAT PrettyNoEscapes; +SELECT '2024-02-29 00:00:00'::DateTime FORMAT PrettyMonoBlock; +SELECT '2024-02-29 00:00:00'::DateTime FORMAT PrettyNoEscapesMonoBlock; +SELECT '2024-02-29 00:00:00'::DateTime::DateTime64 FORMAT Pretty; +SELECT '2024-02-29 00:00:00'::DateTime::DateTime64 FORMAT PrettyNoEscapes; +SELECT '2024-02-29 00:00:00'::DateTime::DateTime64 FORMAT PrettyMonoBlock; +SELECT '2024-02-29 00:00:00'::DateTime::DateTime64 FORMAT PrettyNoEscapesMonoBlock; diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.reference b/tests/queries/0_stateless/02998_primary_key_skip_columns.reference new file mode 100644 index 00000000000..f3c4a4f40fe --- /dev/null +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.reference @@ -0,0 +1,18 @@ +100000 +14954 +798 +15908 +108 +120 +2334 +19 +Key size: 2400000 +100000 +14954 +798 +15908 +108 +120 +2334 +19 +Key size: 800000 diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.sql b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql new file mode 100644 index 00000000000..b567155ab1f --- /dev/null +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql @@ -0,0 +1,35 @@ +-- Tags: no-asan, no-tsan, no-msan, no-ubsan + +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a UInt64, b UInt64, c UInt64) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 1, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 1; +INSERT INTO test SELECT sipHash64(number, 1), sipHash64(number, 2), sipHash64(number, 3) FROM numbers(100000); + +SELECT count() FROM test; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760; +SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137; +SELECT count() FROM test WHERE c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137; +SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 
4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; + +SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), -5) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; + +ALTER TABLE test MODIFY SETTING primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 0.9; + +DETACH TABLE test; +ATTACH TABLE test; + +SELECT count() FROM test; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760; +SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137; +SELECT count() FROM test WHERE c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137; +SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; + +SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), -5) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02998_projection_after_attach_partition.reference b/tests/queries/0_stateless/02998_projection_after_attach_partition.reference new file mode 100644 index 00000000000..1cb984f0f34 --- /dev/null +++ b/tests/queries/0_stateless/02998_projection_after_attach_partition.reference @@ -0,0 +1,31 @@ +-- { echoOn } +DROP TABLE IF EXISTS visits_order; +DROP TABLE IF EXISTS visits_order_dst; +CREATE TABLE visits_order +( + user_id UInt64, + user_name String, + some_int UInt64 +) ENGINE = MergeTree() PRIMARY KEY user_id PARTITION BY user_id; +CREATE TABLE visits_order_dst +( + user_id UInt64, + user_name String, + some_int UInt64 +) ENGINE = MergeTree() PRIMARY KEY user_id PARTITION BY user_id; +ALTER TABLE visits_order ADD PROJECTION user_name_projection (SELECT * ORDER BY user_name); +ALTER TABLE visits_order_dst ADD PROJECTION user_name_projection (SELECT * ORDER BY user_name); +INSERT INTO visits_order SELECT 2, 'user2', number from numbers(1, 10); +INSERT INTO visits_order SELECT 2, 'another_user2', number*2 from numbers(1, 10); +INSERT INTO visits_order SELECT 2, 'yet_another_user2', number*3 from numbers(1, 10); +ALTER TABLE visits_order_dst ATTACH PARTITION ID '2' FROM visits_order; +SET allow_experimental_analyzer=0; +EXPLAIN SELECT * FROM visits_order_dst WHERE user_name='another_user2'; +Expression ((Projection + Before ORDER BY)) + Filter + ReadFromMergeTree (user_name_projection) +SET allow_experimental_analyzer=1; +EXPLAIN SELECT * FROM visits_order_dst WHERE user_name='another_user2'; +Expression ((Project names + Projection)) + Filter + ReadFromMergeTree (user_name_projection) diff --git a/tests/queries/0_stateless/02998_projection_after_attach_partition.sql b/tests/queries/0_stateless/02998_projection_after_attach_partition.sql new file mode 100644 index 00000000000..4e0121dafe9 --- /dev/null +++ b/tests/queries/0_stateless/02998_projection_after_attach_partition.sql @@ -0,0 +1,34 @@ +-- { echoOn } +DROP TABLE IF EXISTS visits_order; +DROP TABLE IF EXISTS visits_order_dst; + 
+CREATE TABLE visits_order +( + user_id UInt64, + user_name String, + some_int UInt64 +) ENGINE = MergeTree() PRIMARY KEY user_id PARTITION BY user_id; + +CREATE TABLE visits_order_dst +( + user_id UInt64, + user_name String, + some_int UInt64 +) ENGINE = MergeTree() PRIMARY KEY user_id PARTITION BY user_id; + +ALTER TABLE visits_order ADD PROJECTION user_name_projection (SELECT * ORDER BY user_name); +ALTER TABLE visits_order_dst ADD PROJECTION user_name_projection (SELECT * ORDER BY user_name); + +INSERT INTO visits_order SELECT 2, 'user2', number from numbers(1, 10); +INSERT INTO visits_order SELECT 2, 'another_user2', number*2 from numbers(1, 10); +INSERT INTO visits_order SELECT 2, 'yet_another_user2', number*3 from numbers(1, 10); + +ALTER TABLE visits_order_dst ATTACH PARTITION ID '2' FROM visits_order; + +SET allow_experimental_analyzer=0; + +EXPLAIN SELECT * FROM visits_order_dst WHERE user_name='another_user2'; + +SET allow_experimental_analyzer=1; + +EXPLAIN SELECT * FROM visits_order_dst WHERE user_name='another_user2'; diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.reference b/tests/queries/0_stateless/02998_system_dns_cache_table.reference new file mode 100644 index 00000000000..ed6cb000142 --- /dev/null +++ b/tests/queries/0_stateless/02998_system_dns_cache_table.reference @@ -0,0 +1 @@ +localhost 127.0.0.1 IPv4 1 diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.sh b/tests/queries/0_stateless/02998_system_dns_cache_table.sh new file mode 100755 index 00000000000..b74fc00ab3b --- /dev/null +++ b/tests/queries/0_stateless/02998_system_dns_cache_table.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Retries are necessary because the DNS cache may be flushed before the second statement is executed +i=0 +retries=5 +while [[ $i -lt $retries ]]; do + ${CLICKHOUSE_CURL} -sS --fail --data "SELECT * FROM url('http://localhost:8123/ping', CSV, 'auto', headers())" "${CLICKHOUSE_URL}" | grep -oP -q 'Ok.' || continue + + RECORDS=$(${CLICKHOUSE_CURL} -sS --fail --data "SELECT hostname, ip_address, ip_family, (isNotNull(cached_at) AND cached_at > '1970-01-01 00:00:00') FROM system.dns_cache WHERE hostname = 'localhost' and ip_family = 'IPv4';" "${CLICKHOUSE_URL}") + + if [[ -n "${RECORDS}" ]]; then + echo "${RECORDS}" + exit 0 + fi + + ((++i)) + sleep 0.2 +done + +echo "All tries to fetch entries for localhost failed, no rows returned. +Probably the DNS cache is disabled or the ClickHouse instance does not respond to ping." 
+exit 1 diff --git a/tests/queries/0_stateless/02998_to_milliseconds.reference b/tests/queries/0_stateless/02998_to_milliseconds.reference new file mode 100644 index 00000000000..05139c19d1d --- /dev/null +++ b/tests/queries/0_stateless/02998_to_milliseconds.reference @@ -0,0 +1,8 @@ +2023-04-21 10:20:30 0 0 +2023-04-21 10:20:30 0 0 +2023-04-21 10:20:30.123 123 123 +2023-04-21 10:20:30.123456 123 123 +2023-04-21 10:20:30.123456789 123 123 +120 +2023-04-21 10:20:30 0 +2023-04-21 10:20:30 0 diff --git a/tests/queries/0_stateless/02998_to_milliseconds.sql b/tests/queries/0_stateless/02998_to_milliseconds.sql new file mode 100644 index 00000000000..f159f6aab50 --- /dev/null +++ b/tests/queries/0_stateless/02998_to_milliseconds.sql @@ -0,0 +1,17 @@ +-- Negative tests +SELECT toMillisecond(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT toMillisecond('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toMillisecond(toDate('2024-02-28')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toMillisecond(toDate32('2024-02-28')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- Tests with constant and non-constant arguments +SELECT toDateTime('2023-04-21 10:20:30') AS dt, toMillisecond(dt), toMillisecond(materialize(dt)); +SELECT toDateTime64('2023-04-21 10:20:30', 0) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123', 3) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123456', 6) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123456789', 9) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); + +-- Special cases +SELECT MILLISECOND(toDateTime64('2023-04-21 10:20:30.123456', 2)); -- Alias +SELECT toNullable(toDateTime('2023-04-21 10:20:30')) AS dt, toMillisecond(dt); -- Nullable +SELECT toLowCardinality(toDateTime('2023-04-21 10:20:30')) AS dt, toMillisecond(dt); -- LowCardinality diff --git a/tests/queries/0_stateless/02999_analyzer_preimage_null.reference b/tests/queries/0_stateless/02999_analyzer_preimage_null.reference new file mode 100644 index 00000000000..c5eb6b23d0d --- /dev/null +++ b/tests/queries/0_stateless/02999_analyzer_preimage_null.reference @@ -0,0 +1,121 @@ +-- { echoOn } +EXPLAIN QUERY TREE run_passes = 1 +SELECT * +FROM date_t__fuzz_0 +WHERE ((toYear(date1) AS b) != toNullable(1993)) AND (id <= b); +QUERY id: 0 + PROJECTION COLUMNS + id UInt32 + value1 String + date1 Date + PROJECTION + LIST id: 1, nodes: 3 + COLUMN id: 2, column_name: id, result_type: UInt32, source_id: 3 + COLUMN id: 4, column_name: value1, result_type: String, source_id: 3 + COLUMN id: 5, column_name: date1, result_type: Date, source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.date_t__fuzz_0 + WHERE + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: notEquals, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: toYear, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 11, nodes: 1 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: UInt64_1993, constant_value_type: Nullable(UInt16) + EXPRESSION + FUNCTION id: 14, function_name: toNullable, function_type: ordinary, result_type: Nullable(UInt16) + ARGUMENTS + LIST id: 15, nodes: 
1 + CONSTANT id: 16, constant_value: UInt64_1993, constant_value_type: UInt16 + FUNCTION id: 17, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + FUNCTION id: 10, function_name: toYear, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 11, nodes: 1 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 +EXPLAIN QUERY TREE run_passes = 1 +SELECT * +FROM date_t__fuzz_0 +WHERE ((toYear(date1) AS b) != 1993) AND (id <= b) SETTINGS optimize_time_filter_with_preimage=0; +QUERY id: 0 + PROJECTION COLUMNS + id UInt32 + value1 String + date1 Date + PROJECTION + LIST id: 1, nodes: 3 + COLUMN id: 2, column_name: id, result_type: UInt32, source_id: 3 + COLUMN id: 4, column_name: value1, result_type: String, source_id: 3 + COLUMN id: 5, column_name: date1, result_type: Date, source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.date_t__fuzz_0 + WHERE + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: notEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: toYear, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 11, nodes: 1 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: UInt64_1993, constant_value_type: UInt16 + FUNCTION id: 14, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + FUNCTION id: 10, function_name: toYear, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 11, nodes: 1 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + SETTINGS optimize_time_filter_with_preimage=0 +EXPLAIN QUERY TREE run_passes = 1 +SELECT * +FROM date_t__fuzz_0 +WHERE ((toYear(date1) AS b) != 1993) AND (id <= b) SETTINGS optimize_time_filter_with_preimage=1; +QUERY id: 0 + PROJECTION COLUMNS + id UInt32 + value1 String + date1 Date + PROJECTION + LIST id: 1, nodes: 3 + COLUMN id: 2, column_name: id, result_type: UInt32, source_id: 3 + COLUMN id: 4, column_name: value1, result_type: String, source_id: 3 + COLUMN id: 5, column_name: date1, result_type: Date, source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.date_t__fuzz_0 + WHERE + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + FUNCTION id: 20, 
function_name: toYear, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + SETTINGS optimize_time_filter_with_preimage=1 diff --git a/tests/queries/0_stateless/02999_analyzer_preimage_null.sql b/tests/queries/0_stateless/02999_analyzer_preimage_null.sql new file mode 100644 index 00000000000..07d3a0f69c1 --- /dev/null +++ b/tests/queries/0_stateless/02999_analyzer_preimage_null.sql @@ -0,0 +1,20 @@ +SET allow_experimental_analyzer=1; +SET optimize_time_filter_with_preimage=1; + +CREATE TABLE date_t__fuzz_0 (`id` UInt32, `value1` String, `date1` Date) ENGINE = ReplacingMergeTree ORDER BY id SETTINGS allow_nullable_key=1; + +-- { echoOn } +EXPLAIN QUERY TREE run_passes = 1 +SELECT * +FROM date_t__fuzz_0 +WHERE ((toYear(date1) AS b) != toNullable(1993)) AND (id <= b); + +EXPLAIN QUERY TREE run_passes = 1 +SELECT * +FROM date_t__fuzz_0 +WHERE ((toYear(date1) AS b) != 1993) AND (id <= b) SETTINGS optimize_time_filter_with_preimage=0; + +EXPLAIN QUERY TREE run_passes = 1 +SELECT * +FROM date_t__fuzz_0 +WHERE ((toYear(date1) AS b) != 1993) AND (id <= b) SETTINGS optimize_time_filter_with_preimage=1; diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference new file mode 100644 index 00000000000..0740afe92c6 --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference @@ -0,0 +1,66 @@ +0 0 +0 0 +0 0 +0 0 +1 \N +1 \N +2 \N +2 \N +3 \N +3 \N +4 \N +4 \N +5 \N +5 \N +6 \N +6 \N +7 \N +7 \N +8 \N +8 \N +9 \N +9 \N +10 10 +10 10 +10 10 +10 10 +11 \N +11 \N +12 \N +12 \N +13 \N +13 \N +14 \N +14 \N +15 \N +15 \N +16 \N +16 \N +17 \N +17 \N +18 \N +18 \N +19 \N +19 \N +20 20 +20 20 +20 20 +20 20 +21 \N +21 \N +22 \N +22 \N +23 \N +23 \N +24 \N +24 \N +25 \N +25 \N +26 \N +26 \N +27 \N +27 \N +28 \N +28 \N +29 \N +29 \N diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql new file mode 100644 index 00000000000..88bcdeb7f77 --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql @@ -0,0 +1,8 @@ +drop table if exists t_table_select; +CREATE TABLE t_table_select (id UInt32) ENGINE = MergeTree ORDER BY id; +INSERT INTO t_table_select (id) SELECT number FROM numbers(30); + +CREATE TEMPORARY TABLE t_test (x UInt32, y Nullable(UInt32)) AS SELECT a.id, b.id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS a GLOBAL LEFT JOIN (SELECT id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS b WHERE (b.id % 10) = 0) AS b ON b.id = a.id SETTINGS join_use_nulls = 1; + +select * from t_test order by x; + diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.reference b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql new file mode 100644 index 00000000000..03ac91e401a --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql @@ -0,0 +1,18 @@ +drop table if exists source; +drop table if exists target1; +drop table if exists target2; +drop table if exists v_heavy; + + +create table source(type String) engine=MergeTree order by type; + +create view v_heavy as +with nums as (select number from numbers(1e5)) +select count(*) n from (select 
number from numbers(1e5) n1 cross join nums); + +create table target1(type String) engine=MergeTree order by type; +create table target2(type String) engine=MergeTree order by type; + +set max_execution_time=2; +-- we should not execute scalar subquery here +create materialized view vm_target2 to target2 as select * from source where type='two' and (select sum(sleepEachRow(0.1)) from numbers(30)); diff --git a/tests/queries/0_stateless/02999_ulid_short_circuit.reference b/tests/queries/0_stateless/02999_ulid_short_circuit.reference new file mode 100644 index 00000000000..51460c40e48 --- /dev/null +++ b/tests/queries/0_stateless/02999_ulid_short_circuit.reference @@ -0,0 +1,2 @@ +2024-02-20 16:53:57.105 +2024-02-21 12:00:00.000 diff --git a/tests/queries/0_stateless/02999_ulid_short_circuit.sql b/tests/queries/0_stateless/02999_ulid_short_circuit.sql new file mode 100644 index 00000000000..4453d9dbe47 --- /dev/null +++ b/tests/queries/0_stateless/02999_ulid_short_circuit.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest + +SET session_timezone='Europe/Madrid'; -- disable time zone randomization in CI +SELECT if(length(x) = 26, ULIDStringToDateTime(x, 'Europe/Madrid'), toDateTime('2024-02-21 12:00:00', 'Europe/Madrid')) AS datetime +FROM values('x String', '01HQ3KJJKHRWP357YVYBX32WHY', '01HQ3KJJKH') diff --git a/tests/queries/0_stateless/02999_variant_suspicious_types.reference b/tests/queries/0_stateless/02999_variant_suspicious_types.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02999_variant_suspicious_types.sql b/tests/queries/0_stateless/02999_variant_suspicious_types.sql new file mode 100644 index 00000000000..8cdbfc13adb --- /dev/null +++ b/tests/queries/0_stateless/02999_variant_suspicious_types.sql @@ -0,0 +1,7 @@ +set allow_suspicious_variant_types=0; +select 42::Variant(UInt32, Int64); -- {serverError ILLEGAL_COLUMN} +select [42]::Variant(Array(UInt32), Array(Int64)); -- {serverError ILLEGAL_COLUMN} +select 'Hello'::Variant(String, LowCardinality(String)); -- {serverError ILLEGAL_COLUMN} +select (1, 'Hello')::Variant(Tuple(UInt32, String), Tuple(Int64, String)); -- {serverError ILLEGAL_COLUMN} +select map(42, 42)::Variant(Map(UInt64, UInt32), Map(UInt64, Int64)); -- {serverError ILLEGAL_COLUMN} + diff --git a/tests/queries/0_stateless/03000_minmax_index_first.reference b/tests/queries/0_stateless/03000_minmax_index_first.reference new file mode 100644 index 00000000000..7cf792d8ed4 --- /dev/null +++ b/tests/queries/0_stateless/03000_minmax_index_first.reference @@ -0,0 +1,2 @@ +Name: v_mm +Name: v_set diff --git a/tests/queries/0_stateless/03000_minmax_index_first.sql b/tests/queries/0_stateless/03000_minmax_index_first.sql new file mode 100644 index 00000000000..d978e6ea8fa --- /dev/null +++ b/tests/queries/0_stateless/03000_minmax_index_first.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS skip_table; + +CREATE TABLE skip_table +( + k UInt64, + v UInt64, + INDEX v_set v TYPE set(100) GRANULARITY 2, -- set index is declared before minmax intentionally + INDEX v_mm v TYPE minmax GRANULARITY 2 +) +ENGINE = MergeTree +PRIMARY KEY k +SETTINGS index_granularity = 8192; + +INSERT INTO skip_table SELECT number, intDiv(number, 4096) FROM numbers(100000); + +SELECT trim(explain) FROM ( EXPLAIN indexes = 1 SELECT * FROM skip_table WHERE v = 125) WHERE explain like '%Name%'; + +DROP TABLE skip_table; diff --git a/tests/queries/0_stateless/03000_too_big_max_execution_time_setting.reference 
b/tests/queries/0_stateless/03000_too_big_max_execution_time_setting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03000_too_big_max_execution_time_setting.sql b/tests/queries/0_stateless/03000_too_big_max_execution_time_setting.sql new file mode 100644 index 00000000000..7aa86891b42 --- /dev/null +++ b/tests/queries/0_stateless/03000_too_big_max_execution_time_setting.sql @@ -0,0 +1,2 @@ +select 1 settings max_execution_time = 9223372036854775808; -- {clientError BAD_ARGUMENTS} + diff --git a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference new file mode 100644 index 00000000000..06b63ea6c2f --- /dev/null +++ b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference @@ -0,0 +1 @@ +0 0 0 diff --git a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql new file mode 100644 index 00000000000..d57db9151b9 --- /dev/null +++ b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql @@ -0,0 +1,10 @@ +drop table if exists x; + +create table x (i int, j int, k int) engine MergeTree order by tuple() settings index_granularity=8192, index_granularity_bytes = '10Mi', min_bytes_for_wide_part=0, min_rows_for_wide_part=0, ratio_of_defaults_for_sparse_serialization=1; + +insert into x select number, number * 2, number * 3 from numbers(100000); + +-- One granule, (_part_offset (8 bytes) + (4 bytes)) * 8192 + (8 bytes) * 1 = 98312 +select * from x prewhere _part_offset = 0 settings max_bytes_to_read = 98312; + +drop table x; diff --git a/tests/queries/0_stateless/03001_analyzer_nullable_nothing.reference b/tests/queries/0_stateless/03001_analyzer_nullable_nothing.reference new file mode 100644 index 00000000000..2ad74d50ebd --- /dev/null +++ b/tests/queries/0_stateless/03001_analyzer_nullable_nothing.reference @@ -0,0 +1 @@ +0 \N diff --git a/tests/queries/0_stateless/03001_analyzer_nullable_nothing.sql b/tests/queries/0_stateless/03001_analyzer_nullable_nothing.sql new file mode 100644 index 00000000000..32c378ebf0a --- /dev/null +++ b/tests/queries/0_stateless/03001_analyzer_nullable_nothing.sql @@ -0,0 +1,6 @@ +--https://github.com/ClickHouse/ClickHouse/issues/58906 +SELECT + count(_CAST(NULL, 'Nullable(Nothing)')), + round(avg(_CAST(NULL, 'Nullable(Nothing)'))) AS k +FROM numbers(256) + SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03001_bad_error_message_higher_order_functions.reference b/tests/queries/0_stateless/03001_bad_error_message_higher_order_functions.reference new file mode 100644 index 00000000000..d4e027274e2 --- /dev/null +++ b/tests/queries/0_stateless/03001_bad_error_message_higher_order_functions.reference @@ -0,0 +1 @@ +Argument 3 has size 2 which differs with the size of another argument, 3 diff --git a/tests/queries/0_stateless/03001_bad_error_message_higher_order_functions.sh b/tests/queries/0_stateless/03001_bad_error_message_higher_order_functions.sh new file mode 100755 index 00000000000..967453fd375 --- /dev/null +++ b/tests/queries/0_stateless/03001_bad_error_message_higher_order_functions.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "SELECT arrayMap((x,y) -> x + y, [1,2,3], [1,2])" 2>&1 | grep -o -F --max-count 1 'Argument 3 has size 2 which differs with the size of another argument, 3' \ No newline at end of file diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.reference b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference new file mode 100644 index 00000000000..b6d6006f84c --- /dev/null +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.reference @@ -0,0 +1,4 @@ +3 +6 +12 +18 diff --git a/tests/queries/0_stateless/03001_insert_threads_deduplication.sql b/tests/queries/0_stateless/03001_insert_threads_deduplication.sql new file mode 100644 index 00000000000..093d2b3185d --- /dev/null +++ b/tests/queries/0_stateless/03001_insert_threads_deduplication.sql @@ -0,0 +1,69 @@ +-- Tags: distributed + +DROP TABLE IF EXISTS landing SYNC; +DROP TABLE IF EXISTS landing_dist SYNC; +DROP TABLE IF EXISTS ds SYNC; + +CREATE TABLE landing +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = MergeTree() +ORDER BY timestamp; + +CREATE TABLE landing_dist +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'landing', rand()); + +SYSTEM STOP MERGES landing; -- Stopping merges to force 3 parts + +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; +INSERT INTO landing (status, id, timestamp) SELECT * FROM generateRandom() LIMIT 1; + +CREATE TABLE ds +( + timestamp DateTime64(3), + status String, + id String +) +ENGINE = MergeTree() +ORDER BY timestamp +SETTINGS non_replicated_deduplication_window=1000; + +INSERT INTO ds SELECT * FROM landing +SETTINGS insert_deduplicate=1, insert_deduplication_token='token1', + max_insert_threads=5; + +SELECT count() FROM ds; + +INSERT INTO ds SELECT * FROM landing +SETTINGS insert_deduplicate=1, insert_deduplication_token='token2', + max_insert_threads=1; + +SELECT count() FROM ds; + +-- When reading from distributed table, 6 rows are going to be retrieved +-- due to the being using the two shards cluster + +INSERT INTO ds SELECT * FROM landing_dist +SETTINGS insert_deduplicate=1, insert_deduplication_token='token3', + max_insert_threads=5; + +SELECT count() FROM ds; + +INSERT INTO ds SELECT * FROM landing_dist +SETTINGS insert_deduplicate=1, insert_deduplication_token='token4', + max_insert_threads=1; + +SELECT count() FROM ds; + +DROP TABLE IF EXISTS landing SYNC; +DROP TABLE IF EXISTS landing_dist SYNC; +DROP TABLE IF EXISTS ds SYNC; diff --git a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.reference b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh new file mode 100755 index 00000000000..6cd5c3b486c --- /dev/null +++ b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-cpu-aarch64 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -q "select number > 1000000 ? 
'error' : toString(number) from numbers(2000000) format CSV" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select * from file($DATA_FILE, CSV, 'x UInt64') format Null settings input_format_allow_errors_ratio=1" +rm $DATA_FILE diff --git a/tests/queries/0_stateless/03002_analyzer_prewhere.reference b/tests/queries/0_stateless/03002_analyzer_prewhere.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03002_analyzer_prewhere.sql b/tests/queries/0_stateless/03002_analyzer_prewhere.sql new file mode 100644 index 00000000000..0edf16f1cbe --- /dev/null +++ b/tests/queries/0_stateless/03002_analyzer_prewhere.sql @@ -0,0 +1,12 @@ +SET max_threads = 16, receive_timeout = 10., receive_data_timeout_ms = 10000, allow_suspicious_low_cardinality_types = true, enable_positional_arguments = false, log_queries = true, table_function_remote_max_addresses = 200, any_join_distinct_right_table_keys = true, joined_subquery_requires_alias = false, allow_experimental_analyzer = true, max_execution_time = 10., max_memory_usage = 10000000000, log_comment = '/workspace/ch/tests/queries/0_stateless/01710_projection_in_index.sql', send_logs_level = 'fatal', enable_optimize_predicate_expression = false, prefer_localhost_replica = true, allow_introspection_functions = true, optimize_functions_to_subcolumns = false, transform_null_in = true, optimize_use_projections = true, allow_deprecated_syntax_for_merge_tree = true, parallelize_output_from_storages = false; + +CREATE TABLE t__fuzz_0 (`i` Int32, `j` Nullable(Int32), `k` Int32, PROJECTION p (SELECT * ORDER BY j)) ENGINE = MergeTree ORDER BY i SETTINGS index_granularity = 1, allow_nullable_key=1; + +INSERT INTO t__fuzz_0 SELECT * FROM generateRandom() LIMIT 3; +INSERT INTO t__fuzz_0 SELECT * FROM generateRandom() LIMIT 3; +INSERT INTO t__fuzz_0 SELECT * FROM generateRandom() LIMIT 3; +INSERT INTO t__fuzz_0 SELECT * FROM generateRandom() LIMIT 3; +INSERT INTO t__fuzz_0 SELECT * FROM generateRandom() LIMIT 3; + +SELECT * FROM t__fuzz_0 PREWHERE (i < 5) AND (j IN (1, 2)) WHERE i < 5; +DROP TABLE t__fuzz_0; diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference new file mode 100644 index 00000000000..4c9646d6ffa --- /dev/null +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference @@ -0,0 +1,11 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql new file mode 100644 index 00000000000..9f8bc6bd3d7 --- /dev/null +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -0,0 +1,6 @@ +create table test (number UInt64) engine=MergeTree order by number; +insert into test select * from numbers(100000000); +select ignore(number) from test where RAND() > 4292390314 limit 10; +select count() > 0 from test where RAND() > 4292390314; +drop table test; + diff --git a/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.reference b/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.sql 
b/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.sql new file mode 100644 index 00000000000..1668821200c --- /dev/null +++ b/tests/queries/0_stateless/03002_int_div_decimal_with_date_bug.sql @@ -0,0 +1,5 @@ +SELECT intDiv(CAST('1.0', 'Decimal256(3)'), today()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT intDiv(CAST('1.0', 'Decimal256(3)'), toDate('2023-01-02')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT intDiv(CAST('1.0', 'Decimal256(2)'), toDate32('2023-01-02 12:12:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT intDiv(CAST('1.0', 'Decimal256(2)'), toDateTime('2023-01-02 12:12:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT intDiv(CAST('1.0', 'Decimal256(2)'), toDateTime64('2023-01-02 12:12:12.002', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql new file mode 100644 index 00000000000..8820a433da8 --- /dev/null +++ b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql @@ -0,0 +1 @@ +SELECT mapContainsKeyLike(map('aa', toLowCardinality(1), 'bb', toLowCardinality(2)), toLowCardinality('a%')); diff --git a/tests/queries/0_stateless/03003_analyzer_setting.reference b/tests/queries/0_stateless/03003_analyzer_setting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03003_analyzer_setting.sql b/tests/queries/0_stateless/03003_analyzer_setting.sql new file mode 100644 index 00000000000..2e5cab71277 --- /dev/null +++ b/tests/queries/0_stateless/03003_analyzer_setting.sql @@ -0,0 +1,9 @@ +CREATE TABLE test (dummy Int8) ENGINE = Distributed(test_cluster_two_shards, 'system', 'one'); + +SET allow_experimental_analyzer = 0; + +SELECT * FROM (SELECT * FROM test SETTINGS allow_experimental_analyzer = 1); -- { serverError INCORRECT_QUERY } + +SET allow_experimental_analyzer = 1; + +SELECT * FROM (SELECT * FROM test SETTINGS allow_experimental_analyzer = 0); -- { serverError INCORRECT_QUERY } diff --git a/tests/queries/0_stateless/03003_arrayEnumerate_crash.reference b/tests/queries/0_stateless/03003_arrayEnumerate_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03003_arrayEnumerate_crash.sql b/tests/queries/0_stateless/03003_arrayEnumerate_crash.sql new file mode 100644 index 00000000000..21102ddbb6a --- /dev/null +++ b/tests/queries/0_stateless/03003_arrayEnumerate_crash.sql @@ -0,0 +1,2 @@ +SELECT arrayEnumerateUniqRanked(arrayEnumerateUniqRanked([[1, 2, 3], [2, 2, 1], [3]]), materialize(1 AS x) OR toLowCardinality(-9223372036854775808)); -- { serverError BAD_ARGUMENTS } +SELECT arrayEnumerateUniqRanked([[1, 2, 3], [2, 2, 1], [3]], number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference new file mode 100644 index 00000000000..93d120dac01 --- /dev/null +++ b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference @@ -0,0 
+1,2 @@ +Too large +Wrong data diff --git a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh new file mode 100755 index 00000000000..93290f62c58 --- /dev/null +++ b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo -ne 'checksumchecksum\x91\xa4\x0a\x00\x00\x41\x00\x00\x20\x41\x41\x41\x40\x41\x00\x41\x41\x41\x41\x40\x41\x00\x00\x00\x00\x00\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x41\x41\x00\x00\x00\x00\x00\x01\xfe\x7f\x00\x00\x41\x00\x00\x00\x41\x92\x6b\x00\x41\x41\x0b\x00\x00\x00\x00\x00\x41\x92\x6b\x00\x41\x41\x0b\x00\x00\x82\x82\x82\x82\x63\x82\xff\xff\xff\xff\xff\xff\xff\xff\x95\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x41\x41\x41\x41\x41\x41\x41\x41\x40\x08\x08\x08\x08\x08\x08\x00\x06\x00\x00\x00\x08\x00\x20\x00\x00\xef\xff\xff\xff\xe1\x40\x26\x41\x00\x1d\x01\x00\x00\x41\x42\x0b\xff\xff\xff\xe4\x41\x41\x4e\x41\x41\x06\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x7e\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x00\x04\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x9c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x4f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31
\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8
f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x6c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\xa9\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x4f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x
8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x6c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x9b\x8f\x8f\x8f\x20\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f' | + ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- | grep -o -F 'Too large' + +echo -ne 'checksumchecksum\x91\x2b\x01\x00\x00\xbe\xe1\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x00\x04\x04\x04\x00\x08\x7f\x00\x01\x06\x82\x82\x82\x82\x82\x82\x82\x80\x41\x41\x41\x41\x41\x9a\x75\x6d\x63\x68\x65\x63\x6b\x73\x6d\x63\x68\x65\x63\x6b\x73\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x41\x41\x0c\x00\x1c\x41\x41\xbe\x22\x41\x41\x00\x00\x00\x00\x11\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x41\x41\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x41\x41\x00\x00\x00\x00\x00\x01\x14\xff\x7f\x00\x41\x00\x00\x00\x00\x00\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x61\x41\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x00\x41\x14\x14\x41\x14\x14\x14\x14\x14\x14\x14\x14\x14\x14\x14\x0f\x0f\x0f\x0f\x0f\x41\x41\x41\x41\x64\x00\x30\x00\xcf\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x41\x0b\x00\x00\x00\x41\x41\x41\xe8\x1f\xe1\x00\x01\x00\xff\x00\x41\x41\xbf\x41\x41\x40\x40\xe1\x00\x00\x00\x00\x1a\x00\x20\x00\x00\x00\x41\x00\x00\x00\x42\x64\x41\x41\x41\x0c\x00\x1c\x41\x41\xbe\x22\x41\x41\x00\x00\x00\x00\x00\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x00\x00\x0b\x00\xe6\xff\x00\x00\x00\x00\x00' | + ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- | grep -o -F 'Wrong data' diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.reference b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql new file mode 100644 index 00000000000..48e98798c51 --- /dev/null +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -0,0 +1,2 @@ +select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} + diff --git a/tests/queries/0_stateless/03003_database_filesystem_format_detection.reference b/tests/queries/0_stateless/03003_database_filesystem_format_detection.reference new file mode 100644 index 00000000000..87d15e83593 --- /dev/null +++ b/tests/queries/0_stateless/03003_database_filesystem_format_detection.reference @@ -0,0 +1,2 @@ +a Nullable(Int64) +42 diff --git a/tests/queries/0_stateless/03003_database_filesystem_format_detection.sh b/tests/queries/0_stateless/03003_database_filesystem_format_detection.sh new file mode 100755 index 00000000000..8b476a1f82e --- /dev/null +++ b/tests/queries/0_stateless/03003_database_filesystem_format_detection.sh @@ 
-0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.data + +echo '{"a" : 42}' > $DATA_FILE +$CLICKHOUSE_LOCAL -q "desc table \`$DATA_FILE\`" +$CLICKHOUSE_LOCAL -q "select * from \`$DATA_FILE\`" + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/03003_enum_and_string_compatible.reference b/tests/queries/0_stateless/03003_enum_and_string_compatible.reference new file mode 100644 index 00000000000..acf5fe0d423 --- /dev/null +++ b/tests/queries/0_stateless/03003_enum_and_string_compatible.reference @@ -0,0 +1 @@ +['Hello','Goodbye','test'] diff --git a/tests/queries/0_stateless/03003_enum_and_string_compatible.sql b/tests/queries/0_stateless/03003_enum_and_string_compatible.sql new file mode 100644 index 00000000000..0abba6741ac --- /dev/null +++ b/tests/queries/0_stateless/03003_enum_and_string_compatible.sql @@ -0,0 +1 @@ +WITH 'Hello'::Enum8('Hello', 'World') AS enum1, 'test'::Enum8('test', 'best') AS enum2 SELECT [enum1, 'Goodbye', enum2]; diff --git a/tests/queries/0_stateless/03003_prql_panic.reference b/tests/queries/0_stateless/03003_prql_panic.reference new file mode 100644 index 00000000000..8e0782c44f2 --- /dev/null +++ b/tests/queries/0_stateless/03003_prql_panic.reference @@ -0,0 +1 @@ +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03003_prql_panic.sh b/tests/queries/0_stateless/03003_prql_panic.sh new file mode 100755 index 00000000000..01d7beed99b --- /dev/null +++ b/tests/queries/0_stateless/03003_prql_panic.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Requires Rust, which is not built for Fast Test. + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Before [1] this causes a panic, but it will be fixed soon, so do not check +# for panic, but just for SYNTAX_ERROR. 
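# (Clarifying note added here, not part of the original test:) `|&` pipes stderr
# together with stdout, so the client's error text is visible to grep, and
# `grep -o -m1` prints only the first SYNTAX_ERROR token it finds.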
+# +# [1]: https://github.com/PRQL/prql/pull/4285 +$CLICKHOUSE_CLIENT --dialect prql -q "SELECT id FROM distributed_test_table GROUP BY x -> concat(concat(materialize(toNullable(NULL)))) LIMIT 3" |& grep -o -m1 SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03003_sql_json_nonsense.reference b/tests/queries/0_stateless/03003_sql_json_nonsense.reference new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/tests/queries/0_stateless/03003_sql_json_nonsense.reference @@ -0,0 +1 @@ + diff --git a/tests/queries/0_stateless/03003_sql_json_nonsense.sql b/tests/queries/0_stateless/03003_sql_json_nonsense.sql new file mode 100644 index 00000000000..9b7beb42cf3 --- /dev/null +++ b/tests/queries/0_stateless/03003_sql_json_nonsense.sql @@ -0,0 +1 @@ +SELECT JSON_QUERY('{"x":1}', '$[\'hello\']', materialize(toLowCardinality('x'))); diff --git a/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.reference b/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.reference new file mode 100644 index 00000000000..0318b136ade --- /dev/null +++ b/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.reference @@ -0,0 +1,3 @@ +obj Tuple(\n a Nullable(String)) +('42') +('{"b" : 42}') diff --git a/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.sql b/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.sql new file mode 100644 index 00000000000..4b986c94868 --- /dev/null +++ b/tests/queries/0_stateless/03004_json_named_tuples_inference_ambiguous_paths_as_string.sql @@ -0,0 +1,4 @@ +set input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=1; +desc format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : 42}}}'); +select * from format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : 42}}}'); + diff --git a/tests/queries/0_stateless/03005_input_function_in_join.reference b/tests/queries/0_stateless/03005_input_function_in_join.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/03005_input_function_in_join.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03005_input_function_in_join.sql b/tests/queries/0_stateless/03005_input_function_in_join.sql new file mode 100644 index 00000000000..8a6b4a48a8d --- /dev/null +++ b/tests/queries/0_stateless/03005_input_function_in_join.sql @@ -0,0 +1,14 @@ +drop table if exists test; +create table test (a Int8) engine = MergeTree order by tuple(); +INSERT INTO test +SELECT x.number FROM ( + SELECT number + FROM system.numbers + LIMIT 10 +) AS x +INNER JOIN input('a UInt64') AS y ON x.number = y.a +Format CSV 2 +; +select * from test; +drop table test; + diff --git a/tests/queries/0_stateless/03006_analyzer_executable_table_function.reference b/tests/queries/0_stateless/03006_analyzer_executable_table_function.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql b/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql new file mode 100644 index 00000000000..4edbcc97f50 --- /dev/null +++ b/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql @@ -0,0 +1,4 @@ +SELECT + toFixedString(toFixedString(toLowCardinality(toFixedString('--------------------', toNullable(20))), toLowCardinality(20)), 20), + * +FROM executable('data String', SETTINGS 
max_command_execution_time = 100); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} diff --git a/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference b/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh b/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh new file mode 100755 index 00000000000..f01c34cdbda --- /dev/null +++ b/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "CREATE TABLE t_async_insert_deadlock (a UInt64) ENGINE = Log" + +echo '{"a": 1}' | $CLICKHOUSE_CLIENT --async_insert 1 --wait_for_async_insert 1 --query "INSERT INTO t_async_insert_deadlock FORMAT JSONEachRow" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM t_async_insert_deadlock ORDER BY a" +$CLICKHOUSE_CLIENT --query "DROP TABLE t_async_insert_deadlock" diff --git a/tests/queries/0_stateless/03006_buffer_overflow_join.reference b/tests/queries/0_stateless/03006_buffer_overflow_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_buffer_overflow_join.sql b/tests/queries/0_stateless/03006_buffer_overflow_join.sql new file mode 100644 index 00000000000..8c1fa3cecc0 --- /dev/null +++ b/tests/queries/0_stateless/03006_buffer_overflow_join.sql @@ -0,0 +1,6 @@ +CREATE TABLE 03006_buffer_overflow_l (`a` String, `b` Tuple(String, String)) ENGINE = Memory; +INSERT INTO 03006_buffer_overflow_l SELECT * FROM generateRandom() limit 1000; +CREATE TABLE 03006_buffer_overflow_r (`a` LowCardinality(Nullable(String)), `c` Tuple(LowCardinality(String), LowCardinality(String))) ENGINE = Memory; +INSERT INTO 03006_buffer_overflow_r SELECT * FROM generateRandom() limit 1000; + +SELECT a FROM 03006_buffer_overflow_l RIGHT JOIN 03006_buffer_overflow_r USING (a) ORDER BY a ASC NULLS FIRST FORMAT Null; diff --git a/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.reference b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql new file mode 100644 index 00000000000..808317c917e --- /dev/null +++ b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS 02985_test; + +SET async_insert = 1; +SET deduplicate_blocks_in_dependent_materialized_views = 1; + +CREATE TABLE 03006_test +( + d Date, + value UInt64 +) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO 03006_test VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); -- { serverError SUPPORT_IS_DISABLED } +INSERT INTO 03006_test SETTINGS compatibility='24.1' VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS async_insert=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS deduplicate_blocks_in_dependent_materialized_views=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS 
throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); + +DROP TABLE IF EXISTS 02985_test; diff --git a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql new file mode 100644 index 00000000000..4b84646c034 --- /dev/null +++ b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql @@ -0,0 +1,31 @@ +DROP POLICY IF EXISTS url_na_log_policy0 ON url_na_log; +DROP TABLE IF EXISTS url_na_log; + +CREATE TABLE url_na_log +( + `SiteId` UInt32, + `DateVisit` Date +) +ENGINE = MergeTree +PRIMARY KEY SiteId +ORDER BY (SiteId, DateVisit) +SETTINGS index_granularity = 1000, min_bytes_for_wide_part = 0; + +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING (DateVisit < '2022-08-11') OR (DateVisit > '2022-08-19') TO default; + +INSERT INTO url_na_log +SETTINGS max_insert_block_size = 200000 +SELECT + 209, + CAST('2022-08-09', 'Date') + toIntervalDay(intDiv(number, 10000)) +FROM numbers(130000) +SETTINGS max_insert_block_size = 200000; + +EXPLAIN ESTIMATE +SELECT count() +FROM url_na_log +PREWHERE (DateVisit >= toFixedString('2022-08-10', 10)) AND (DateVisit <= '2022-08-20') +SETTINGS max_block_size = 1048576, max_threads = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3, parallel_replicas_min_number_of_rows_per_replica=10000; + +DROP POLICY url_na_log_policy0 ON url_na_log; +DROP TABLE url_na_log; diff --git a/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference new file mode 100644 index 00000000000..bead7ee1474 --- /dev/null +++ b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference @@ -0,0 +1,3 @@ +\N 1000 + +\N 1000 diff --git a/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql new file mode 100644 index 00000000000..9479044e0e0 --- /dev/null +++ b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql @@ -0,0 +1 @@ +SELECT count(NULL) IGNORE NULLS > avg(toDecimal32(NULL)) IGNORE NULLS, count() FROM numbers(1000) WITH TOTALS SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference new file mode 100644 index 00000000000..8233925d609 --- /dev/null +++ b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference @@ -0,0 +1,55 @@ +-- count +100000 all_10_10_0 +100000 all_1_1_0 +100000 all_2_2_0 +100000 all_3_3_0 +100000 all_4_4_0 +100000 all_5_5_0 +100000 all_6_6_0 +100000 all_7_7_0 +100000 all_8_8_0 +100000 all_9_9_0 +-- rand()%2=0: +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 +-- optimize_use_implicit_projections=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 
all_8_8_0 +1 all_9_9_0 +-- optimize_trivial_count_query=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 +-- optimize_trivial_count_query=0, optimize_use_implicit_projections=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 diff --git a/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql new file mode 100644 index 00000000000..3be9bc3982a --- /dev/null +++ b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql @@ -0,0 +1,28 @@ +create table test (number UInt64) engine=MergeTree order by number; +system stop merges test; +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); + +select '-- count'; +SELECT count(), _part FROM test GROUP BY _part ORDER BY _part; + +select '-- rand()%2=0:'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(1)%2=1 GROUP BY _part ORDER BY _part; + +select '-- optimize_use_implicit_projections=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(2)%2=1 GROUP BY _part ORDER BY _part settings optimize_use_implicit_projections=0; + +select '-- optimize_trivial_count_query=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(3)%2=1 GROUP BY _part ORDER BY _part settings optimize_trivial_count_query=0; + +select '-- optimize_trivial_count_query=0, optimize_use_implicit_projections=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(4)%2=1 GROUP BY _part ORDER BY _part settings optimize_trivial_count_query=0,optimize_use_implicit_projections=0; + diff --git a/tests/queries/0_stateless/03008_groupSortedArray_field.reference b/tests/queries/0_stateless/03008_groupSortedArray_field.reference new file mode 100644 index 00000000000..a7f89ebcf58 --- /dev/null +++ b/tests/queries/0_stateless/03008_groupSortedArray_field.reference @@ -0,0 +1,3 @@ +0A01003C79A557B3C43400C4865AA84C3B4B01000650BC18F7DE0B00FAAF43E708213401008ED706EA0A9F13007228F915F5602C0100C692CA8FB81405003A6D357047EB1A01008416B7C3239EE3FF7BE9483CDC61DC01003E133A7C081AF5FFC1ECC583F7E5EA01000000000000000000000000000000000100C4865AA84C3BCBFF3B79A557B3C4B4010024C46EF500F1ECFFDB3B910AFF0ED301005E2FC14EBAEAE5FFA1D03EB14515DA +070109000000010600000001080000000103000000010500000001040000000107000000 AggregateFunction(groupArraySorted(10), Nullable(Decimal(3, 0))) +[3,4,5,6,7,8,9] diff --git a/tests/queries/0_stateless/03008_groupSortedArray_field.sql b/tests/queries/0_stateless/03008_groupSortedArray_field.sql new file mode 100644 index 00000000000..6d2aea641a5 --- /dev/null +++ b/tests/queries/0_stateless/03008_groupSortedArray_field.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61186 +SELECT 
hex(CAST(unhex('0A01003C79A557B3C43400C4865AA84C3B4B01000650BC18F7DE0B00FAAF43E708213401008ED706EA0A9F13007228F915F5602C0100C692CA8FB81405003A6D357047EB1A01008416B7C3239EE3FF7BE9483CDC61DC01003E133A7C081AF5FFC1ECC583F7E5EA01000000000000000000000000000000000100C4865AA84C3BCBFF3B79A557B3C4B4010024C46EF500F1ECFFDB3B910AFF0ED301005E2FC14EBAEAE5FFA1D03EB14515DA'), + 'AggregateFunction(groupArraySorted(10), Decimal(38, 38))')); + +Select hex(groupArraySortedState(10)((number < 3 ? NULL : number)::Nullable(Decimal(3))) as t), toTypeName(t) from numbers(10); +Select finalizeAggregation(unhex('070109000000010600000001080000000103000000010500000001040000000107000000')::AggregateFunction(groupArraySorted(10), Nullable(Decimal(3, 0)))); diff --git a/tests/queries/0_stateless/03008_index_small.reference b/tests/queries/0_stateless/03008_index_small.reference new file mode 100644 index 00000000000..a5c8806279f --- /dev/null +++ b/tests/queries/0_stateless/03008_index_small.reference @@ -0,0 +1,2 @@ +3 +3 diff --git a/tests/queries/0_stateless/03008_index_small.sql b/tests/queries/0_stateless/03008_index_small.sql new file mode 100644 index 00000000000..72213ed4fc9 --- /dev/null +++ b/tests/queries/0_stateless/03008_index_small.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a UInt8, b UInt8) ENGINE = MergeTree ORDER BY (a, b) +SETTINGS index_granularity = 1, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 0.01; + +SET optimize_move_to_prewhere = 0; + +INSERT INTO test +SELECT number DIV 2, number +FROM numbers(3); + +SELECT count() FROM test WHERE b >= 0; + +DETACH TABLE test; +ATTACH TABLE test; + +SELECT count() FROM test WHERE b >= 0; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/03009_format_show_database.reference b/tests/queries/0_stateless/03009_format_show_database.reference new file mode 100644 index 00000000000..83cfd4c1a68 --- /dev/null +++ b/tests/queries/0_stateless/03009_format_show_database.reference @@ -0,0 +1,2 @@ +CREATE DATABASE default +UNKNOWN_DATABASE diff --git a/tests/queries/0_stateless/03009_format_show_database.sh b/tests/queries/0_stateless/03009_format_show_database.sh new file mode 100755 index 00000000000..7f33ad7b1e1 --- /dev/null +++ b/tests/queries/0_stateless/03009_format_show_database.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "show database default" +$CLICKHOUSE_LOCAL -q "show database default2" 2>&1 | grep -o 'UNKNOWN_DATABASE' diff --git a/tests/queries/0_stateless/03009_range_dict_get_or_default.reference b/tests/queries/0_stateless/03009_range_dict_get_or_default.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/03009_range_dict_get_or_default.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03009_range_dict_get_or_default.sql b/tests/queries/0_stateless/03009_range_dict_get_or_default.sql new file mode 100644 index 00000000000..1f4b4073b9f --- /dev/null +++ b/tests/queries/0_stateless/03009_range_dict_get_or_default.sql @@ -0,0 +1,34 @@ +DROP DICTIONARY IF EXISTS range_dictionary; +DROP TABLE IF EXISTS range_dictionary_nullable_source_table; + + +CREATE TABLE range_dictionary_nullable_source_table +( + key UInt64, + start_date Date, + end_date Date, + value Nullable(UInt64) +) +ENGINE = TinyLog; + +INSERT INTO range_dictionary_nullable_source_table VALUES (0, toDate('2019-05-05'), toDate('2019-05-20'), 0), (1, toDate('2019-05-05'), toDate('2019-05-20'), NULL); + +CREATE DICTIONARY range_dictionary +( + key UInt64, + start_date Date, + end_date Date, + value Nullable(UInt64) DEFAULT NULL +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'range_dictionary_nullable_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(RANGE_HASHED()) +RANGE(MIN start_date MAX end_date); + +SELECT dictGetOrDefault('range_dictionary', 'value', toUInt64(2), toDate(toLowCardinality(materialize('2019-05-15'))), 2); + + +DROP DICTIONARY IF EXISTS range_dictionary; +DROP TABLE IF EXISTS range_dictionary_nullable_source_table; + diff --git a/tests/queries/1_stateful/00098_primary_key_memory_allocated.reference b/tests/queries/1_stateful/00098_primary_key_memory_allocated.reference new file mode 100644 index 00000000000..72749c905a3 --- /dev/null +++ b/tests/queries/1_stateful/00098_primary_key_memory_allocated.reference @@ -0,0 +1 @@ +1 1 1 diff --git a/tests/queries/1_stateful/00098_primary_key_memory_allocated.sql b/tests/queries/1_stateful/00098_primary_key_memory_allocated.sql new file mode 100644 index 00000000000..7371678a0f6 --- /dev/null +++ b/tests/queries/1_stateful/00098_primary_key_memory_allocated.sql @@ -0,0 +1 @@ +SELECT primary_key_bytes_in_memory < 16000, primary_key_bytes_in_memory_allocated < 16000, primary_key_bytes_in_memory_allocated / primary_key_bytes_in_memory < 1.1 FROM system.parts WHERE database = 'test' AND table = 'hits'; diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index 9699843af8f..3621ff82126 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -1,8 +1,5 @@ -- Tags: no-tsan, no-parallel --- Suppress "ReadWriteBufferFromHTTP: HTTP request to `{}` failed at try 1/10 with bytes read: 311149/378695. Error: DB::HTTPException: Received error from remote server {}. 
(Current backoff wait is 100/10000 ms)" errors -SET send_logs_level='error'; - DROP TABLE IF EXISTS test.hits_1m; CREATE TABLE test.hits_1m AS test.hits diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index 2d5e1f8a9e9..169e0f0f440 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -1,18 +1,15 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import datetime -import logging -import pyodbc -import sqlite3 -import traceback import enum +import logging import random +import sqlite3 import string from contextlib import contextmanager +import pyodbc # pylint:disable=import-error; for style check from exceptions import ProgramError - logger = logging.getLogger("connection") logger.setLevel(logging.DEBUG) @@ -22,9 +19,7 @@ class OdbcConnectingArgs: self._kwargs = kwargs def __str__(self): - conn_str = ";".join( - ["{}={}".format(x, y) for x, y in self._kwargs.items() if y] - ) + conn_str = ";".join([f"{x}={y}" for x, y in self._kwargs.items() if y]) return conn_str def update_database(self, database): @@ -49,6 +44,7 @@ class OdbcConnectingArgs: for kv in conn_str.split(";"): if kv: k, v = kv.split("=", 1) + # pylint:disable-next=protected-access args._kwargs[k] = v return args @@ -63,7 +59,10 @@ def default_clickhouse_odbc_conn_str(): OdbcConnectingArgs.create_from_kw( dsn="ClickHouse DSN (ANSI)", Timeout="300", - Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1&create_index_ignore_unique=1", + Url="http://localhost:8123/query?default_format=ODBCDriver2&" + "default_table_engine=MergeTree&union_default_mode=DISTINCT&" + "group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1&" + "create_index_ignore_unique=1", ) ) @@ -82,7 +81,7 @@ class KnownDBMS(str, enum.Enum): clickhouse = "ClickHouse" -class ConnectionWrap(object): +class ConnectionWrap: def __init__(self, connection=None, factory=None, factory_kwargs=None): self._factory = factory self._factory_kwargs = factory_kwargs @@ -126,7 +125,7 @@ class ConnectionWrap(object): f"SELECT name FROM system.tables WHERE database='{self.DATABASE_NAME}'" ) elif self.DBMS_NAME == KnownDBMS.sqlite.value: - list_query = f"SELECT name FROM sqlite_master WHERE type='table'" + list_query = "SELECT name FROM sqlite_master WHERE type='table'" else: logger.warning( "unable to drop all tables for unknown database: %s", self.DBMS_NAME @@ -154,7 +153,7 @@ class ConnectionWrap(object): self._use_database(database) logger.info( "currentDatabase : %s", - execute_request(f"SELECT currentDatabase()", self).get_result(), + execute_request("SELECT currentDatabase()", self).get_result(), ) @contextmanager @@ -174,7 +173,7 @@ class ConnectionWrap(object): def __exit__(self, *args): if hasattr(self._connection, "close"): - return self._connection.close() + self._connection.close() def setup_connection(engine, conn_str=None, make_debug_request=True): @@ -263,7 +262,7 @@ class ExecResult: def assert_no_exception(self): if self.has_exception(): raise ProgramError( - f"request doesn't have a result set, it has the exception", + "request doesn't have a result set, it has the exception", parent=self._exception, ) diff --git a/tests/sqllogic/exceptions.py b/tests/sqllogic/exceptions.py index 30c8983d80f..2e4da3fd78b 100644 --- a/tests/sqllogic/exceptions.py +++ b/tests/sqllogic/exceptions.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from enum import Enum 
- class Error(Exception): def __init__( @@ -45,16 +43,8 @@ class Error(Exception): @property def reason(self): - return ", ".join( - ( - str(x) - for x in [ - super().__str__(), - "details: {}".format(self._details) if self._details else "", - ] - if x - ) - ) + details = f"details: {self._details}" if self._details else "" + return ", ".join((str(x) for x in [super().__str__(), details] if x)) def set_details(self, file=None, name=None, pos=None, request=None, details=None): if file is not None: @@ -88,16 +78,8 @@ class ErrorWithParent(Error): @property def reason(self): - return ", ".join( - ( - str(x) - for x in [ - super().reason, - "exception: {}".format(str(self._parent)) if self._parent else "", - ] - if x - ) - ) + exception = f"exception: {self._parent}" if self._parent else "" + return ", ".join((str(x) for x in [super().reason, exception] if x)) class ProgramError(ErrorWithParent): diff --git a/tests/sqllogic/runner.py b/tests/sqllogic/runner.py index 5f4baf8e59b..2e8e098a099 100755 --- a/tests/sqllogic/runner.py +++ b/tests/sqllogic/runner.py @@ -2,20 +2,25 @@ # -*- coding: utf-8 -*- import argparse -import enum -import os -import logging import csv +import enum import json +import logging import multiprocessing +import os from functools import reduce -from deepdiff import DeepDiff -from connection import setup_connection, Engines, default_clickhouse_odbc_conn_str -from test_runner import TestRunner, Status, RequestType +# isort: off +from deepdiff import DeepDiff # pylint:disable=import-error; for style check +# isort: on -LEVEL_NAMES = [x.lower() for x in logging._nameToLevel.keys() if x != logging.NOTSET] +from connection import Engines, default_clickhouse_odbc_conn_str, setup_connection +from test_runner import RequestType, Status, TestRunner + +LEVEL_NAMES = [ # pylint:disable-next=protected-access + l.lower() for l, n in logging._nameToLevel.items() if n != logging.NOTSET +] def setup_logger(args): @@ -41,7 +46,7 @@ def __write_check_status(status_row, out_dir): if len(status_row) > 140: status_row = status_row[0:135] + "..." 
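# Assumption (comment added for clarity, not in the original source): the
# 140-character cap above appears to match GitHub's limit on commit-status
# descriptions, which is why the row is trimmed to 135 characters plus an
# ellipsis before it is written to check_status.tsv.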
check_status_path = os.path.join(out_dir, "check_status.tsv") - with open(check_status_path, "a") as stream: + with open(check_status_path, "a", encoding="utf-8") as stream: writer = csv.writer(stream, delimiter="\t", lineterminator="\n") writer.writerow(status_row) @@ -60,7 +65,7 @@ def __write_test_result( ): all_stages = reports.keys() test_results_path = os.path.join(out_dir, "test_results.tsv") - with open(test_results_path, "a") as stream: + with open(test_results_path, "a", encoding="utf-8") as stream: writer = csv.writer(stream, delimiter="\t", lineterminator="\n") for stage in all_stages: report = reports[stage] @@ -182,7 +187,7 @@ def mode_check_statements(parser): input_dir, f"check statements:: not a dir {input_dir}" ) - reports = dict() + reports = {} out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") @@ -242,7 +247,7 @@ def mode_check_complete(parser): input_dir, f"check statements:: not a dir {input_dir}" ) - reports = dict() + reports = {} out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") @@ -286,9 +291,9 @@ def make_actual_report(reports): return {stage: report.get_map() for stage, report in reports.items()} -def write_actual_report(actial, out_dir): - with open(os.path.join(out_dir, "actual_report.json"), "w") as f: - f.write(json.dumps(actial)) +def write_actual_report(actual, out_dir): + with open(os.path.join(out_dir, "actual_report.json"), "w", encoding="utf-8") as f: + f.write(json.dumps(actual)) def read_canonic_report(input_dir): @@ -296,13 +301,15 @@ def read_canonic_report(input_dir): if not os.path.exists(file): return {} - with open(os.path.join(input_dir, "canonic_report.json"), "r") as f: + with open( + os.path.join(input_dir, "canonic_report.json"), "r", encoding="utf-8" + ) as f: data = f.read() return json.loads(data) def write_canonic_report(canonic, out_dir): - with open(os.path.join(out_dir, "canonic_report.json"), "w") as f: + with open(os.path.join(out_dir, "canonic_report.json"), "w", encoding="utf-8") as f: f.write(json.dumps(canonic)) @@ -370,7 +377,7 @@ def mode_self_test(parser): if not os.path.isdir(out_dir): raise NotADirectoryError(out_dir, f"self test: not a dir {out_dir}") - reports = dict() + reports = {} out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") diff --git a/tests/sqllogic/test_parser.py b/tests/sqllogic/test_parser.py index cb1144d7dd9..648fa9f6bf6 100755 --- a/tests/sqllogic/test_parser.py +++ b/tests/sqllogic/test_parser.py @@ -2,24 +2,27 @@ # -*- coding: utf-8 -*- import logging -import os - -from itertools import chain from enum import Enum -from hashlib import md5 from functools import reduce +from hashlib import md5 +from itertools import chain + +# isort: off +# pylint:disable=import-error; for style check import sqlglot -from sqlglot.expressions import PrimaryKeyColumnConstraint, ColumnDef +from sqlglot.expressions import ColumnDef, PrimaryKeyColumnConstraint + +# pylint:enable=import-error; for style check +# isort: on from exceptions import ( - Error, - ProgramError, - ErrorWithParent, DataResultDiffer, + Error, + ErrorWithParent, + ProgramError, QueryExecutionError, ) - logger = logging.getLogger("parser") logger.setLevel(logging.DEBUG) @@ -248,6 +251,7 @@ class FileBlockBase: ) block.with_result(result) return block + raise ValueError(f"Unknown block_type {block_type}") def dump_to(self, output): if output is None: @@ -258,9 +262,6 @@ class FileBlockBase: class FileBlockComments(FileBlockBase): - def __init__(self, parser, start, end): - super().__init__(parser, start, end) - def 
get_block_type(self): return BlockType.comments @@ -469,20 +470,18 @@ class QueryResult: ( str(x) for x in [ - "rows: {}".format(self.rows) if self.rows else "", - "values_count: {}".format(self.values_count) - if self.values_count - else "", - "data_hash: {}".format(self.data_hash) if self.data_hash else "", - "exception: {}".format(self.exception) if self.exception else "", - "hash_threshold: {}".format(self.hash_threshold) + f"rows: {self.rows}" if self.rows else "", + f"values_count: {self.values_count}" if self.values_count else "", + f"data_hash: {self.data_hash}" if self.data_hash else "", + f"exception: {self.exception}" if self.exception else "", + f"hash_threshold: {self.hash_threshold}" if self.hash_threshold else "", ] if x ) ) - return "QueryResult({})".format(params) + return f"QueryResult({params})" def __iter__(self): if self.rows is not None: @@ -491,12 +490,10 @@ class QueryResult: if self.values_count <= self.hash_threshold: return iter(self.rows) if self.data_hash is not None: - return iter( - [["{} values hashing to {}".format(self.values_count, self.data_hash)]] - ) + return iter([[f"{self.values_count} values hashing to {self.data_hash}"]]) if self.exception is not None: - return iter([["exception: {}".format(self.exception)]]) - raise ProgramError("Query result is empty", details="{}".format(self.__str__())) + return iter([[f"exception: {self.exception}"]]) + raise ProgramError("Query result is empty", details=str(self)) @staticmethod def __value_count(rows): @@ -528,7 +525,7 @@ class QueryResult: for row in rows: res_row = [] for c, t in zip(row, types): - logger.debug(f"Builging row. c:{c} t:{t}") + logger.debug("Builging row. c:%s t:%s", c, t) if c is None: res_row.append("NULL") continue @@ -541,7 +538,7 @@ class QueryResult: elif t == "I": try: res_row.append(str(int(c))) - except ValueError as ex: + except ValueError: # raise QueryExecutionError( # f"Got non-integer result '{c}' for I type." # ) @@ -549,7 +546,7 @@ class QueryResult: except OverflowError as ex: raise QueryExecutionError( f"Got overflowed result '{c}' for I type." 
- ) + ) from ex elif t == "R": res_row.append(f"{c:.3f}") @@ -567,6 +564,7 @@ class QueryResult: values = list(chain(*rows)) values.sort() return [values] if values else [] + return [] @staticmethod def __calculate_hash(rows): @@ -595,9 +593,9 @@ class QueryResult: # do not print details to the test file # but print original exception if isinstance(e, ErrorWithParent): - message = "{}, original is: {}".format(e, e.get_parent()) + message = f"{e}, original is: {e.get_parent()}" else: - message = "{}".format(e) + message = str(e) return QueryResult(exception=message) @@ -616,9 +614,8 @@ class QueryResult: "canonic and actual results have different exceptions", details=f"canonic: {canonic.exception}, actual: {actual.exception}", ) - else: - # exceptions are the same - return + # exceptions are the same + return elif canonic.exception is not None: raise DataResultDiffer( "canonic result has exception and actual result doesn't", @@ -639,9 +636,8 @@ class QueryResult: if canonic.values_count != actual.values_count: raise DataResultDiffer( "canonic and actual results have different value count", - details="canonic values count {}, actual {}".format( - canonic.values_count, actual.values_count - ), + details=f"canonic values count {canonic.values_count}, " + f"actual {actual.values_count}", ) if canonic.data_hash != actual.data_hash: raise DataResultDiffer( @@ -653,9 +649,8 @@ class QueryResult: if canonic.values_count != actual.values_count: raise DataResultDiffer( "canonic and actual results have different value count", - details="canonic values count {}, actual {}".format( - canonic.values_count, actual.values_count - ), + details=f"canonic values count {canonic.values_count}, " + f"actual {actual.values_count}", ) if canonic.rows != actual.rows: raise DataResultDiffer( @@ -665,5 +660,5 @@ class QueryResult: raise ProgramError( "Unable to compare results", - details="actual {}, canonic {}".format(actual, canonic), + details=f"actual {actual}, canonic {canonic}", ) diff --git a/tests/sqllogic/test_runner.py b/tests/sqllogic/test_runner.py index baec0dc7924..8f2242a45b9 100644 --- a/tests/sqllogic/test_runner.py +++ b/tests/sqllogic/test_runner.py @@ -1,25 +1,23 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- import enum -import logging -import os -import traceback import io import json +import logging +import os import test_parser +from connection import execute_request from exceptions import ( + DataResultDiffer, Error, ProgramError, - DataResultDiffer, - StatementExecutionError, - StatementSuccess, QueryExecutionError, QuerySuccess, SchemeResultDiffer, + StatementExecutionError, + StatementSuccess, ) -from connection import execute_request - logger = logging.getLogger("parser") logger.setLevel(logging.DEBUG) @@ -55,6 +53,7 @@ class Status(str, enum.Enum): class TestStatus: def __init__(self): + self.name = None self.status = None self.file = None self.position = None @@ -155,7 +154,7 @@ class SimpleStats: self.success += 1 def get_map(self): - result = dict() + result = {} result["success"] = self.success result["fail"] = self.fail return result @@ -187,7 +186,7 @@ class Stats: choose.update(status) def get_map(self): - result = dict() + result = {} result["statements"] = self.statements.get_map() result["queries"] = self.queries.get_map() result["total"] = self.total.get_map() @@ -205,7 +204,7 @@ class OneReport: self.test_name = test_name self.test_file = test_file self.stats = Stats() - self.requests = dict() # type: dict(int, TestStatus) + self.requests = {} def update(self, status): if 
not isinstance(status, TestStatus): @@ -218,11 +217,11 @@ class OneReport: return str(self.get_map()) def get_map(self): - result = dict() + result = {} result["test_name"] = self.test_name result["test_file"] = self.test_file result["stats"] = self.stats.get_map() - result["requests"] = dict() + result["requests"] = {} requests = result["requests"] for pos, status in self.requests.items(): requests[pos] = status.get_map() @@ -233,7 +232,7 @@ class Report: def __init__(self, dbms_name, input_dir=None): self.dbms_name = dbms_name self.stats = Stats() - self.tests = dict() # type: dict(str, OneReport) + self.tests = {} self.input_dir = input_dir self.output_dir = None @@ -256,7 +255,7 @@ class Report: self.output_dir = res_dir def get_map(self): - result = dict() + result = {} result["dbms_name"] = self.dbms_name result["stats"] = self.stats.get_map() result["input_dir"] = self.input_dir @@ -264,7 +263,7 @@ class Report: result["input_dir"] = self.input_dir if self.output_dir is not None: result["output_dir"] = self.output_dir - result["tests"] = dict() + result["tests"] = {} tests = result["tests"] for test_name, one_report in self.tests.items(): tests.update({test_name: one_report.get_map()}) @@ -297,8 +296,8 @@ class Report: def write_report(self, report_dir): report_path = os.path.join(report_dir, "report.json") - logger.info(f"create file {report_path}") - with open(report_path, "w") as stream: + logger.info("create file %s", report_path) + with open(report_path, "w", encoding="utf-8") as stream: stream.write(json.dumps(self.get_map(), indent=4)) @@ -434,38 +433,34 @@ class TestRunner: details=f"expected error: {expected_error}", parent=exec_res.get_exception(), ) - else: - clogger.debug("errors matched") - raise QuerySuccess() - else: - clogger.debug("missed error") - raise QueryExecutionError( - "query is expected to fail with error", - details="expected error: {}".format(expected_error), + clogger.debug("errors matched") + raise QuerySuccess() + clogger.debug("missed error") + raise QueryExecutionError( + "query is expected to fail with error", + details=f"expected error: {expected_error}", + ) + clogger.debug("success is expected") + if exec_res.has_exception(): + clogger.debug("had error") + if self.verify: + clogger.debug("verify mode") + canonic = test_parser.QueryResult.parse_it( + block.get_result(), 10 ) - else: - clogger.debug("success is expected") - if exec_res.has_exception(): - clogger.debug("had error") - if self.verify: - clogger.debug("verify mode") - canonic = test_parser.QueryResult.parse_it( - block.get_result(), 10 - ) - exception = QueryExecutionError( - "query execution failed with an exception", - parent=exec_res.get_exception(), - ) - actual = test_parser.QueryResult.as_exception(exception) - test_parser.QueryResult.assert_eq(canonic, actual) - block.with_result(actual) - raise QuerySuccess() - else: - clogger.debug("completion mode") - raise QueryExecutionError( - "query execution failed with an exception", - parent=exec_res.get_exception(), - ) + exception = QueryExecutionError( + "query execution failed with an exception", + parent=exec_res.get_exception(), + ) + actual = test_parser.QueryResult.as_exception(exception) + test_parser.QueryResult.assert_eq(canonic, actual) + block.with_result(actual) + raise QuerySuccess() + clogger.debug("completion mode") + raise QueryExecutionError( + "query execution failed with an exception", + parent=exec_res.get_exception(), + ) canonic_types = block.get_types() clogger.debug("canonic types %s", canonic_types) @@ -476,9 
+471,8 @@ class TestRunner: if canonic_columns_count != actual_columns_count: raise SchemeResultDiffer( "canonic and actual columns count differ", - details="expected columns {}, actual columns {}".format( - canonic_columns_count, actual_columns_count - ), + details=f"expected columns {canonic_columns_count}, " + f"actual columns {actual_columns_count}", ) actual = test_parser.QueryResult.make_it( @@ -528,7 +522,7 @@ class TestRunner: self.report = Report(self.dbms_name, self._input_dir) if self.results is None: - self.results = dict() + self.results = {} if self.dbms_name == "ClickHouse" and test_name in [ "test/select5.test", @@ -536,7 +530,7 @@ class TestRunner: "test/evidence/slt_lang_replace.test", "test/evidence/slt_lang_droptrigger.test", ]: - logger.info(f"Let's skip test %s for ClickHouse", test_name) + logger.info("Let's skip test %s for ClickHouse", test_name) return with self.connection.with_one_test_scope(): @@ -565,7 +559,7 @@ class TestRunner: test_name = os.path.relpath(test_file, start=self._input_dir) logger.debug("open file %s", test_name) - with open(test_file, "r") as stream: + with open(test_file, "r", encoding="utf-8") as stream: self.run_one_test(stream, test_name, test_file) def run_all_tests_from_dir(self, input_dir): @@ -582,10 +576,10 @@ class TestRunner: for test_name, stream in self.results.items(): test_file = os.path.join(dir_path, test_name) - logger.info(f"create file {test_file}") + logger.info("create file %s", test_file) result_dir = os.path.dirname(test_file) os.makedirs(result_dir, exist_ok=True) - with open(test_file, "w") as output: + with open(test_file, "w", encoding="utf-8") as output: output.write(stream.getvalue()) def write_report(self, report_dir): diff --git a/tests/tsan_suppressions.txt b/tests/tsan_suppressions.txt index 67c7eae08f3..4f29925761b 100644 --- a/tests/tsan_suppressions.txt +++ b/tests/tsan_suppressions.txt @@ -1,2 +1,4 @@ # https://github.com/ClickHouse/ClickHouse/issues/55629 race:rd_kafka_broker_set_nodename +# https://github.com/ClickHouse/ClickHouse/issues/60443 +race:rd_kafka_stats_emit_all diff --git a/utils/check-marks/main.cpp b/utils/check-marks/main.cpp index df6f6e5267e..b4cd44d6eb7 100644 --- a/utils/check-marks/main.cpp +++ b/utils/check-marks/main.cpp @@ -23,7 +23,7 @@ static void checkByCompressedReadBuffer(const std::string & mrk_path, const std: DB::CompressedReadBufferFromFile bin_in(DB::createReadBufferFromFileBase(bin_path, /* settings= */ {})); DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); - bool mrk2_format = boost::algorithm::ends_with(mrk_path, ".mrk2"); + bool mrk2_format = mrk_path.ends_with(".mrk2"); for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num) { diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 0b29b0f0709..57a8e0d5840 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2724 +personal_ws-1.1 en 2758 AArch ACLs ALTERs @@ -112,6 +112,7 @@ CDMA CESU CIDR CIDRToRange +CKMAN CLOB CLion CMPLNT @@ -259,6 +260,7 @@ ExactEdgeLengthRads ExecutablePool ExtType ExternalDistributed +FFFFFFFF FFFD FIPS FOSDEM @@ -545,6 +547,8 @@ MinIO MinMax MindsDB Mongodb +mortonDecode +mortonEncode MsgPack MultiPolygon Multiline @@ -684,7 +688,6 @@ PartsCommitted PartsCompact PartsDeleteOnDestroy PartsDeleting -PartsInMemory PartsOutdated PartsPreActive PartsPreCommitted @@ -840,6 +843,7 @@ Sematext SendExternalTables SendScalars 
ShareAlike +SharedMergeTree Shortkeys SimHash Simhash @@ -991,6 +995,7 @@ VIEWs Vadim Valgrind Vectorized +VersionBadge VersionInteger VersionedCollapsingMergeTree VideoContainer @@ -1076,6 +1081,8 @@ anyheavy anylast appendTrailingCharIfAbsent approximative +approxtopk +approxtopsum argMax argMin argmax @@ -1236,6 +1243,7 @@ buildable builtins byteHammingDistance byteSize +byteSlice byteSwap bytebase bytesToCutForIPv @@ -1265,6 +1273,8 @@ centroid certificateFile cetera cfg +cgroup +cgroups chadmin changelog changelogs @@ -1284,6 +1294,7 @@ cipherList ciphertext cityHash cityhash +ckman clangd cli clickcache @@ -1611,6 +1622,7 @@ greaterorequals greenspace groupArray groupArrayInsertAt +groupArrayIntersect groupArrayLast groupArrayMovingAvg groupArrayMovingSum @@ -1626,6 +1638,7 @@ groupBitmapXor groupUniqArray grouparray grouparrayinsertat +grouparrayintersect grouparraylast grouparraymovingavg grouparraymovingsum @@ -1833,6 +1846,7 @@ linearized lineasstring linefeeds lineorder +linestring linux llvm loadDefaultCAFile @@ -1888,6 +1902,7 @@ mdadm meanZTest meanztest mebibytes +mergeTreeIndex mergeable mergetree messageID @@ -2190,7 +2205,9 @@ rankCorr rapidjson rawblob readWKTMultiPolygon +readWKTPoint readWKTPolygon +readWKTRing readahead readline readme @@ -2313,6 +2330,14 @@ shortcircuit shortkeys shoutout simdjson +simpleJSON +simpleJSONExtractBool +simpleJSONExtractFloat +simpleJSONExtractInt +simpleJSONExtractRaw +simpleJSONExtractString +simpleJSONExtractUInt +simpleJSONHas simpleLinearRegression simpleaggregatefunction simplelinearregression @@ -2415,6 +2440,7 @@ subranges subreddits subseconds subsequence +substreams substring substringIndex substringIndexUTF @@ -2478,6 +2504,7 @@ theilsu themself threadpool throwIf +timeDiff timeSlot timeSlots timeZone @@ -2529,6 +2556,7 @@ toRelativeSecondNum toRelativeWeekNum toRelativeYearNum toSecond +toMillisecond toStartOfDay toStartOfFifteenMinutes toStartOfFiveMinutes @@ -2664,7 +2692,9 @@ userver utils uuid varPop +varPopStable varSamp +varSampStable variadic variantElement variantType @@ -2714,6 +2744,7 @@ wordShingleSimHashCaseInsensitive wordShingleSimHashCaseInsensitiveUTF wordShingleSimHashUTF wordshingleMinHash +writability wrt xcode xeus @@ -2725,6 +2756,7 @@ xz yaml yandex youtube +ZCurve zLib zLinux zabbix diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 6b3fe86d310..5c1276e5732 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -4,8 +4,6 @@ TU_EXCLUDES=( CastOverloadResolver - AggregateFunctionMax - AggregateFunctionMin AggregateFunctionUniq FunctionsConversion diff --git a/utils/check-style/check-pylint b/utils/check-style/check-pylint new file mode 100755 index 00000000000..7959a414023 --- /dev/null +++ b/utils/check-style/check-pylint @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + + +LC_ALL="en_US.UTF-8" +ROOT_PATH=$(git rev-parse --show-toplevel) + +function xargs-pylint { + # $1 is number maximum arguments per pylint process + sort | awk '$2=="text/x-script.python" {print $1}' | \ + xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n +} + +find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 +# Beware, there lambdas are checked. 
All of them contain `app`, and it causes brain-cucumber-zalgo +find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 diff --git a/utils/check-style/check-style b/utils/check-style/check-style index badfd173172..d7387c3f843 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -50,11 +50,6 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/n # Broken symlinks find -L $ROOT_PATH -type l 2>/dev/null | grep -v contrib && echo "^ Broken symlinks found" -# Double whitespaces -find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null | - grep -vP $EXCLUDE_DIRS | - while read i; do $ROOT_PATH/utils/check-style/double-whitespaces.pl < $i || echo -e "^ File $i contains double whitespaces\n"; done - # Unused/Undefined/Duplicates ErrorCodes/ProfileEvents/CurrentMetrics declare -A EXTERN_TYPES EXTERN_TYPES[ErrorCodes]=int @@ -76,6 +71,7 @@ EXTERN_TYPES_EXCLUDES=( ProfileEvents::getProfileEvents ProfileEvents::ThreadIdToCountersSnapshot ProfileEvents::LOCAL_NAME + ProfileEvents::keeper_profile_events ProfileEvents::CountersIncrement CurrentMetrics::add @@ -87,6 +83,7 @@ EXTERN_TYPES_EXCLUDES=( CurrentMetrics::Metric CurrentMetrics::values CurrentMetrics::Value + CurrentMetrics::keeper_metrics ErrorCodes::ErrorCode ErrorCodes::getName @@ -106,7 +103,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { # NOTE: the check is pretty dumb and distinguish only by the type_of_extern, # and this matches with zkutil::CreateMode - grep -v 'src/Common/ZooKeeper/Types.h' + grep -v -e 'src/Common/ZooKeeper/Types.h' -e 'src/Coordination/KeeperConstants.cpp' } | { grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars" } | while read file; do @@ -155,9 +152,6 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | grep -vP $EXCLUDE_DIRS | xargs xmllint --noout --nonet -# FIXME: for now only clickhouse-test -pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/ci/*.py - find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | xargs yamllint --config-file=$ROOT_PATH/.yamllint @@ -178,6 +172,8 @@ for test_case in "${tests_with_query_log[@]}"; do } || echo "Queries to system.query_log/system.query_thread_log does not have current_database = currentDatabase() condition in $test_case" done +grep -iE 'SYSTEM STOP MERGES;?$' -R $ROOT_PATH/tests/queries && echo "Merges cannot be disabled globally in fast/stateful/stateless tests, because it will break concurrently running queries" + # There shouldn't be large jumps between test numbers (since they should be consecutive) max_diff=$( find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' | @@ -440,3 +436,14 @@ ls -1d $ROOT_PATH/contrib/*-cmake | xargs -I@ find @ -name 'CMakeLists.txt' -or # DOS/Windows newlines find $ROOT_PATH/{base,src,programs,utils,docs} -name '*.md' -or -name '*.h' -or -name '*.cpp' -or -name '*.js' -or -name '*.py' -or -name '*.html' | xargs grep -l -P '\r$' && echo "^ Files contain DOS/Windows newlines (\r\n instead of \n)." + +# Wrong spelling of abbreviations, e.g. SQL is right, Sql is wrong. XMLHttpRequest is very wrong. 
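# Illustrative note (added for clarity, not in the original script): a
# hypothetical identifier such as `readHttpHeader` would match the 'Http'
# pattern below and should be spelled `readHTTPHeader`; hits from the allowed
# list (RabbitMQ, Azure, Aws, Avro, IO/S3, ...) are filtered out by the second grep.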
+find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | + grep -vP $EXCLUDE_DIRS | + xargs grep -P 'Sql|Html|Xml|Cpu|Tcp|Udp|Http|Db|Json|Yaml' | grep -v -P 'RabbitMQ|Azure|Aws|aws|Avro|IO/S3' && + echo "Abbreviations such as SQL, XML, HTTP, should be in all caps. For example, SQL is right, Sql is wrong. XMLHttpRequest is very wrong." + +find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | + grep -vP $EXCLUDE_DIRS | + xargs grep -F -i 'ErrorCodes::LOGICAL_ERROR, "Logical error:' && + echo "If an exception has LOGICAL_ERROR code, there is no need to include the text 'Logical error' in the exception message, because then the phrase 'Logical error' will be printed twice." diff --git a/utils/check-style/check-whitespaces b/utils/check-style/check-whitespaces index 5a20569868d..507b1dd2ede 100755 --- a/utils/check-style/check-whitespaces +++ b/utils/check-style/check-whitespaces @@ -2,8 +2,9 @@ ROOT_PATH=$(git rev-parse --show-toplevel) EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|memcpy/|consistent-hashing/|Parsers/New' +NPROC=$(($(nproc) + 3)) # Double whitespaces find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null | grep -vP $EXCLUDE_DIRS | - while read i; do $ROOT_PATH/utils/check-style/double-whitespaces.pl < $i || echo -e "^ File $i contains double whitespaces\n"; done + xargs -P "$NPROC" -n 20 "${ROOT_PATH}/utils/check-style/double-whitespaces.pl" diff --git a/utils/check-style/check_cpp.sh b/utils/check-style/check_cpp.sh new file mode 100755 index 00000000000..ea90d79418c --- /dev/null +++ b/utils/check-style/check_cpp.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# yaml check is not the best one + +cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv + +start_total=`date +%s` + +# FIXME: 30 sec to wait +# echo "Check duplicates" | ts +# ./check-duplicate-includes.sh |& tee /test_output/duplicate_includes_output.txt + +start=`date +%s` +./check-style -n |& tee /test_output/style_output.txt +runtime=$((`date +%s`-start)) +echo "Check style. Done. $runtime seconds." + +start=`date +%s` +./check-whitespaces -n |& tee /test_output/whitespaces_output.txt +runtime=$((`date +%s`-start)) +echo "Check whitespaces. Done. $runtime seconds." + +start=`date +%s` +./check-workflows |& tee /test_output/workflows_output.txt +runtime=$((`date +%s`-start)) +echo "Check workflows. Done. $runtime seconds." + +start=`date +%s` +./check-submodules |& tee /test_output/submodules_output.txt +runtime=$((`date +%s`-start)) +echo "Check submodules. Done. $runtime seconds." + +# FIXME: 6 min to wait +# echo "Check shell scripts with shellcheck" | ts +# ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt + +runtime=$((`date +%s`-start_total)) +echo "Check style total. Done. $runtime seconds." diff --git a/utils/check-style/check_docs.sh b/utils/check-style/check_docs.sh new file mode 100755 index 00000000000..78b8a402ea0 --- /dev/null +++ b/utils/check-style/check_docs.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# yaml check is not the best one + +cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv + +start_total=`date +%s` + +start=`date +%s` +./check-typos |& tee /test_output/typos_output.txt +runtime=$((`date +%s`-start)) +echo "Check typos. Done. $runtime seconds." + +start=`date +%s` +./check-doc-aspell |& tee /test_output/docs_spelling_output.txt +runtime=$((`date +%s`-start)) +echo "Check docs spelling. Done. 
$runtime seconds." + +runtime=$((`date +%s`-start_total)) +echo "Check Docs, total. Done. $runtime seconds." diff --git a/utils/check-style/check_py.sh b/utils/check-style/check_py.sh new file mode 100755 index 00000000000..13f4e754ed3 --- /dev/null +++ b/utils/check-style/check_py.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv + +start_total=`date +%s` + +# FIXME: 1 min to wait + head checkout +echo "Check python formatting with black" | ts +./check-black -n |& tee /test_output/black_output.txt + +start=`date +%s` +./check-pylint -n |& tee /test_output/pylint_output.txt +runtime=$((`date +%s`-start)) +echo "Check pylint. Done. $runtime seconds." + +start=`date +%s` +./check-mypy -n |& tee /test_output/mypy_output.txt +runtime=$((`date +%s`-start)) +echo "Check python type hinting with mypy. Done. $runtime seconds." + +runtime=$((`date +%s`-start_total)) +echo "Check python total. Done. $runtime seconds." diff --git a/utils/check-style/double-whitespaces.pl b/utils/check-style/double-whitespaces.pl index 47b03cb74ab..daeddecbd27 100755 --- a/utils/check-style/double-whitespaces.pl +++ b/utils/check-style/double-whitespaces.pl @@ -5,27 +5,31 @@ use strict; # Find double whitespace such as "a, b, c" that looks very ugly and annoying. # But skip double whitespaces if they are used as an alignment - by comparing to surrounding lines. -my @array; - -while (<>) -{ - push @array, $_; -} - my $ret = 0; -for (my $i = 1; $i < $#array; ++$i) +foreach my $file (@ARGV) { - if ($array[$i] =~ ',( {2,3})[^ /]') - { - # https://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl + my @array; - if ((substr($array[$i - 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) # whitespaces are not part of alignment - && (substr($array[$i + 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) - && $array[$i] !~ /(-?\d+\w*,\s+){3,}/) # this is not a number table like { 10, -1, 2 } + open (FH,'<',$file); + while () + { + push @array, $_; + } + + for (my $i = 1; $i < $#array; ++$i) + { + if ($array[$i] =~ ',( {2,3})[^ /]') { - print(($i + 1) . ":" . $array[$i]); - $ret = 1; + # https://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl + + if ((substr($array[$i - 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) # whitespaces are not part of alignment + && (substr($array[$i + 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) + && $array[$i] !~ /(-?\d+\w*,\s+){3,}/) # this is not a number table like { 10, -1, 2 } + { + print($file . ":" . ($i + 1) . 
$array[$i]); + $ret = 1; + } } } } diff --git a/docker/test/style/process_style_check_result.py b/utils/check-style/process_style_check_result.py similarity index 80% rename from docker/test/style/process_style_check_result.py rename to utils/check-style/process_style_check_result.py index bc06df1af31..8c9837b4fc6 100755 --- a/docker/test/style/process_style_check_result.py +++ b/utils/check-style/process_style_check_result.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -import os -import logging import argparse import csv +import logging +import os # TODO: add typing and log files to the fourth column, think about launching @@ -13,9 +13,10 @@ def process_result(result_folder): description = "" test_results = [] checks = ( - "duplicate includes", - "shellcheck", + # "duplicate includes", + # "shellcheck", "style", + "pylint", "black", "mypy", "typos", @@ -29,11 +30,15 @@ def process_result(result_folder): out_file = name.replace(" ", "_") + "_output.txt" full_path = os.path.join(result_folder, out_file) if not os.path.exists(full_path): - logging.info("No %s check log on path %s", name, full_path) - return "exception", f"No {name} check log", [] + test_results.append((f"Check {name}", "SKIPPED")) elif os.stat(full_path).st_size != 0: + with open(full_path, "r") as file: + lines = file.readlines() + if len(lines) > 100: + lines = lines[:100] + ["====TRIMMED===="] + content = "\n".join(lines) description += f"Check {name} failed. " - test_results.append((f"Check {name}", "FAIL")) + test_results.append((f"Check {name}", "FAIL", None, content)) status = "failure" else: test_results.append((f"Check {name}", "OK")) @@ -41,6 +46,8 @@ def process_result(result_folder): if not description: description += "Style check success" + assert test_results, "No single style-check output found" + return status, description, test_results diff --git a/utils/check-style/shellcheck-run.sh b/utils/check-style/shellcheck-run.sh index bdb0f681c31..5930e537703 100755 --- a/utils/check-style/shellcheck-run.sh +++ b/utils/check-style/shellcheck-run.sh @@ -2,13 +2,13 @@ ROOT_PATH=$(git rev-parse --show-toplevel) NPROC=$(($(nproc) + 3)) # Check sh tests with Shellcheck -( cd "$ROOT_PATH/tests/queries/0_stateless/" && \ - find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \ - xargs -0 -P "$NPROC" -n 20 shellcheck --check-sourced --external-sources --severity info --exclude SC1071,SC2086,SC2016 -) +find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \ + xargs -0 -P "$NPROC" -n 20 shellcheck --check-sourced --external-sources --source-path=SCRIPTDIR \ + --severity info --exclude SC1071,SC2086,SC2016 # Check docker scripts with shellcheck -find "$ROOT_PATH/docker" -executable -type f -exec file -F' ' --mime-type {} \; | \ - awk -F' ' '$2==" text/x-shellscript" {print $1}' | \ +# Do not check sourced files, since it causes broken --source-path=SCRIPTDIR +find "$ROOT_PATH/docker" -type f -exec file -F' ' --mime-type {} + | \ + awk '$2=="text/x-shellscript" {print $1}' | \ grep -v "compare.sh" | \ - xargs -P "$NPROC" -n 20 shellcheck + xargs -P "$NPROC" -n 20 shellcheck --external-sources --source-path=SCRIPTDIR diff --git a/utils/clickhouse-diagnostics/README.md b/utils/clickhouse-diagnostics/README.md index 9a86ad535fd..01bb543c9a5 100644 --- a/utils/clickhouse-diagnostics/README.md +++ b/utils/clickhouse-diagnostics/README.md @@ -318,7 +318,7 @@ SHOW ACCESS ``` **result** ``` -CREATE USER default IDENTIFIED WITH plaintext_password SETTINGS PROFILE default +CREATE USER 
default IDENTIFIED WITH plaintext_password SETTINGS PROFILE `default` CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = 'random' CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1 CREATE QUOTA default KEYED BY user_name FOR INTERVAL 1 hour TRACKING ONLY TO default diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 351a4ab90bc..21626665a42 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -63,11 +63,11 @@ int main(int argc, char *argv[]) ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); - KeeperContextPtr keeper_context = std::make_shared(true); + KeeperContextPtr keeper_context = std::make_shared(true, settings); keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", argv[1])); - auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); size_t last_commited_index = state_machine->last_commit_index(); diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 23fc0032056..e372e407ce1 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.2.1.2248-stable 2024-02-29 v24.1.5.6-stable 2024-02-14 v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09 @@ -24,6 +25,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.10.43-lts 2024-03-05 v23.8.9.54-lts 2024-01-05 v23.8.8.20-lts 2023-11-25 v23.8.7.24-lts 2023-11-17 @@ -53,6 +55,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.20.27-lts 2024-03-05 v23.3.19.32-lts 2024-01-05 v23.3.18.15-lts 2023-11-25 v23.3.17.13-lts 2023-11-17 diff --git a/utils/memcpy-bench/FastMemcpy.h b/utils/memcpy-bench/FastMemcpy.h index 85d09c5f53e..650a6761771 100644 --- a/utils/memcpy-bench/FastMemcpy.h +++ b/utils/memcpy-bench/FastMemcpy.h @@ -33,9 +33,11 @@ #endif #endif +/// NOLINTBEGIN(modernize-use-using) typedef __attribute__((__aligned__(1))) uint16_t uint16_unaligned_t; typedef __attribute__((__aligned__(1))) uint32_t uint32_unaligned_t; typedef __attribute__((__aligned__(1))) uint64_t uint64_unaligned_t; +/// NOLINTEND(modernize-use-using) //--------------------------------------------------------------------- // fast copy for different sizes @@ -98,7 +100,7 @@ __attribute__((__no_sanitize__("undefined"))) inline void *memcpy_tiny(void * __ unsigned char *dd = ((unsigned char*)dst) + size; const unsigned char *ss = ((const unsigned char*)src) + size; - switch (size) + switch (size) /// NOLINT(bugprone-switch-missing-default-case) { case 64: memcpy_sse2_64(dd - 64, ss - 64); @@ -652,7 +654,7 @@ __attribute__((__no_sanitize__("undefined"))) inline void *memcpy_tiny(void * __ //--------------------------------------------------------------------- // main routine //--------------------------------------------------------------------- -void* memcpy_fast_sse(void * __restrict destination, const void * __restrict source, size_t size) +void* memcpy_fast_sse(void * __restrict destination, const void * __restrict source, size_t size) /// NOLINT(misc-definitions-in-headers) { unsigned char 
*dst = (unsigned char*)destination; const unsigned char *src = (const unsigned char*)source; diff --git a/utils/memcpy-bench/FastMemcpy_Avx.h b/utils/memcpy-bench/FastMemcpy_Avx.h index ee7d4e19536..aecf8abbe03 100644 --- a/utils/memcpy-bench/FastMemcpy_Avx.h +++ b/utils/memcpy-bench/FastMemcpy_Avx.h @@ -103,7 +103,7 @@ static INLINE void *memcpy_tiny_avx(void * __restrict dst, const void * __restri unsigned char *dd = reinterpret_cast(dst) + size; const unsigned char *ss = reinterpret_cast(src) + size; - switch (size) + switch (size) /// NOLINT(bugprone-switch-missing-default-case) { case 128: memcpy_avx_128(dd - 128, ss - 128); [[fallthrough]]; case 0: break; @@ -371,7 +371,7 @@ static INLINE void *memcpy_tiny_avx(void * __restrict dst, const void * __restri //--------------------------------------------------------------------- // main routine //--------------------------------------------------------------------- -void* memcpy_fast_avx(void * __restrict destination, const void * __restrict source, size_t size) +void* memcpy_fast_avx(void * __restrict destination, const void * __restrict source, size_t size) /// NOLINT(misc-definitions-in-headers) { unsigned char *dst = reinterpret_cast(destination); const unsigned char *src = reinterpret_cast(source); diff --git a/utils/zookeeper-cli/zookeeper-cli.cpp b/utils/zookeeper-cli/zookeeper-cli.cpp index fe11c66ea9c..4aef29847ce 100644 --- a/utils/zookeeper-cli/zookeeper-cli.cpp +++ b/utils/zookeeper-cli/zookeeper-cli.cpp @@ -69,7 +69,7 @@ int main(int argc, char ** argv) Poco::Logger::root().setChannel(channel); Poco::Logger::root().setLevel("trace"); - zkutil::ZooKeeper zk{zkutil::ZooKeeperArgs(argv[1])}; + auto zk = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(zkutil::ZooKeeperArgs(argv[1])); DB::LineReader lr({}, false, {"\\"}, {}); do @@ -96,7 +96,7 @@ int main(int argc, char ** argv) ss >> w; bool watch = w == "w"; zkutil::EventPtr event = watch ? std::make_shared() : nullptr; - std::vector v = zk.getChildren(path, nullptr, event); + std::vector v = zk->getChildren(path, nullptr, event); for (const auto & child : v) std::cout << child << std::endl; if (watch) @@ -132,15 +132,15 @@ int main(int argc, char ** argv) std::cout << "Bad create mode" << std::endl; continue; } - std::cout << zk.create(path, data, m) << std::endl; + std::cout << zk->create(path, data, m) << std::endl; } else if (cmd == "remove") { - zk.remove(path); + zk->remove(path); } else if (cmd == "rmr") { - zk.removeRecursive(path); + zk->removeRecursive(path); } else if (cmd == "exists") { @@ -149,7 +149,7 @@ int main(int argc, char ** argv) bool watch = w == "w"; zkutil::EventPtr event = watch ? std::make_shared() : nullptr; Coordination::Stat stat; - bool e = zk.exists(path, &stat, event); + bool e = zk->exists(path, &stat, event); if (e) printStat(stat); else @@ -164,7 +164,7 @@ int main(int argc, char ** argv) bool watch = w == "w"; zkutil::EventPtr event = watch ? 
std::make_shared() : nullptr; Coordination::Stat stat; - std::string data = zk.get(path, &stat, event); + std::string data = zk->get(path, &stat, event); std::cout << "Data: " << data << std::endl; printStat(stat); if (watch) @@ -188,7 +188,7 @@ int main(int argc, char ** argv) DB::readText(version, in); Coordination::Stat stat; - zk.set(path, data, version, &stat); + zk->set(path, data, version, &stat); printStat(stat); } else if (!cmd.empty()) diff --git a/utils/zookeeper-dump-tree/main.cpp b/utils/zookeeper-dump-tree/main.cpp index d85762df640..798eeeaebae 100644 --- a/utils/zookeeper-dump-tree/main.cpp +++ b/utils/zookeeper-dump-tree/main.cpp @@ -33,7 +33,7 @@ int main(int argc, char ** argv) bool dump_ctime = options.count("ctime"); - zkutil::ZooKeeperPtr zookeeper = std::make_shared(options.at("address").as()); + zkutil::ZooKeeperPtr zookeeper = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(options.at("address").as()); std::string initial_path = options.at("path").as(); diff --git a/utils/zookeeper-remove-by-list/main.cpp b/utils/zookeeper-remove-by-list/main.cpp index cf194bd4861..4de300ca98a 100644 --- a/utils/zookeeper-remove-by-list/main.cpp +++ b/utils/zookeeper-remove-by-list/main.cpp @@ -26,7 +26,7 @@ try return 1; } - zkutil::ZooKeeper zookeeper(options.at("address").as()); + auto zookeeper = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(options.at("address").as()); DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); std::list> futures; @@ -37,7 +37,7 @@ try std::string path; DB::readEscapedString(path, in); DB::assertString("\n", in); - futures.emplace_back(zookeeper.asyncRemove(path)); + futures.emplace_back(zookeeper->asyncRemove(path)); std::cerr << "."; } std::cerr << "\n";
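The final three hunks apply one and the same migration to the standalone ZooKeeper utilities: instead of constructing `zkutil::ZooKeeper` directly (on the stack or via `std::make_shared`), they obtain the client from `zkutil::ZooKeeper::createWithoutKillingPreviousSessions`, which returns a `zkutil::ZooKeeperPtr`, so every member call changes from `.` to `->`. A minimal sketch of the same pattern for an in-tree tool; the header path is inferred from the `src/Common/ZooKeeper` layout referenced earlier in check-style and is not shown by this diff:

```cpp
#include <iostream>
#include <string>

/// Assumed header for zkutil::ZooKeeper / zkutil::ZooKeeperArgs; it is only
/// available inside the ClickHouse source tree, so this is a sketch rather
/// than a standalone program.
#include <Common/ZooKeeper/ZooKeeper.h>

void listChildren(const std::string & hosts, const std::string & path)
{
    /// Old style, as removed by the hunks above:
    ///     zkutil::ZooKeeper zk{zkutil::ZooKeeperArgs(hosts)};
    ///     for (const auto & child : zk.getChildren(path, nullptr, nullptr)) ...
    ///
    /// New style: the factory hands back a shared pointer and, per its name,
    /// does not kill sessions left over from previous runs of the tool.
    auto zk = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(zkutil::ZooKeeperArgs(hosts));
    for (const auto & child : zk->getChildren(path, nullptr, nullptr))
        std::cout << child << '\n';
}
```

Routing all three utilities through the same factory keeps session handling in one place; the callers only had to swap the construction line and the `.`/`->` spelling, as the hunks above show.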