diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml new file mode 100644 index 00000000000..01cd0e271c8 --- /dev/null +++ b/.github/workflows/anchore-analysis.yml @@ -0,0 +1,45 @@ +# This workflow checks out code, performs an Anchore container image +# vulnerability and compliance scan, and integrates the results with +# GitHub Advanced Security code scanning feature. For more information on +# the Anchore scan action usage and parameters, see +# https://github.com/anchore/scan-action. For more information on +# Anchore container image scanning in general, see +# https://docs.anchore.com. + +name: Docker Container Scan (clickhouse-server) + +on: + pull_request: + paths: + - docker/server/Dockerfile + - .github/workflows/anchore-analysis.yml + schedule: + - cron: '0 21 * * *' + +jobs: + Anchore-Build-Scan: + runs-on: ubuntu-latest + steps: + - name: Checkout the code + uses: actions/checkout@v2 + - name: Build the Docker image + run: | + cd docker/server + perl -pi -e 's|=\$version||g' Dockerfile + docker build . --file Dockerfile --tag localbuild/testimage:latest + - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled + uses: anchore/scan-action@master + with: + image-reference: "localbuild/testimage:latest" + dockerfile-path: "docker/server/Dockerfile" + acs-report-enable: true + fail-build: true + - name: Upload artifact + uses: actions/upload-artifact@v1.0.0 + with: + name: AnchoreReports + path: ./anchore-reports/ + - name: Upload Anchore Scan Report + uses: github/codeql-action/upload-sarif@v1 + with: + sarif_file: results.sarif diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000000..bd8e8deef9e --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,33 @@ +name: "CodeQL Scanning" + +on: + schedule: + - cron: '0 19 * * *' +jobs: + CodeQL-Build: + + runs-on: self-hosted + timeout-minutes: 1440 + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + fetch-depth: 2 + submodules: 'recursive' + + - run: git checkout HEAD^2 + if: ${{ github.event_name == 'pull_request' }} + + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + + with: + languages: cpp + + - run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-9 g++-9 && mkdir build + - run: cd build && CC=gcc-9 CXX=g++-9 cmake .. 
+ - run: cd build && ninja + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 diff --git a/.gitmodules b/.gitmodules index 2fed57a519d..3c1510e265e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "contrib/poco"] path = contrib/poco - url = https://github.com/ClickHouse-Extras/poco + url = https://github.com/ClickHouse-Extras/poco.git branch = clickhouse [submodule "contrib/zstd"] path = contrib/zstd @@ -157,6 +157,9 @@ [submodule "contrib/openldap"] path = contrib/openldap url = https://github.com/openldap/openldap.git +[submodule "contrib/AMQP-CPP"] + path = contrib/AMQP-CPP + url = https://github.com/CopernicaMarketingSoftware/AMQP-CPP.git [submodule "contrib/cassandra"] path = contrib/cassandra url = https://github.com/ClickHouse-Extras/cpp-driver.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a4666f08bb..54f574cc347 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,444 @@ +## ClickHouse release 20.5 + +### ClickHouse release v20.5.2.7-stable 2020-07-02 + +#### Backward Incompatible Change + +* Return non-Nullable result from COUNT(DISTINCT), and `uniq` aggregate functions family. If all passed values are NULL, return zero instead. This improves SQL compatibility. [#11661](https://github.com/ClickHouse/ClickHouse/pull/11661) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added a check for the case when user-level setting is specified in a wrong place. User-level settings should be specified in `users.xml` inside `` section for specific user profile (or in `` for default settings). The server won't start with exception message in log. This fixes [#9051](https://github.com/ClickHouse/ClickHouse/issues/9051). If you want to skip the check, you can either move settings to the appropriate place or add `1` to config.xml. [#11449](https://github.com/ClickHouse/ClickHouse/pull/11449) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* The setting `input_format_with_names_use_header` is enabled by default. It will affect parsing of input formats `-WithNames` and `-WithNamesAndTypes`. [#10937](https://github.com/ClickHouse/ClickHouse/pull/10937) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove `experimental_use_processors` setting. It is enabled by default. [#10924](https://github.com/ClickHouse/ClickHouse/pull/10924) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update `zstd` to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. This change is backward compatible but we list it here in changelog in case you will wonder about these messages. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Several Kafka setting changes their defaults. See [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388). + +#### New Feature + +* `TTL DELETE WHERE` and `TTL GROUP BY` for automatic data coarsening and rollup in tables. 
[#10537](https://github.com/ClickHouse/ClickHouse/pull/10537) ([expl0si0nn](https://github.com/expl0si0nn)). +* Implementation of PostgreSQL wire protocol. [#10242](https://github.com/ClickHouse/ClickHouse/pull/10242) ([Movses](https://github.com/MovElb)). +* Added system tables for users, roles, grants, settings profiles, quotas, row policies; added commands SHOW USER, SHOW [CURRENT|ENABLED] ROLES, SHOW SETTINGS PROFILES. [#10387](https://github.com/ClickHouse/ClickHouse/pull/10387) ([Vitaly Baranov](https://github.com/vitlibar)). +* Support writes in the ODBC table function. [#10554](https://github.com/ClickHouse/ClickHouse/pull/10554) ([ageraab](https://github.com/ageraab)). [#10901](https://github.com/ClickHouse/ClickHouse/pull/10901) ([tavplubix](https://github.com/tavplubix)). +* Add query performance metrics based on Linux `perf_events` (these metrics are calculated with hardware CPU counters and OS counters). It is optional and requires `CAP_SYS_ADMIN` to be set on the clickhouse binary. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) ([Andrey Skobtsov](https://github.com/And42)). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Now `NULL` and `NOT NULL` modifiers are supported for data types in the `CREATE` query. [#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). +* Add `ArrowStream` input and output format. [#11088](https://github.com/ClickHouse/ClickHouse/pull/11088) ([hcz](https://github.com/hczhcz)). +* Support Cassandra as an external dictionary source. [#4978](https://github.com/ClickHouse/ClickHouse/pull/4978) ([favstovol](https://github.com/favstovol)). +* Added a new layout `direct` which loads all the data directly from the source for each query, without storing or caching data. [#10622](https://github.com/ClickHouse/ClickHouse/pull/10622) ([Artem Streltsov](https://github.com/kekekekule)). +* Added new `complex_key_direct` layout to dictionaries, which does not store anything locally during query execution. [#10850](https://github.com/ClickHouse/ClickHouse/pull/10850) ([Artem Streltsov](https://github.com/kekekekule)). +* Added support for MySQL-style global variables syntax (stub). This is needed for compatibility with the MySQL protocol. [#11832](https://github.com/ClickHouse/ClickHouse/pull/11832) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added syntax highlighting to `clickhouse-client` using `replxx`. [#11422](https://github.com/ClickHouse/ClickHouse/pull/11422) ([Tagir Kuskarov](https://github.com/kuskarov)). +* `minMap` and `maxMap` functions were added. [#11603](https://github.com/ClickHouse/ClickHouse/pull/11603) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Add the `system.asynchronous_metric_log` table that logs historical metrics from `system.asynchronous_metrics`. [#11588](https://github.com/ClickHouse/ClickHouse/pull/11588) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add functions `extractAllGroupsHorizontal(haystack, re)` and `extractAllGroupsVertical(haystack, re)`. [#11554](https://github.com/ClickHouse/ClickHouse/pull/11554) ([Vasily Nemkov](https://github.com/Enmk)). +* Add SHOW CLUSTER(S) queries. [#11467](https://github.com/ClickHouse/ClickHouse/pull/11467) ([hexiaoting](https://github.com/hexiaoting)). +* Add `netloc` function for extracting network location, similar to `urlparse(url).netloc` in Python.
[#11356](https://github.com/ClickHouse/ClickHouse/pull/11356) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Add 2 more virtual columns for engine=Kafka to access message headers. [#11283](https://github.com/ClickHouse/ClickHouse/pull/11283) ([filimonov](https://github.com/filimonov)). +* Add `_timestamp_ms` virtual column for Kafka engine (type is `Nullable(DateTime64(3))`). [#11260](https://github.com/ClickHouse/ClickHouse/pull/11260) ([filimonov](https://github.com/filimonov)). +* Add function `randomFixedString`. [#10866](https://github.com/ClickHouse/ClickHouse/pull/10866) ([Andrei Nekrashevich](https://github.com/xolm)). +* Add function `fuzzBits` that randomly flips bits in a string with given probability. [#11237](https://github.com/ClickHouse/ClickHouse/pull/11237) ([Andrei Nekrashevich](https://github.com/xolm)). +* Allow comparison of numbers with constant string in comparison operators, IN and VALUES sections. [#11647](https://github.com/ClickHouse/ClickHouse/pull/11647) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add `round_robin` load_balancing mode. [#11645](https://github.com/ClickHouse/ClickHouse/pull/11645) ([Azat Khuzhin](https://github.com/azat)). +* Add `cast_keep_nullable` setting. If set `CAST(something_nullable AS Type)` return `Nullable(Type)`. [#11733](https://github.com/ClickHouse/ClickHouse/pull/11733) ([Artem Zuikov](https://github.com/4ertus2)). +* Added column `position` to `system.columns` table and `column_position` to `system.parts_columns` table. It contains ordinal position of a column in a table starting with 1. This closes [#7744](https://github.com/ClickHouse/ClickHouse/issues/7744). [#11655](https://github.com/ClickHouse/ClickHouse/pull/11655) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* ON CLUSTER support for SYSTEM {FLUSH DISTRIBUTED,STOP/START DISTRIBUTED SEND}. [#11415](https://github.com/ClickHouse/ClickHouse/pull/11415) ([Azat Khuzhin](https://github.com/azat)). +* Add system.distribution_queue table. [#11394](https://github.com/ClickHouse/ClickHouse/pull/11394) ([Azat Khuzhin](https://github.com/azat)). +* Support for all format settings in Kafka, expose some setting on table level, adjust the defaults for better performance. [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388) ([filimonov](https://github.com/filimonov)). +* Add `port` function (to extract port from URL). [#11120](https://github.com/ClickHouse/ClickHouse/pull/11120) ([Azat Khuzhin](https://github.com/azat)). +* Now `dictGet*` functions accept table names. [#11050](https://github.com/ClickHouse/ClickHouse/pull/11050) ([Vitaly Baranov](https://github.com/vitlibar)). +* The `clickhouse-format` tool is now able to format multiple queries when the `-n` argument is used. [#10852](https://github.com/ClickHouse/ClickHouse/pull/10852) ([Darío](https://github.com/dgrr)). +* Possibility to configure proxy-resolver for DiskS3. [#10744](https://github.com/ClickHouse/ClickHouse/pull/10744) ([Pavel Kovalenko](https://github.com/Jokser)). +* Make `pointInPolygon` work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)) [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Added `move_ttl_info` to `system.parts` in order to provide introspection of move TTL functionality. 
[#10591](https://github.com/ClickHouse/ClickHouse/pull/10591) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Possibility to work with S3 through proxies. [#10576](https://github.com/ClickHouse/ClickHouse/pull/10576) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add `NCHAR` and `NVARCHAR` synonyms for data types. [#11025](https://github.com/ClickHouse/ClickHouse/pull/11025) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Resolved [#7224](https://github.com/ClickHouse/ClickHouse/issues/7224): added `FailedQuery`, `FailedSelectQuery` and `FailedInsertQuery` metrics to the `system.events` table. [#11151](https://github.com/ClickHouse/ClickHouse/pull/11151) ([Nikita Orlov](https://github.com/naorlov)). +* Add more `jemalloc` statistics to `system.asynchronous_metrics`, and ensure that we see up-to-date values for them. [#11748](https://github.com/ClickHouse/ClickHouse/pull/11748) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Allow to specify default S3 credentials and custom auth headers. [#11134](https://github.com/ClickHouse/ClickHouse/pull/11134) ([Grigory Pervakov](https://github.com/GrigoryPervakov)). +* Added new functions to import/export DateTime64 as Int64 with various precisions: `to-/fromUnixTimestamp64Milli/-Micro/-Nano` (see the sketch below). [#10923](https://github.com/ClickHouse/ClickHouse/pull/10923) ([Vasily Nemkov](https://github.com/Enmk)). +* Allow specifying `mongodb://` URI for MongoDB dictionaries. [#10915](https://github.com/ClickHouse/ClickHouse/pull/10915) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* The OFFSET keyword can now be used without an affiliated LIMIT clause. [#10802](https://github.com/ClickHouse/ClickHouse/pull/10802) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Added `system.licenses` table. This table contains licenses of third-party libraries that are located in the `contrib` directory. This closes [#2890](https://github.com/ClickHouse/ClickHouse/issues/2890). [#10795](https://github.com/ClickHouse/ClickHouse/pull/10795) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* New function `toStartOfSecond(DateTime64) -> DateTime64` that zeroes out the sub-second part of a DateTime64 value. [#10722](https://github.com/ClickHouse/ClickHouse/pull/10722) ([Vasily Nemkov](https://github.com/Enmk)). +* Add new input format `JSONAsString` that accepts a sequence of JSON objects separated by newlines, spaces and/or commas. [#10607](https://github.com/ClickHouse/ClickHouse/pull/10607) ([Kruglov Pavel](https://github.com/Avogar)). +* Allowed to profile memory with finer granularity steps than 4 MiB. Added a sampling memory profiler to capture random allocations/deallocations. [#10598](https://github.com/ClickHouse/ClickHouse/pull/10598) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* `SimpleAggregateFunction` now also supports `sumMap`. [#10000](https://github.com/ClickHouse/ClickHouse/pull/10000) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Support `ALTER RENAME COLUMN` for the distributed table engine. Continuation of [#10727](https://github.com/ClickHouse/ClickHouse/issues/10727). Fixes [#10747](https://github.com/ClickHouse/ClickHouse/issues/10747). [#10887](https://github.com/ClickHouse/ClickHouse/pull/10887) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix + +* Fix UBSan report in Decimal parse. This fixes [#7540](https://github.com/ClickHouse/ClickHouse/issues/7540). [#10512](https://github.com/ClickHouse/ClickHouse/pull/10512) ([alexey-milovidov](https://github.com/alexey-milovidov)).
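As a usage sketch for the new `toStartOfSecond` and `to-/fromUnixTimestamp64*` helpers listed above (the `ts` alias is only an illustration, not part of the original entries):

```sql
-- Minimal sketch: truncate the sub-second part of a DateTime64 value and
-- export it as an Int64 millisecond timestamp using the new helper functions.
SELECT
    toStartOfSecond(ts)        AS ts_rounded,  -- DateTime64 with zeroed fractional part
    toUnixTimestamp64Milli(ts) AS ts_millis    -- Int64 milliseconds since epoch
FROM (SELECT now64(3) AS ts);
```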
+* Fix potential floating point exception when parsing DateTime64. This fixes [#11374](https://github.com/ClickHouse/ClickHouse/issues/11374). [#11875](https://github.com/ClickHouse/ClickHouse/pull/11875) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix rare crash caused by using a `Nullable` column in a prewhere condition. [#11895](https://github.com/ClickHouse/ClickHouse/pull/11895) [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608) [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result for `if` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed `Scalar doesn't exist` exception when using `WITH ...` in `SELECT ... FROM merge_tree_table ...`. See https://github.com/ClickHouse/ClickHouse/issues/11621. [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)). +* Fix unexpected behaviour of queries like `SELECT *, xyz.*`, which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Now replicated fetches will be cancelled during a metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)). +* Parse metadata stored in ZooKeeper before checking for equality. [#11739](https://github.com/ClickHouse/ClickHouse/pull/11739) ([Azat Khuzhin](https://github.com/azat)). +* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format. [#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([tavplubix](https://github.com/tavplubix)). +* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)). +* Fix very rare race condition in SYSTEM SYNC REPLICA. If a replicated table is being created and, at the same time, another client issues a `SYSTEM SYNC REPLICA` command on that table from a separate connection (this is unlikely, because the other client should be aware that the table is being created), a nullptr dereference is possible. [#11691](https://github.com/ClickHouse/ClickHouse/pull/11691) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Pass proper timeouts when communicating with the XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `LIMIT n WITH TIES` usage together with an `ORDER BY` statement that contains aliases.
[#11689](https://github.com/ClickHouse/ClickHouse/pull/11689) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible `Pipeline stuck` for selects with parallel `FINAL`. Fixes [#11636](https://github.com/ClickHouse/ClickHouse/issues/11636). [#11682](https://github.com/ClickHouse/ClickHouse/pull/11682) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix an error which leads to an incorrect state of `system.mutations`. It may show that the whole mutation is already done, but the server still has `MUTATE_PART` tasks in the replication queue and tries to execute them. This fixes [#11611](https://github.com/ClickHouse/ClickHouse/issues/11611). [#11681](https://github.com/ClickHouse/ClickHouse/pull/11681) ([alesapin](https://github.com/alesapin)). +* Fix syntax highlighting in the CREATE USER query. [#11664](https://github.com/ClickHouse/ClickHouse/pull/11664) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add support for regular expressions with case-insensitive flags (see the sketch below). This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove trivial count query optimization if row-level security is set. In previous versions the user got the total count of records in the table instead of the filtered count. This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). +* Without the `-q` option the database does not get created at startup. [#11604](https://github.com/ClickHouse/ClickHouse/pull/11604) ([giordyb](https://github.com/giordyb)). +* Fix error `Block structure mismatch` for queries with sampling reading from a `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong exit code of clickhouse-client when `exception.code() % 256 == 0`. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix race conditions in CREATE/DROP of different replicas of ReplicatedMergeTree. Continue to work if the table was not removed completely from ZooKeeper or not created successfully. This fixes [#11432](https://github.com/ClickHouse/ClickHouse/issues/11432). [#11592](https://github.com/ClickHouse/ClickHouse/pull/11592) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix trivial error in the log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed rare segfault in `SHOW CREATE TABLE`. Fixes [#11490](https://github.com/ClickHouse/ClickHouse/issues/11490). [#11579](https://github.com/ClickHouse/ClickHouse/pull/11579) ([tavplubix](https://github.com/tavplubix)).
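To illustrate the case-insensitive regular expression flags mentioned above, a minimal sketch (the literal values are arbitrary examples, not from the original entry):

```sql
-- RE2 inline flags such as (?i) can now be used in match();
-- this query is expected to return 1.
SELECT match('ClickHouse', '(?i)^clickhouse$');
```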
+* Previously, all queries in an HTTP session had the same query_id. This is now fixed. [#11578](https://github.com/ClickHouse/ClickHouse/pull/11578) ([tavplubix](https://github.com/tavplubix)). +* Now the clickhouse-server Docker container will prefer IPv6 when checking server aliveness. [#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). +* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and SAMPLE or a high number of threads is used. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix shard_num/replica_num for `` (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). +* Fix async INSERT into Distributed for prefer_localhost_replica=0 and w/o internal_replication. [#11527](https://github.com/ClickHouse/ClickHouse/pull/11527) ([Azat Khuzhin](https://github.com/azat)). +* Fix memory leak when an exception is thrown in the middle of aggregation with `-State` functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `Pipeline stuck` exception for `INSERT SELECT FINAL` where `SELECT` (`max_threads`>1) has multiple streams but `INSERT` has only one (`max_insert_threads`==0). [#11455](https://github.com/ClickHouse/ClickHouse/pull/11455) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong result in queries like `select count() from t, u`. [#11454](https://github.com/ClickHouse/ClickHouse/pull/11454) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix returned compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix server crash when a column has a compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). +* Fix potential uninitialized memory read in MergeTree shutdown if the table was not created successfully. [#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash in JOIN over `LowCardinality(T)` and `Nullable(T)`. [#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)). +* Fixed `geohashesInBox` with arguments outside of the latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Better errors for `joinGet()` functions. [#11389](https://github.com/ClickHouse/ClickHouse/pull/11389) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359).
[#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove redundant lock during parts send in ReplicatedMergeTree. [#11354](https://github.com/ClickHouse/ClickHouse/pull/11354) ([alesapin](https://github.com/alesapin)). +* Fix support for `\G` (vertical output) in clickhouse-client in multiline mode. This closes [#9933](https://github.com/ClickHouse/ClickHouse/issues/9933). [#11350](https://github.com/ClickHouse/ClickHouse/pull/11350) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential segfault when using `Lazy` database. [#11348](https://github.com/ClickHouse/ClickHouse/pull/11348) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash in direct selects from `Join` table engine (without JOIN) and wrong nullability. [#11340](https://github.com/ClickHouse/ClickHouse/pull/11340) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now merges stopped before change metadata in `ALTER` queries. [#11335](https://github.com/ClickHouse/ClickHouse/pull/11335) ([alesapin](https://github.com/alesapin)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `visitParamExtractRaw` when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix insignificant data race in `clickhouse-copier`. Found by integration tests. [#11313](https://github.com/ClickHouse/ClickHouse/pull/11313) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. 
[#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash when `SET DEFAULT ROLE` is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash while reading malformed data in `Protobuf` format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed a bug when `cache` dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now `primary.idx` will be checked if it's defined in `CREATE` query. [#11199](https://github.com/ClickHouse/ClickHouse/pull/11199) ([alesapin](https://github.com/alesapin)). +* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed `S3` globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Fix for the hang which was happening sometimes during DROP of table engine=Kafka (or during server restarts). [#11145](https://github.com/ClickHouse/ClickHouse/pull/11145) ([filimonov](https://github.com/filimonov)). +* Fix excessive reserving of threads for simple queries (optimization for reducing the number of threads, which was partly broken after changes in pipeline). [#11114](https://github.com/ClickHouse/ClickHouse/pull/11114) ([Azat Khuzhin](https://github.com/azat)). +* Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)). +* Fixed deadlock during server startup after update with changes in structure of system log tables. [#11106](https://github.com/ClickHouse/ClickHouse/pull/11106) ([alesapin](https://github.com/alesapin)). +* Fixed memory leak in registerDiskS3. [#11074](https://github.com/ClickHouse/ClickHouse/pull/11074) ([Pavel Kovalenko](https://github.com/Jokser)). 
+* Fix error `No such name in Block::erase()` when JOIN appears with PREWHERE or `optimize_move_to_prewhere` makes PREWHERE from WHERE. [#11051](https://github.com/ClickHouse/ClickHouse/pull/11051) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix potential data loss during termination of a Kafka engine table. [#11048](https://github.com/ClickHouse/ClickHouse/pull/11048) ([filimonov](https://github.com/filimonov)). +* Fixed parseDateTime64BestEffort argument resolution bugs. [#10925](https://github.com/ClickHouse/ClickHouse/issues/10925). [#11038](https://github.com/ClickHouse/ClickHouse/pull/11038) ([Vasily Nemkov](https://github.com/Enmk)). +* Now it's possible to `ADD/DROP` and `RENAME` the same column in a single `ALTER` query. The exception message for simultaneous `MODIFY` and `RENAME` is now clearer. Partially fixes [#10669](https://github.com/ClickHouse/ClickHouse/issues/10669). [#11037](https://github.com/ClickHouse/ClickHouse/pull/11037) ([alesapin](https://github.com/alesapin)). +* Fixed parsing of S3 URLs. [#11036](https://github.com/ClickHouse/ClickHouse/pull/11036) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix memory tracking for two-level `GROUP BY` when there is a `LIMIT`. [#11022](https://github.com/ClickHouse/ClickHouse/pull/11022) ([Azat Khuzhin](https://github.com/azat)). +* Fix very rare potential use-after-free error in MergeTree if the table was not created successfully. [#10986](https://github.com/ClickHouse/ClickHouse/pull/10986) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix metadata (relative path for rename) and data (relative path for symlink) handling for the Atomic database. [#10980](https://github.com/ClickHouse/ClickHouse/pull/10980) ([Azat Khuzhin](https://github.com/azat)). +* Fix server crash on concurrent `ALTER` and `DROP DATABASE` queries with the `Atomic` database engine. [#10968](https://github.com/ClickHouse/ClickHouse/pull/10968) ([tavplubix](https://github.com/tavplubix)). +* Fix incorrect raw data size in method getRawData(). [#10964](https://github.com/ClickHouse/ClickHouse/pull/10964) ([Igr](https://github.com/ObjatieGroba)). +* Fix incompatibility of two-level aggregation between versions 20.1 and earlier. This incompatibility happens when different versions of ClickHouse are used on the initiator node and remote nodes, the size of the GROUP BY result is large, and aggregation is performed by a single String field. It leads to several unmerged rows for a single key in the result. [#10952](https://github.com/ClickHouse/ClickHouse/pull/10952) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Avoid sending partially written files by the DistributedBlockOutputStream. [#10940](https://github.com/ClickHouse/ClickHouse/pull/10940) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in `SELECT count(notNullIn(NULL, []))`. [#10920](https://github.com/ClickHouse/ClickHouse/pull/10920) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix for the hang which was happening sometimes during DROP of a table with engine=Kafka (or during server restarts). [#10910](https://github.com/ClickHouse/ClickHouse/pull/10910) ([filimonov](https://github.com/filimonov)). +* Now it's possible to execute multiple `ALTER RENAME` like `a TO b, c TO a`. [#10895](https://github.com/ClickHouse/ClickHouse/pull/10895) ([alesapin](https://github.com/alesapin)). +* Fix a possible race which could happen when you get a result from an aggregate function state from multiple threads for the same column.
The only known way this can happen is when the `finalizeAggregation` function is used while reading from a table with the `Memory` engine that stores an `AggregateFunction` state for a `quantile*` function. [#10890](https://github.com/ClickHouse/ClickHouse/pull/10890) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix backward compatibility with tuples in Distributed tables. [#10889](https://github.com/ClickHouse/ClickHouse/pull/10889) ([Anton Popov](https://github.com/CurtizJ)). +* Fix SIGSEGV in StringHashTable (if such a key does not exist). [#10870](https://github.com/ClickHouse/ClickHouse/pull/10870) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `WATCH` hanging after a `LiveView` table was dropped from a database with the `Atomic` engine. [#10859](https://github.com/ClickHouse/ClickHouse/pull/10859) ([tavplubix](https://github.com/tavplubix)). +* Fixed a bug in `ReplicatedMergeTree` which might cause some `ALTER` or `OPTIMIZE` query to hang waiting for some replica after it becomes inactive. [#10849](https://github.com/ClickHouse/ClickHouse/pull/10849) ([tavplubix](https://github.com/tavplubix)). +* Now constraints are updated if a column participating in a `CONSTRAINT` expression was renamed. Fixes [#10844](https://github.com/ClickHouse/ClickHouse/issues/10844). [#10847](https://github.com/ClickHouse/ClickHouse/pull/10847) ([alesapin](https://github.com/alesapin)). +* Fix potential read of uninitialized memory in cache dictionary. [#10834](https://github.com/ClickHouse/ClickHouse/pull/10834) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix column order after Block::sortColumns() (also add a test that shows that it affects a real use case: the Buffer engine). [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)). +* Fix the issue with the ODBC bridge when no quoting of identifiers is requested. This fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan and MSan report in DateLUT. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make use of `src_type` for correct type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Get rid of old libunwind patches. https://github.com/ClickHouse-Extras/libunwind/commit/500aa227911bd185a94bfc071d68f4d3b03cb3b1#r39048012 This allows disabling `-fno-omit-frame-pointer` in `clang` builds, which improves performance by at least 1% on average. [#10761](https://github.com/ClickHouse/ClickHouse/pull/10761) ([Amos Bird](https://github.com/amosbird)). +* Fix avgWeighted when using a floating-point weight over multiple shards. [#10758](https://github.com/ClickHouse/ClickHouse/pull/10758) ([Baudouin Giard](https://github.com/bgiard)). +* Fix `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exceptions should be finished if an exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix the -OrNull and -OrDefault combinators when combined with -State.
[#10741](https://github.com/ClickHouse/ClickHouse/pull/10741) ([hcz](https://github.com/hczhcz)). +* Fix crash in `generateRandom` with nested types (a usage sketch is shown below). Fixes [#10583](https://github.com/ClickHouse/ClickHouse/issues/10583). [#10734](https://github.com/ClickHouse/ClickHouse/pull/10734) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix data corruption for a `LowCardinality(FixedString)` key column in `SummingMergeTree` which could have happened after a merge. Fixes [#10489](https://github.com/ClickHouse/ClickHouse/issues/10489). [#10721](https://github.com/ClickHouse/ClickHouse/pull/10721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix usage of a primary key wrapped into a function with the 'FINAL' modifier and the 'ORDER BY' optimization. [#10715](https://github.com/ClickHouse/ClickHouse/pull/10715) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible buffer overflow in function `h3EdgeAngle`. [#10711](https://github.com/ClickHouse/ClickHouse/pull/10711) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix disappearing totals. Totals could have been filtered if the query had a join or a subquery with an external WHERE condition. Fixes [#10674](https://github.com/ClickHouse/ClickHouse/issues/10674). [#10698](https://github.com/ClickHouse/ClickHouse/pull/10698) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix atomicity of HTTP insert. This fixes [#9666](https://github.com/ClickHouse/ClickHouse/issues/9666). [#10687](https://github.com/ClickHouse/ClickHouse/pull/10687) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Fix multiple usages of the `IN` operator with the identical set in one query. [#10686](https://github.com/ClickHouse/ClickHouse/pull/10686) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed a bug which caused HTTP requests to get stuck on client close when `readonly=2` and `cancel_http_readonly_queries_on_client_close=1`. Fixes [#7939](https://github.com/ClickHouse/ClickHouse/issues/7939), [#7019](https://github.com/ClickHouse/ClickHouse/issues/7019), [#7736](https://github.com/ClickHouse/ClickHouse/issues/7736), [#7091](https://github.com/ClickHouse/ClickHouse/issues/7091). [#10684](https://github.com/ClickHouse/ClickHouse/pull/10684) ([tavplubix](https://github.com/tavplubix)). +* Fix the order of parameters in the AggregateTransform constructor. [#10667](https://github.com/ClickHouse/ClickHouse/pull/10667) ([palasonic1](https://github.com/palasonic1)). +* Fix the lack of parallel execution of remote queries with `distributed_aggregation_memory_efficient` enabled. Fixes [#10655](https://github.com/ClickHouse/ClickHouse/issues/10655). [#10664](https://github.com/ClickHouse/ClickHouse/pull/10664) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible incorrect number of rows for queries with `LIMIT`. Fixes [#10566](https://github.com/ClickHouse/ClickHouse/issues/10566), [#10709](https://github.com/ClickHouse/ClickHouse/issues/10709). [#10660](https://github.com/ClickHouse/ClickHouse/pull/10660) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a bug which locks concurrent alters when a table has a lot of parts. [#10659](https://github.com/ClickHouse/ClickHouse/pull/10659) ([alesapin](https://github.com/alesapin)). +* Fix nullptr dereference in StorageBuffer if the server was shut down before table startup. [#10641](https://github.com/ClickHouse/ClickHouse/pull/10641) ([alexey-milovidov](https://github.com/alexey-milovidov)).
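A hedged usage sketch for the `generateRandom` fix with nested types mentioned above (the column structure is an arbitrary example):

```sql
-- generateRandom('structure', [random_seed, max_string_length, max_array_length])
-- now works with nested types such as Array and Tuple without crashing.
SELECT *
FROM generateRandom('a Array(Int8), t Tuple(DateTime64(3), UUID)', 1, 10, 2)
LIMIT 3;
```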
+* Fix predicate optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with a `HAVING` section (i.e. when filtering on the initiator server is required), by preserving the order of expressions (and this is enough to fix it), and also by forcing the aggregator to use column names over indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). +* Fix optimize_skip_unused_shards with LowCardinality. [#10611](https://github.com/ClickHouse/ClickHouse/pull/10611) ([Azat Khuzhin](https://github.com/azat)). +* Fix segfault in StorageBuffer when an exception occurs on server startup. Fixes [#10550](https://github.com/ClickHouse/ClickHouse/issues/10550). [#10609](https://github.com/ClickHouse/ClickHouse/pull/10609) ([tavplubix](https://github.com/tavplubix)). +* On a `SYSTEM DROP DNS CACHE` query, also drop the caches which are used to check if a user is allowed to connect from some IP addresses. [#10608](https://github.com/ClickHouse/ClickHouse/pull/10608) ([tavplubix](https://github.com/tavplubix)). +* Fixed incorrect scalar results inside the inner query of a `MATERIALIZED VIEW` in case this query contained a dependent table. [#10603](https://github.com/ClickHouse/ClickHouse/pull/10603) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed handling of the condition variable for synchronous mutations. In some cases signals to that condition variable could be lost. [#10588](https://github.com/ClickHouse/ClickHouse/pull/10588) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix a possible crash when `createDictionary()` is called before `loadStoredObject()` has finished. [#10587](https://github.com/ClickHouse/ClickHouse/pull/10587) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)). +* Fix SELECT of an ALIAS column whose default expression type is different from the column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)). +* Implemented comparison between DateTime64 and String values, just like for DateTime (see the sketch below). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix index corruption, which may occur in some cases after merging compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)). +* Disable the GROUP BY sharding_key optimization by default (`optimize_distributed_group_by_sharding_key` has been introduced and is turned off by default, due to the complexity of analyzing the sharding_key; a simple example is `if` in the sharding key) and fix it for WITH ROLLUP/CUBE/TOTALS. [#10516](https://github.com/ClickHouse/ClickHouse/pull/10516) ([Azat Khuzhin](https://github.com/azat)).
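A short sketch of the DateTime64/String comparison noted above (table and column names are hypothetical):

```sql
-- DateTime64 columns can now be compared with string literals, just like DateTime.
SELECT count()
FROM events
WHERE event_time >= '2020-07-01 00:00:00.000';  -- event_time is DateTime64(3)
```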
+* Fixes: [#10263](https://github.com/ClickHouse/ClickHouse/issues/10263) (after that PR dist send via INSERT had been postponing on each INSERT) Fixes: [#8756](https://github.com/ClickHouse/ClickHouse/issues/8756) (that PR breaks distributed sends with all of the following conditions met (unlikely setup for now I guess): `internal_replication == false`, multiple local shards (activates the hardlinking code) and `distributed_storage_policy` (makes `link(2)` fails on `EXDEV`)). [#10486](https://github.com/ClickHouse/ClickHouse/pull/10486) ([Azat Khuzhin](https://github.com/azat)). +* Fixed error with "max_rows_to_sort" limit. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Get dictionary and check access rights only once per each call of any function reading external dictionaries. [#10928](https://github.com/ClickHouse/ClickHouse/pull/10928) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Improvement + +* Apply `TTL` for old data, after `ALTER MODIFY TTL` query. This behaviour is controlled by setting `materialize_ttl_after_modify`, which is enabled by default. [#11042](https://github.com/ClickHouse/ClickHouse/pull/11042) ([Anton Popov](https://github.com/CurtizJ)). +* When parsing C-style backslash escapes in string literals, VALUES and various text formats (this is an extension to SQL standard that is endemic for ClickHouse and MySQL), keep backslash if unknown escape sequence is found (e.g. `\%` or `\w`) that will make usage of `LIKE` and `match` regular expressions more convenient (it's enough to write `name LIKE 'used\_cars'` instead of `name LIKE 'used\\_cars'`) and more compatible at the same time. This fixes [#10922](https://github.com/ClickHouse/ClickHouse/issues/10922). [#11208](https://github.com/ClickHouse/ClickHouse/pull/11208) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* When reading Decimal value, cut extra digits after point. This behaviour is more compatible with MySQL and PostgreSQL. This fixes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11831](https://github.com/ClickHouse/ClickHouse/pull/11831) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow to DROP replicated table if the metadata in ZooKeeper was already removed and does not exist (this is also the case when using TestKeeper for testing and the server was restarted). Allow to RENAME replicated table even if there is an error communicating with ZooKeeper. This fixes [#10720](https://github.com/ClickHouse/ClickHouse/issues/10720). [#11652](https://github.com/ClickHouse/ClickHouse/pull/11652) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Slightly improve diagnostic of reading decimal from string. This closes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11829](https://github.com/ClickHouse/ClickHouse/pull/11829) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix sleep invocation in signal handler. It was sleeping for less amount of time than expected. [#11825](https://github.com/ClickHouse/ClickHouse/pull/11825) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* (Only Linux) OS related performance metrics (for CPU and I/O) will work even without `CAP_NET_ADMIN` capability. [#10544](https://github.com/ClickHouse/ClickHouse/pull/10544) ([Alexander Kazakov](https://github.com/Akazz)). +* Added `hostname` as an alias to function `hostName`. This feature was suggested by Victor Tarnavskiy from Yandex.Metrica. 
[#11821](https://github.com/ClickHouse/ClickHouse/pull/11821) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added support for distributed `DDL` (update/delete/drop partition) on cross replication clusters. [#11703](https://github.com/ClickHouse/ClickHouse/pull/11703) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Emit a warning instead of an error in the server log at startup if we cannot listen on one of the listen addresses (e.g. IPv6 is unavailable inside Docker). Note that if the server fails to listen on all listed addresses, it will refuse to start up, as before. This fixes [#4406](https://github.com/ClickHouse/ClickHouse/issues/4406). [#11687](https://github.com/ClickHouse/ClickHouse/pull/11687) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Default user and database creation on Docker image startup. [#10637](https://github.com/ClickHouse/ClickHouse/pull/10637) ([Paramtamtam](https://github.com/tarampampam)). +* When a multiline query is printed to the server log, the lines are joined. Make this work correctly in case of multiline string literals, identifiers and single-line comments. This fixes [#3853](https://github.com/ClickHouse/ClickHouse/issues/3853). [#11686](https://github.com/ClickHouse/ClickHouse/pull/11686) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Multiple names are now allowed in commands: CREATE USER, CREATE ROLE, ALTER USER, SHOW CREATE USER, SHOW GRANTS and so on. [#11670](https://github.com/ClickHouse/ClickHouse/pull/11670) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add support for distributed DDL (`UPDATE/DELETE/DROP PARTITION`) on cross replication clusters. [#11508](https://github.com/ClickHouse/ClickHouse/pull/11508) ([frank lee](https://github.com/etah000)). +* Clear the password from the command line in `clickhouse-client` and `clickhouse-benchmark` if the user has specified it with an explicit value. This prevents password exposure by `ps` and similar tools. [#11665](https://github.com/ClickHouse/ClickHouse/pull/11665) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Don't use debug info from the ELF file if it doesn't correspond to the running binary. It is needed to avoid printing wrong function names and source locations in stack traces. This fixes [#7514](https://github.com/ClickHouse/ClickHouse/issues/7514). [#11657](https://github.com/ClickHouse/ClickHouse/pull/11657) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Return NULL/zero when a value is not parsed completely in the parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Skip empty parameters in the requested URL. They may appear when you write `http://localhost:8123/?&a=b` or `http://localhost:8123/?a=b&&c=d`. This closes [#10749](https://github.com/ClickHouse/ClickHouse/issues/10749). [#11651](https://github.com/ClickHouse/ClickHouse/pull/11651) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow using `groupArrayArray` and `groupUniqArrayArray` as `SimpleAggregateFunction`. [#11650](https://github.com/ClickHouse/ClickHouse/pull/11650) ([Volodymyr Kuznetsov](https://github.com/ksvladimir)). +* Allow comparison with constant strings by implicit conversions when analysing index conditions on other types. This may close [#11630](https://github.com/ClickHouse/ClickHouse/issues/11630).
[#11648](https://github.com/ClickHouse/ClickHouse/pull/11648) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* https://github.com/ClickHouse/ClickHouse/pull/7572#issuecomment-642815377 Support config default HTTPHandlers. [#11628](https://github.com/ClickHouse/ClickHouse/pull/11628) ([Winter Zhang](https://github.com/zhang2014)). +* Make more input formats to work with Kafka engine. Fix the issue with premature flushes. Fix the performance issue when `kafka_num_consumers` is greater than number of partitions in topic. [#11599](https://github.com/ClickHouse/ClickHouse/pull/11599) ([filimonov](https://github.com/filimonov)). +* Improve `multiple_joins_rewriter_version=2` logic. Fix unknown columns error for lambda aliases. [#11587](https://github.com/ClickHouse/ClickHouse/pull/11587) ([Artem Zuikov](https://github.com/4ertus2)). +* Better exception message when cannot parse columns declaration list. This closes [#10403](https://github.com/ClickHouse/ClickHouse/issues/10403). [#11537](https://github.com/ClickHouse/ClickHouse/pull/11537) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve `enable_optimize_predicate_expression=1` logic for VIEW. [#11513](https://github.com/ClickHouse/ClickHouse/pull/11513) ([Artem Zuikov](https://github.com/4ertus2)). +* Adding support for PREWHERE in live view tables. [#11495](https://github.com/ClickHouse/ClickHouse/pull/11495) ([vzakaznikov](https://github.com/vzakaznikov)). +* Automatically update DNS cache, which is used to check if user is allowed to connect from an address. [#11487](https://github.com/ClickHouse/ClickHouse/pull/11487) ([tavplubix](https://github.com/tavplubix)). +* OPTIMIZE FINAL will force merge even if concurrent merges are performed. This closes [#11309](https://github.com/ClickHouse/ClickHouse/issues/11309) and closes [#11322](https://github.com/ClickHouse/ClickHouse/issues/11322). [#11346](https://github.com/ClickHouse/ClickHouse/pull/11346) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Suppress output of cancelled queries in clickhouse-client. In previous versions result may continue to print in terminal even after you press Ctrl+C to cancel query. This closes [#9473](https://github.com/ClickHouse/ClickHouse/issues/9473). [#11342](https://github.com/ClickHouse/ClickHouse/pull/11342) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now history file is updated after each query and there is no race condition if multiple clients use one history file. This fixes [#9897](https://github.com/ClickHouse/ClickHouse/issues/9897). [#11453](https://github.com/ClickHouse/ClickHouse/pull/11453) ([Tagir Kuskarov](https://github.com/kuskarov)). +* Better log messages in while reloading configuration. [#11341](https://github.com/ClickHouse/ClickHouse/pull/11341) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove trailing whitespaces from formatted queries in `clickhouse-client` or `clickhouse-format` in some cases. [#11325](https://github.com/ClickHouse/ClickHouse/pull/11325) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add setting "output_format_pretty_max_value_width". If value is longer, it will be cut to avoid output of too large values in terminal. This closes [#11140](https://github.com/ClickHouse/ClickHouse/issues/11140). [#11324](https://github.com/ClickHouse/ClickHouse/pull/11324) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Better exception message in case when there is shortage of memory mappings. 
This closes [#11027](https://github.com/ClickHouse/ClickHouse/issues/11027). [#11316](https://github.com/ClickHouse/ClickHouse/pull/11316) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support (U)Int8, (U)Int16, Date in ASOF JOIN. [#11301](https://github.com/ClickHouse/ClickHouse/pull/11301) ([Artem Zuikov](https://github.com/4ertus2)). +* Support kafka_client_id parameter for Kafka tables. It also changes the default `client.id` used by ClickHouse when communicating with Kafka to be more verbose and usable. [#11252](https://github.com/ClickHouse/ClickHouse/pull/11252) ([filimonov](https://github.com/filimonov)). +* Keep the value of `DistributedFilesToInsert` metric on exceptions. In previous versions, the value was set when we are going to send some files, but it is zero, if there was an exception and some files are still pending. Now it corresponds to the number of pending files in filesystem. [#11220](https://github.com/ClickHouse/ClickHouse/pull/11220) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add support for multi-word data type names (such as `DOUBLE PRECISION` and `CHAR VARYING`) for better SQL compatibility. [#11214](https://github.com/ClickHouse/ClickHouse/pull/11214) ([Павел Потемкин](https://github.com/Potya)). +* Provide synonyms for some data types. [#10856](https://github.com/ClickHouse/ClickHouse/pull/10856) ([Павел Потемкин](https://github.com/Potya)). +* The query log is now enabled by default. [#11184](https://github.com/ClickHouse/ClickHouse/pull/11184) ([Ivan Blinkov](https://github.com/blinkov)). +* Show authentication type in table system.users and while executing SHOW CREATE USER query. [#11080](https://github.com/ClickHouse/ClickHouse/pull/11080) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove data on explicit `DROP DATABASE` for `Memory` database engine. Fixes [#10557](https://github.com/ClickHouse/ClickHouse/issues/10557). [#11021](https://github.com/ClickHouse/ClickHouse/pull/11021) ([tavplubix](https://github.com/tavplubix)). +* Set thread names for internal threads of rdkafka library. Make logs from rdkafka available in server logs. [#10983](https://github.com/ClickHouse/ClickHouse/pull/10983) ([Azat Khuzhin](https://github.com/azat)). +* Support for unicode whitespaces in queries. This helps when queries are copy-pasted from Word or from web page. This fixes [#10896](https://github.com/ClickHouse/ClickHouse/issues/10896). [#10903](https://github.com/ClickHouse/ClickHouse/pull/10903) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow large UInt types as the index in function `tupleElement`. [#10874](https://github.com/ClickHouse/ClickHouse/pull/10874) ([hcz](https://github.com/hczhcz)). +* Respect prefer_localhost_replica/load_balancing on INSERT into Distributed. [#10867](https://github.com/ClickHouse/ClickHouse/pull/10867) ([Azat Khuzhin](https://github.com/azat)). +* Introduce `min_insert_block_size_rows_for_materialized_views`, `min_insert_block_size_bytes_for_materialized_views` settings. This settings are similar to `min_insert_block_size_rows` and `min_insert_block_size_bytes`, but applied only for blocks inserted into `MATERIALIZED VIEW`. It helps to control blocks squashing while pushing to MVs and avoid excessive memory usage. [#10858](https://github.com/ClickHouse/ClickHouse/pull/10858) ([Azat Khuzhin](https://github.com/azat)). +* Get rid of exception from replicated queue during server shutdown. Fixes [#10819](https://github.com/ClickHouse/ClickHouse/issues/10819). 
[#10841](https://github.com/ClickHouse/ClickHouse/pull/10841) ([alesapin](https://github.com/alesapin)). +* Ensure that `varSamp`, `varPop` cannot return negative results due to numerical errors and that `stddevSamp`, `stddevPop` cannot be calculated from negative variance. This fixes [#10532](https://github.com/ClickHouse/ClickHouse/issues/10532). [#10829](https://github.com/ClickHouse/ClickHouse/pull/10829) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Better DNS exception message. This fixes [#10813](https://github.com/ClickHouse/ClickHouse/issues/10813). [#10828](https://github.com/ClickHouse/ClickHouse/pull/10828) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Change HTTP response code in case of some parse errors to 400 Bad Request. This fix [#10636](https://github.com/ClickHouse/ClickHouse/issues/10636). [#10640](https://github.com/ClickHouse/ClickHouse/pull/10640) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Print a message if clickhouse-client is newer than clickhouse-server. [#10627](https://github.com/ClickHouse/ClickHouse/pull/10627) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Adding support for `INSERT INTO [db.]table WATCH` query. [#10498](https://github.com/ClickHouse/ClickHouse/pull/10498) ([vzakaznikov](https://github.com/vzakaznikov)). +* Allow to pass quota_key in clickhouse-client. This closes [#10227](https://github.com/ClickHouse/ClickHouse/issues/10227). [#10270](https://github.com/ClickHouse/ClickHouse/pull/10270) ([alexey-milovidov](https://github.com/alexey-milovidov)). + +#### Performance Improvement + +* Allow multiple replicas to assign merges, mutations, partition drop, move and replace concurrently. This closes [#10367](https://github.com/ClickHouse/ClickHouse/issues/10367). [#11639](https://github.com/ClickHouse/ClickHouse/pull/11639) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#11795](https://github.com/ClickHouse/ClickHouse/pull/11795) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Optimization of GROUP BY with respect to table sorting key, enabled with `optimize_aggregation_in_order` setting. [#9113](https://github.com/ClickHouse/ClickHouse/pull/9113) ([dimarub2000](https://github.com/dimarub2000)). +* Selects with final are executed in parallel. Added setting `max_final_threads` to limit the number of threads used. [#10463](https://github.com/ClickHouse/ClickHouse/pull/10463) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve performance for INSERT queries via `INSERT SELECT` or INSERT with clickhouse-client when small blocks are generated (typical case with parallel parsing). This fixes [#11275](https://github.com/ClickHouse/ClickHouse/issues/11275). Fix the issue that CONSTRAINTs were not working for DEFAULT fields. This fixes [#11273](https://github.com/ClickHouse/ClickHouse/issues/11273). Fix the issue that CONSTRAINTS were ignored for TEMPORARY tables. This fixes [#11274](https://github.com/ClickHouse/ClickHouse/issues/11274). [#11276](https://github.com/ClickHouse/ClickHouse/pull/11276) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Optimization that eliminates min/max/any aggregators of GROUP BY keys in SELECT section, enabled with `optimize_aggregators_of_group_by_keys` setting. [#11667](https://github.com/ClickHouse/ClickHouse/pull/11667) ([xPoSx](https://github.com/xPoSx)). [#11806](https://github.com/ClickHouse/ClickHouse/pull/11806) ([Azat Khuzhin](https://github.com/azat)). 
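+
+A minimal sketch of the `optimize_aggregators_of_group_by_keys` rewrite described in the previous entry (illustrative only; the table and column names are made up):
+
+```sql
+SET optimize_aggregators_of_group_by_keys = 1;
+-- min/max/any applied to the GROUP BY keys themselves add no information,
+-- so this can be executed as the cheaper `SELECT a, b FROM t GROUP BY a, b`.
+SELECT min(a), max(b) FROM t GROUP BY a, b;
+```
+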
+* New optimization that takes all operations out of the `any` function, enabled with `optimize_move_functions_out_of_any` (see the sketch below). [#11529](https://github.com/ClickHouse/ClickHouse/pull/11529) ([Ruslan](https://github.com/kamalov-ruslan)). +* Improve performance of `clickhouse-client` in interactive mode when Pretty formats are used. In previous versions, a significant amount of time could be spent calculating the visible width of a UTF-8 string. This closes [#11323](https://github.com/ClickHouse/ClickHouse/issues/11323). [#11323](https://github.com/ClickHouse/ClickHouse/pull/11323) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improved performance for queries with `ORDER BY` and small `LIMIT` (less than `max_block_size`). [#11171](https://github.com/ClickHouse/ClickHouse/pull/11171) ([Albert Kidrachev](https://github.com/Provet)). +* Add runtime CPU detection to select and dispatch the best function implementation. Add support for code generation for multiple targets. This closes [#1017](https://github.com/ClickHouse/ClickHouse/issues/1017). [#10058](https://github.com/ClickHouse/ClickHouse/pull/10058) ([DimasKovas](https://github.com/DimasKovas)). +* Enable `mlock` of the clickhouse binary by default. It will prevent the clickhouse executable from being paged out under high IO load. [#11139](https://github.com/ClickHouse/ClickHouse/pull/11139) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make queries with the `sum` aggregate function and without GROUP BY keys run multiple times faster. [#10992](https://github.com/ClickHouse/ClickHouse/pull/10992) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve radix sort (used in `ORDER BY` with simple keys) by removing some redundant data moves. [#10981](https://github.com/ClickHouse/ClickHouse/pull/10981) ([Arslan Gumerov](https://github.com/g-arslan)). +* Sort bigger parts of the left table in MergeJoin. Buffer left blocks in memory. Add the `partial_merge_join_left_table_buffer_bytes` setting to manage the left block buffer sizes. [#10601](https://github.com/ClickHouse/ClickHouse/pull/10601) ([Artem Zuikov](https://github.com/4ertus2)). +* Remove duplicate ORDER BY and DISTINCT from subqueries; this optimization is enabled with `optimize_duplicate_order_by_and_distinct`. [#10067](https://github.com/ClickHouse/ClickHouse/pull/10067) ([Mikhail Malafeev](https://github.com/demo-99)). +* New optimization that eliminates functions of other keys in the GROUP BY section, enabled with `optimize_group_by_function_keys`. [#10051](https://github.com/ClickHouse/ClickHouse/pull/10051) ([xPoSx](https://github.com/xPoSx)). +* New optimization that takes arithmetic operations out of aggregate functions, enabled with `optimize_arithmetic_operations_in_aggregate_functions`. [#10047](https://github.com/ClickHouse/ClickHouse/pull/10047) ([Ruslan](https://github.com/kamalov-ruslan)). +* Use an HTTP client for S3 based on Poco instead of curl. This will improve performance and lower memory usage of S3 storage and table functions. [#11230](https://github.com/ClickHouse/ClickHouse/pull/11230) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). +* Enable percpu_arena:percpu for jemalloc (this will reduce memory fragmentation due to the thread pool). [#11084](https://github.com/ClickHouse/ClickHouse/pull/11084) ([Azat Khuzhin](https://github.com/azat)).
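+
+A minimal sketch of the query rewrites behind the `optimize_move_functions_out_of_any` and `optimize_arithmetic_operations_in_aggregate_functions` settings mentioned above (illustrative only; the table and column names are made up):
+
+```sql
+SET optimize_move_functions_out_of_any = 1,
+    optimize_arithmetic_operations_in_aggregate_functions = 1;
+-- `any(x * 2)` can be evaluated as `any(x) * 2` and `sum(y * 2)` as `sum(y) * 2`,
+-- so the multiplication is done once per group instead of once per row.
+SELECT any(x * 2), sum(y * 2) FROM t GROUP BY k;
+```
+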
+* Optimize memory usage when reading a response from an S3 HTTP client. [#11561](https://github.com/ClickHouse/ClickHouse/pull/11561) ([Pavel Kovalenko](https://github.com/Jokser)). +* Adjust the default Kafka settings for better performance. [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388) ([filimonov](https://github.com/filimonov)). + +#### Experimental Feature + +* Add data type `Point` (Tuple(Float64, Float64)) and `Polygon` (Array(Array(Tuple(Float64, Float64)))). [#10678](https://github.com/ClickHouse/ClickHouse/pull/10678) ([Alexey Ilyukhov](https://github.com/livace)). +* Adds a `hasSubstr` function that allows looking for subsequences in arrays. Note: this function is likely to be renamed without further notice. [#11071](https://github.com/ClickHouse/ClickHouse/pull/11071) ([Ryad Zenine](https://github.com/r-zenine)). +* Added OpenCL support and a bitonic sort algorithm, which can be used for sorting integer types of data in a single column. Needs to be built with the flag `-DENABLE_OPENCL=1`. To use the bitonic sort algorithm instead of others, set the `special_sort` setting to `bitonic_sort` and make sure that OpenCL is available. This feature does not improve performance or anything else, it is only provided as an example and for demonstration purposes. It is likely to be removed in the near future if there is no further development in this direction. [#10232](https://github.com/ClickHouse/ClickHouse/pull/10232) ([Ri](https://github.com/margaritiko)). + +#### Build/Testing/Packaging Improvement + +* Enable clang-tidy for programs and utils. [#10991](https://github.com/ClickHouse/ClickHouse/pull/10991) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove dependency on `tzdata`: do not fail if the `/usr/share/zoneinfo` directory does not exist. Note that all timezones work in ClickHouse even without tzdata installed in the system. [#11827](https://github.com/ClickHouse/ClickHouse/pull/11827) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added MSan and UBSan stress tests. Note that we already have MSan and UBSan for functional tests, and the "stress" test is another kind of test. [#10871](https://github.com/ClickHouse/ClickHouse/pull/10871) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Print the compiler build id in crash messages. It will make us slightly more certain about which binary has crashed. Added new function `buildId`. [#11824](https://github.com/ClickHouse/ClickHouse/pull/11824) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added a test to ensure that mutations continue to work after a FREEZE query. [#11820](https://github.com/ClickHouse/ClickHouse/pull/11820) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Don't allow tests with the "fail" substring in their names because it makes looking at the test results in a browser less convenient when you type Ctrl+F and search for "fail". [#11817](https://github.com/ClickHouse/ClickHouse/pull/11817) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove unused imports from HTTPHandlerFactory. [#11660](https://github.com/ClickHouse/ClickHouse/pull/11660) ([Bharat Nallan](https://github.com/bharatnc)). +* Added random sampling of instances where copier is executed. It is needed to avoid the `Too many simultaneous queries` error. Also increased the timeout and decreased the fault probability. [#11573](https://github.com/ClickHouse/ClickHouse/pull/11573) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix missed include.
[#11525](https://github.com/ClickHouse/ClickHouse/pull/11525) ([Matwey V. Kornilov](https://github.com/matwey)). +* Speed up the build by removing old example programs. Also found some orphan functional tests. [#11486](https://github.com/ClickHouse/ClickHouse/pull/11486) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Increase ccache size for builds in CI. [#11450](https://github.com/ClickHouse/ClickHouse/pull/11450) ([alesapin](https://github.com/alesapin)). +* Leave only unit_tests_dbms in the deb build. [#11429](https://github.com/ClickHouse/ClickHouse/pull/11429) ([Ilya Yatsishin](https://github.com/qoega)). +* Update librdkafka to version [1.4.2](https://github.com/edenhill/librdkafka/releases/tag/v1.4.2). [#11256](https://github.com/ClickHouse/ClickHouse/pull/11256) ([filimonov](https://github.com/filimonov)). +* Refactor CMake build files. [#11390](https://github.com/ClickHouse/ClickHouse/pull/11390) ([Ivan](https://github.com/abyss7)). +* Fix several flaky integration tests. [#11355](https://github.com/ClickHouse/ClickHouse/pull/11355) ([alesapin](https://github.com/alesapin)). +* Add support for unit tests run with UBSan. [#11345](https://github.com/ClickHouse/ClickHouse/pull/11345) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove redundant timeout from integration test `test_insertion_sync_fails_with_timeout`. [#11343](https://github.com/ClickHouse/ClickHouse/pull/11343) ([alesapin](https://github.com/alesapin)). +* Better check for hung queries in clickhouse-test. [#11321](https://github.com/ClickHouse/ClickHouse/pull/11321) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Emit a warning if the server was built in debug mode or with sanitizers. [#11304](https://github.com/ClickHouse/ClickHouse/pull/11304) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now clickhouse-test checks server aliveness before tests run. [#11285](https://github.com/ClickHouse/ClickHouse/pull/11285) ([alesapin](https://github.com/alesapin)). +* Fix potentially flaky test `00731_long_merge_tree_select_opened_files.sh`. It does not fail frequently but we have discovered a potential race condition in this test while experimenting with ThreadFuzzer: [#9814](https://github.com/ClickHouse/ClickHouse/issues/9814) See [link](https://clickhouse-test-reports.s3.yandex.net/9814/40e3023e215df22985d275bf85f4d2290897b76b/functional_stateless_tests_(unbundled).html#fail1) for an example. [#11270](https://github.com/ClickHouse/ClickHouse/pull/11270) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Repeat a test in CI if the `curl` invocation timed out. It is possible due to system hangups for 10+ seconds that are typical in our CI infrastructure. This fixes [#11267](https://github.com/ClickHouse/ClickHouse/issues/11267). [#11268](https://github.com/ClickHouse/ClickHouse/pull/11268) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add a test for the Join table engine from @donmikel. This closes [#9158](https://github.com/ClickHouse/ClickHouse/issues/9158). [#11265](https://github.com/ClickHouse/ClickHouse/pull/11265) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix several insignificant errors in unit tests. [#11262](https://github.com/ClickHouse/ClickHouse/pull/11262) ([alesapin](https://github.com/alesapin)). +* Now parts of the linker command for the `cctz` library will not be shuffled with other libraries. [#11213](https://github.com/ClickHouse/ClickHouse/pull/11213) ([alesapin](https://github.com/alesapin)).
+* Split /programs/server into actual program and library. [#11186](https://github.com/ClickHouse/ClickHouse/pull/11186) ([Ivan](https://github.com/abyss7)). +* Improve build scripts for protobuf & gRPC. [#11172](https://github.com/ClickHouse/ClickHouse/pull/11172) ([Vitaly Baranov](https://github.com/vitlibar)). +* Enable performance test that was not working. [#11158](https://github.com/ClickHouse/ClickHouse/pull/11158) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Create root S3 bucket for tests before any CH instance is started. [#11142](https://github.com/ClickHouse/ClickHouse/pull/11142) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add performance test for non-constant polygons. [#11141](https://github.com/ClickHouse/ClickHouse/pull/11141) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixing `00979_live_view_watch_continuous_aggregates` test. [#11024](https://github.com/ClickHouse/ClickHouse/pull/11024) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add ability to run zookeeper in integration tests over tmpfs. [#11002](https://github.com/ClickHouse/ClickHouse/pull/11002) ([alesapin](https://github.com/alesapin)). +* Wait for odbc-bridge with exponential backoff. Previous wait time of 200 ms was not enough in our CI environment. [#10990](https://github.com/ClickHouse/ClickHouse/pull/10990) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix non-deterministic test. [#10989](https://github.com/ClickHouse/ClickHouse/pull/10989) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added a test for empty external data. [#10926](https://github.com/ClickHouse/ClickHouse/pull/10926) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Database is recreated for every test. This improves separation of tests. [#10902](https://github.com/ClickHouse/ClickHouse/pull/10902) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added more asserts in columns code. [#10833](https://github.com/ClickHouse/ClickHouse/pull/10833) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Better cooperation with sanitizers. Print information about query_id in the message of sanitizer failure. [#10832](https://github.com/ClickHouse/ClickHouse/pull/10832) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix obvious race condition in "Split build smoke test" check. [#10820](https://github.com/ClickHouse/ClickHouse/pull/10820) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix (false) MSan report in MergeTreeIndexFullText. The issue first appeared in [#9968](https://github.com/ClickHouse/ClickHouse/issues/9968). [#10801](https://github.com/ClickHouse/ClickHouse/pull/10801) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add MSan suppression for MariaDB Client library. [#10800](https://github.com/ClickHouse/ClickHouse/pull/10800) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* GRPC make couldn't find protobuf files, changed make file by adding the right link. [#10794](https://github.com/ClickHouse/ClickHouse/pull/10794) ([mnkonkova](https://github.com/mnkonkova)). +* Enable extra warnings (`-Weverything`) for base, utils, programs. Note that we already have it for the most of the code. [#10779](https://github.com/ClickHouse/ClickHouse/pull/10779) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Suppressions of warnings from libraries was mistakenly declared as public in [#10396](https://github.com/ClickHouse/ClickHouse/issues/10396). 
[#10776](https://github.com/ClickHouse/ClickHouse/pull/10776) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Restore a patch that was accidentially deleted in [#10396](https://github.com/ClickHouse/ClickHouse/issues/10396). [#10774](https://github.com/ClickHouse/ClickHouse/pull/10774) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix performance tests errors, part 2. [#10773](https://github.com/ClickHouse/ClickHouse/pull/10773) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix performance test errors. [#10766](https://github.com/ClickHouse/ClickHouse/pull/10766) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Update cross-builds to use clang-10 compiler. [#10724](https://github.com/ClickHouse/ClickHouse/pull/10724) ([Ivan](https://github.com/abyss7)). +* Update instruction to install RPM packages. This was suggested by Denis (TG login @ldviolet) and implemented by Arkady Shejn. [#10707](https://github.com/ClickHouse/ClickHouse/pull/10707) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Trying to fix `tests/queries/0_stateless/01246_insert_into_watch_live_view.py` test. [#10670](https://github.com/ClickHouse/ClickHouse/pull/10670) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fixing and re-enabling 00979_live_view_watch_continuous_aggregates.py test. [#10658](https://github.com/ClickHouse/ClickHouse/pull/10658) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fix OOM in ASan stress test. [#10646](https://github.com/ClickHouse/ClickHouse/pull/10646) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report (adding zero to nullptr) in HashTable that appeared after migration to clang-10. [#10638](https://github.com/ClickHouse/ClickHouse/pull/10638) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove external call to `ld` (bfd) linker during tzdata processing in compile time. [#10634](https://github.com/ClickHouse/ClickHouse/pull/10634) ([alesapin](https://github.com/alesapin)). +* Allow to use `lld` to link blobs (resources). [#10632](https://github.com/ClickHouse/ClickHouse/pull/10632) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in `LZ4` library. [#10631](https://github.com/ClickHouse/ClickHouse/pull/10631) ([alexey-milovidov](https://github.com/alexey-milovidov)). See also [https://github.com/lz4/lz4/issues/857](https://github.com/lz4/lz4/issues/857) +* Update LZ4 to the latest dev branch. [#10630](https://github.com/ClickHouse/ClickHouse/pull/10630) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added auto-generated machine-readable file with the list of stable versions. [#10628](https://github.com/ClickHouse/ClickHouse/pull/10628) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `capnproto` version check for `capnp::UnalignedFlatArrayMessageReader`. [#10618](https://github.com/ClickHouse/ClickHouse/pull/10618) ([Matwey V. Kornilov](https://github.com/matwey)). +* Lower memory usage in tests. [#10617](https://github.com/ClickHouse/ClickHouse/pull/10617) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixing hard coded timeouts in new live view tests. [#10604](https://github.com/ClickHouse/ClickHouse/pull/10604) ([vzakaznikov](https://github.com/vzakaznikov)). +* Increasing timeout when opening a client in tests/queries/0_stateless/helpers/client.py. [#10599](https://github.com/ClickHouse/ClickHouse/pull/10599) ([vzakaznikov](https://github.com/vzakaznikov)). 
+* Enable ThinLTO for clang builds, continuation of https://github.com/ClickHouse/ClickHouse/pull/10435. [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)). +* Adding fuzzers and preparing for oss-fuzz integration. [#10546](https://github.com/ClickHouse/ClickHouse/pull/10546) ([kyprizel](https://github.com/kyprizel)). +* Fix FreeBSD build. [#10150](https://github.com/ClickHouse/ClickHouse/pull/10150) ([Ivan](https://github.com/abyss7)). +* Add new build for query tests using the pytest framework. [#10039](https://github.com/ClickHouse/ClickHouse/pull/10039) ([Ivan](https://github.com/abyss7)). + + ## ClickHouse release v20.4 +### ClickHouse release v20.4.6.53-stable 2020-06-25 + +#### Bug Fix + +* Fix rare crash caused by using `Nullable` column in prewhere condition. Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result for `if()` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected behaviour of queries like `SELECT *, xyz.*`, which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)). +* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format. [#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([tavplubix](https://github.com/tavplubix)). +* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)). +* Pass proper timeouts when communicating with the XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `LIMIT n WITH TIES` usage together with an `ORDER BY` clause that contains aliases (see the sketch below). [#11689](https://github.com/ClickHouse/ClickHouse/pull/11689) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error which leads to an incorrect state of `system.mutations`. It may show that the whole mutation is already done but the server still has `MUTATE_PART` tasks in the replication queue and tries to execute them. This fixes [#11611](https://github.com/ClickHouse/ClickHouse/issues/11611). [#11681](https://github.com/ClickHouse/ClickHouse/pull/11681) ([alesapin](https://github.com/alesapin)).
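+
+A minimal sketch of the `LIMIT n WITH TIES` plus aliased `ORDER BY` combination covered by the fix above (illustrative only):
+
+```sql
+-- `n` is an alias used in the ORDER BY clause; WITH TIES also returns every row
+-- that ties with the last row selected by the LIMIT.
+SELECT number % 3 AS n
+FROM numbers(10)
+ORDER BY n
+LIMIT 2 WITH TIES;
+```
+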
+* Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove trivial count query optimization if row-level security is set. In previous versions the user got the total count of records in the table instead of the filtered count. This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error `Block structure mismatch` for queries with sampling reading from a `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong exit code of clickhouse-client when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix trivial error in the log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed rare segfault in `SHOW CREATE TABLE`. Fixes [#11490](https://github.com/ClickHouse/ClickHouse/issues/11490). [#11579](https://github.com/ClickHouse/ClickHouse/pull/11579) ([tavplubix](https://github.com/tavplubix)). +* All queries in an HTTP session had the same query_id. It is fixed. [#11578](https://github.com/ClickHouse/ClickHouse/pull/11578) ([tavplubix](https://github.com/tavplubix)). +* Now the clickhouse-server Docker container will prefer IPv6 when checking server aliveness. [#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). +* Fix shard_num/replica_num for `` (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). +* Fix race condition which may lead to an exception during table drop. It's a bit tricky and not dangerous at all. If you want an explanation, just ping me in Telegram. [#11523](https://github.com/ClickHouse/ClickHouse/pull/11523) ([alesapin](https://github.com/alesapin)). +* Fix memory leak when an exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995).
[#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Get rid of old libunwind patches. https://github.com/ClickHouse-Extras/libunwind/commit/500aa227911bd185a94bfc071d68f4d3b03cb3b1#r39048012 This allows to disable `-fno-omit-frame-pointer` in `clang` builds that improves performance at least by 1% in average. [#10761](https://github.com/ClickHouse/ClickHouse/pull/10761) ([Amos Bird](https://github.com/amosbird)). +* Fix usage of primary key wrapped into a function with 'FINAL' modifier and 'ORDER BY' optimization. [#10715](https://github.com/ClickHouse/ClickHouse/pull/10715) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement + +* Fix several non significant errors in unit tests. [#11262](https://github.com/ClickHouse/ClickHouse/pull/11262) ([alesapin](https://github.com/alesapin)). +* Fix (false) MSan report in MergeTreeIndexFullText. The issue first appeared in [#9968](https://github.com/ClickHouse/ClickHouse/issues/9968). [#10801](https://github.com/ClickHouse/ClickHouse/pull/10801) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + +### ClickHouse release v20.4.5.36-stable 2020-06-10 + +#### Bug Fix + +* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). +* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Fix potential uninitialized memory read in MergeTree shutdown if table was not created successfully. [#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove redundant lock during parts send in ReplicatedMergeTree. [#11354](https://github.com/ClickHouse/ClickHouse/pull/11354) ([alesapin](https://github.com/alesapin)). 
+* Fix support for `\G` (vertical output) in clickhouse-client in multiline mode. This closes [#9933](https://github.com/ClickHouse/ClickHouse/issues/9933). [#11350](https://github.com/ClickHouse/ClickHouse/pull/11350) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential segfault when using `Lazy` database. [#11348](https://github.com/ClickHouse/ClickHouse/pull/11348) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now merges stopped before change metadata in `ALTER` queries. [#11335](https://github.com/ClickHouse/ClickHouse/pull/11335) ([alesapin](https://github.com/alesapin)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix insignificant data race in clickhouse-copier. Found by integration tests. [#11313](https://github.com/ClickHouse/ClickHouse/pull/11313) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. 
[#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed S3 globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). +* Fix for the hang which was happening sometimes during DROP of table engine=Kafka (or during server restarts). [#11145](https://github.com/ClickHouse/ClickHouse/pull/11145) ([filimonov](https://github.com/filimonov)). +* Fix excessive reserving of threads for simple queries (optimization for reducing the number of threads, which was partly broken after changes in pipeline). [#11114](https://github.com/ClickHouse/ClickHouse/pull/11114) ([Azat Khuzhin](https://github.com/azat)). +* Fix predicates optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with `HAVING` section (i.e. when filtering on the server initiator is required), by preserving the order of expressions (and this is enough to fix), and also force aggregator use column names over indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). + +#### Build/Testing/Packaging Improvement + +* Fix several flaky integration tests. [#11355](https://github.com/ClickHouse/ClickHouse/pull/11355) ([alesapin](https://github.com/alesapin)). + +### ClickHouse release v20.4.4.18-stable 2020-05-26 + +No changes compared to v20.4.3.16-stable. + ### ClickHouse release v20.4.3.16-stable 2020-05-23 #### Bug Fix @@ -323,6 +762,81 @@ ## ClickHouse release v20.3 +### ClickHouse release v20.3.12.112-lts 2020-06-25 + +#### Bug Fix + +* Fix rare crash caused by using `Nullable` column in prewhere condition. 
Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which were success while an error expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)). +* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format. [#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([tavplubix](https://github.com/tavplubix)). +* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)). +* Pass proper timeouts when communicating with XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix error which leads to an incorrect state of `system.mutations`. It may show that whole mutation is already done but the server still has `MUTATE_PART` tasks in the replication queue and tries to execute them. This fixes [#11611](https://github.com/ClickHouse/ClickHouse/issues/11611). [#11681](https://github.com/ClickHouse/ClickHouse/pull/11681) ([alesapin](https://github.com/alesapin)). +* Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove trivial count query optimization if row-level security is set. In previous versions the user get total count of records in a table instead filtered. This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error `Block structure mismatch` for queries with sampling reading from `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong exit code of the clickhouse-client, when exception.code() % 256 = 0. 
[#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix trivial error in log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* All queries in HTTP session have had the same query_id. It is fixed. [#11578](https://github.com/ClickHouse/ClickHouse/pull/11578) ([tavplubix](https://github.com/tavplubix)). +* Now clickhouse-server docker container will prefer IPv6 checking server aliveness. [#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). +* Fix shard_num/replica_num for `` (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). +* Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong results of distributed queries when alias could override qualified column name. Fixes [#9672](https://github.com/ClickHouse/ClickHouse/issues/9672) [#9714](https://github.com/ClickHouse/ClickHouse/issues/9714). [#9972](https://github.com/ClickHouse/ClickHouse/pull/9972) ([Artem Zuikov](https://github.com/4ertus2)). + + +### ClickHouse release v20.3.11.97-lts 2020-06-10 + +#### New Feature + +* Now ClickHouse controls timeouts of dictionary sources on its side. Two new settings added to cache dictionary configuration: `strict_max_lifetime_seconds`, which is `max_lifetime` by default and `query_wait_timeout_milliseconds`, which is one minute by default. The first settings is also useful with `allow_read_expired_keys` settings (to forbid reading very expired keys). [#10337](https://github.com/ClickHouse/ClickHouse/pull/10337) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix + +* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). +* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Fix crash in JOIN over LowCarinality(T) and Nullable(T). 
[#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)). +* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Better errors for `joinGet()` functions. [#11389](https://github.com/ClickHouse/ClickHouse/pull/11389) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove redundant lock during parts send in ReplicatedMergeTree. [#11354](https://github.com/ClickHouse/ClickHouse/pull/11354) ([alesapin](https://github.com/alesapin)). +* Fix support for `\G` (vertical output) in clickhouse-client in multiline mode. This closes [#9933](https://github.com/ClickHouse/ClickHouse/issues/9933). [#11350](https://github.com/ClickHouse/ClickHouse/pull/11350) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash in direct selects from StorageJoin (without JOIN) and wrong nullability. [#11340](https://github.com/ClickHouse/ClickHouse/pull/11340) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now merges stopped before change metadata in `ALTER` queries. [#11335](https://github.com/ClickHouse/ClickHouse/pull/11335) ([alesapin](https://github.com/alesapin)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. 
Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed S3 globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Fix excessive reserving of threads for simple queries (optimization for reducing the number of threads, which was partly broken after changes in pipeline). [#11114](https://github.com/ClickHouse/ClickHouse/pull/11114) ([Azat Khuzhin](https://github.com/azat)). +* Fix predicates optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with `HAVING` section (i.e. when filtering on the server initiator is required), by preserving the order of expressions (and this is enough to fix), and also force aggregator use column names over indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). +* Introduce commit retry logic to decrease the possibility of getting duplicates from Kafka in rare cases when offset commit was failed. 
[#9884](https://github.com/ClickHouse/ClickHouse/pull/9884) ([filimonov](https://github.com/filimonov)). + +#### Performance Improvement + +* Get the dictionary and check access rights only once per call of any function reading external dictionaries. [#10928](https://github.com/ClickHouse/ClickHouse/pull/10928) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement + +* Fix several flaky integration tests. [#11355](https://github.com/ClickHouse/ClickHouse/pull/11355) ([alesapin](https://github.com/alesapin)). + ### ClickHouse release v20.3.10.75-lts 2020-05-23 #### Bug Fix @@ -724,6 +1238,82 @@ ## ClickHouse release v20.1 +### ClickHouse release v20.1.16.120-stable 2020-06-26 + +#### Bug Fix + +* Fix rare crash caused by using `Nullable` column in prewhere condition. Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix unexpected behaviour of queries like `SELECT *, xyz.*`, which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format. [#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([tavplubix](https://github.com/tavplubix)). +* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)). +* Pass proper timeouts when communicating with the XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong exit code of clickhouse-client when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix trivial error in the log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now the clickhouse-server Docker container will prefer IPv6 when checking server aliveness.
[#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). +* Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix usage of primary key wrapped into a function with 'FINAL' modifier and 'ORDER BY' optimization. [#10715](https://github.com/ClickHouse/ClickHouse/pull/10715) ([Anton Popov](https://github.com/CurtizJ)). + + +### ClickHouse release v20.1.15.109-stable 2020-06-19 + +#### Bug Fix + +* Fix excess lock for structure during alter. [#11790](https://github.com/ClickHouse/ClickHouse/pull/11790) ([alesapin](https://github.com/alesapin)). + + +### ClickHouse release v20.1.14.107-stable 2020-06-11 + +#### Bug Fix + +* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + + +### ClickHouse release v20.1.13.105-stable 2020-06-10 + +#### Bug Fix + +* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). +* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
+* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)). +* Fixed parseDateTime64BestEffort argument resolution bugs. [#10925](https://github.com/ClickHouse/ClickHouse/issues/10925). [#11038](https://github.com/ClickHouse/ClickHouse/pull/11038) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix incorrect raw data size in method getRawData(). [#10964](https://github.com/ClickHouse/ClickHouse/pull/10964) ([Igr](https://github.com/ObjatieGroba)). +* Fix backward compatibility with tuples in Distributed tables. [#10889](https://github.com/ClickHouse/ClickHouse/pull/10889) ([Anton Popov](https://github.com/CurtizJ)). +* Fix SIGSEGV in StringHashTable (if such key does not exist). [#10870](https://github.com/ClickHouse/ClickHouse/pull/10870) ([Azat Khuzhin](https://github.com/azat)). +* Fixed bug in `ReplicatedMergeTree` which might cause some `ALTER` on `OPTIMIZE` query to hang waiting for some replica after it become inactive. 
[#10849](https://github.com/ClickHouse/ClickHouse/pull/10849) ([tavplubix](https://github.com/tavplubix)). +* Fix columns order after Block::sortColumns() (also add a test that shows that it affects a real use case: the Buffer engine). [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)). +* Fix the issue with ODBC bridge when no quoting of identifiers is requested. This fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan and MSan report in DateLUT. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make use of `src_type` for correct type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Fix `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` should be finished even if an exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix combinator -OrNull and -OrDefault when combined with -State. [#10741](https://github.com/ClickHouse/ClickHouse/pull/10741) ([hcz](https://github.com/hczhcz)). +* Fix disappearing totals. Totals could have been filtered if the query had a join or a subquery with an external WHERE condition. Fixes [#10674](https://github.com/ClickHouse/ClickHouse/issues/10674). [#10698](https://github.com/ClickHouse/ClickHouse/pull/10698) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix multiple usages of `IN` operator with the identical set in one query. [#10686](https://github.com/ClickHouse/ClickHouse/pull/10686) ([Anton Popov](https://github.com/CurtizJ)). +* Fix order of parameters in AggregateTransform constructor. [#10667](https://github.com/ClickHouse/ClickHouse/pull/10667) ([palasonic1](https://github.com/palasonic1)). +* Fix the lack of parallel execution of remote queries with `distributed_aggregation_memory_efficient` enabled. Fixes [#10655](https://github.com/ClickHouse/ClickHouse/issues/10655). [#10664](https://github.com/ClickHouse/ClickHouse/pull/10664) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix predicates optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with `HAVING` section (i.e. when filtering on the server initiator is required), by preserving the order of expressions (and this is enough to fix), and also by forcing the aggregator to use column names instead of indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)). +* Fix SELECT of an ALIAS column whose default expression type differs from the column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)). 
+* * Implemented comparison between DateTime64 and String values (just like for DateTime). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)). + + ### ClickHouse release v20.1.12.86, 2020-05-26 #### Bug Fix diff --git a/CMakeLists.txt b/CMakeLists.txt index 943bc6412b3..d4d325818e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -342,6 +342,7 @@ include (cmake/find/sparsehash.cmake) include (cmake/find/re2.cmake) include (cmake/find/libgsasl.cmake) include (cmake/find/rdkafka.cmake) +include (cmake/find/amqpcpp.cmake) include (cmake/find/capnp.cmake) include (cmake/find/llvm.cmake) include (cmake/find/opencl.cmake) @@ -420,3 +421,5 @@ add_subdirectory (tests) add_subdirectory (utils) include (cmake/print_include_directories.cmake) + +include (cmake/sanitize_target_link_libraries.cmake) diff --git a/README.md b/README.md index ef39a163807..ef4e02c5434 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,8 @@ ClickHouse is an open-source column-oriented database management system that all * [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. + +## Upcoming Events + +* [ClickHouse for genetic data (in Russian)](https://cloud.yandex.ru/events/152) on July 14, 2020. +* [ClickHouse virtual office hours](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/271522978/) on July 15, 2020. diff --git a/SECURITY.md b/SECURITY.md index 7210db23183..b95b8b30a3d 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -11,7 +11,10 @@ currently being supported with security updates: | 18.x | :x: | | 19.x | :x: | | 19.14 | :white_check_mark: | -| 20.x | :white_check_mark: | +| 20.1 | :x: | +| 20.3 | :white_check_mark: | +| 20.4 | :white_check_mark: | +| 20.5 | :white_check_mark: | ## Reporting a Vulnerability diff --git a/base/common/CMakeLists.txt b/base/common/CMakeLists.txt index 074f73b158b..f09335f0ca0 100644 --- a/base/common/CMakeLists.txt +++ b/base/common/CMakeLists.txt @@ -77,10 +77,8 @@ target_link_libraries (common Poco::Util Poco::Foundation replxx - fmt - - PRIVATE cctz + fmt ) if (ENABLE_TESTS) diff --git a/base/common/LineReader.h b/base/common/LineReader.h index 6f6e8176c9e..77dc70d8808 100644 --- a/base/common/LineReader.h +++ b/base/common/LineReader.h @@ -48,7 +48,7 @@ protected: }; const String history_file_path; - static constexpr char word_break_characters[] = " \t\v\f\a\b\r\n`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?_"; + static constexpr char word_break_characters[] = " \t\v\f\a\b\r\n`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?"; String input; diff --git a/base/common/ya.make b/base/common/ya.make index d40b1f5abfd..b64ab93f2fc 100644 --- a/base/common/ya.make +++ b/base/common/ya.make @@ -2,7 +2,7 @@ LIBRARY() ADDINCL( GLOBAL clickhouse/base - contrib/libs/cctz/include + GLOBAL contrib/libs/cctz/include ) CFLAGS (GLOBAL -DARCADIA_BUILD) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index e7ccf84d7da..711bbd0290a 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -628,7 +628,7 @@ void BaseDaemon::initialize(Application & self) /// Create pid file. 
if (config().has("pid")) - pid.emplace(config().getString("pid")); + pid.emplace(config().getString("pid"), DB::StatusFile::write_pid); /// Change path for logging. if (!log_path.empty()) @@ -812,63 +812,6 @@ void BaseDaemon::defineOptions(Poco::Util::OptionSet & new_options) Poco::Util::ServerApplication::defineOptions(new_options); } -bool isPidRunning(pid_t pid) -{ - return getpgid(pid) >= 0; -} - -BaseDaemon::PID::PID(const std::string & file_) -{ - file = Poco::Path(file_).absolute().toString(); - Poco::File poco_file(file); - - if (poco_file.exists()) - { - pid_t pid_read = 0; - { - std::ifstream in(file); - if (in.good()) - { - in >> pid_read; - if (pid_read && isPidRunning(pid_read)) - throw Poco::Exception("Pid file exists and program running with pid = " + std::to_string(pid_read) + ", should not start daemon."); - } - } - std::cerr << "Old pid file exists (with pid = " << pid_read << "), removing." << std::endl; - poco_file.remove(); - } - - int fd = open(file.c_str(), - O_CREAT | O_EXCL | O_WRONLY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); - - if (-1 == fd) - { - if (EEXIST == errno) - throw Poco::Exception("Pid file exists, should not start daemon."); - throw Poco::CreateFileException("Cannot create pid file."); - } - - SCOPE_EXIT({ close(fd); }); - - std::stringstream s; - s << getpid(); - if (static_cast(s.str().size()) != write(fd, s.str().c_str(), s.str().size())) - throw Poco::Exception("Cannot write to pid file."); -} - -BaseDaemon::PID::~PID() -{ - try - { - Poco::File(file).remove(); - } - catch (...) - { - DB::tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - void BaseDaemon::handleSignal(int signal_id) { if (signal_id == SIGINT || diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 2a3262dd26f..41d4ad58869 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -163,16 +164,7 @@ protected: std::unique_ptr task_manager; - /// RAII wrapper for pid file. - struct PID - { - std::string file; - - PID(const std::string & file_); - ~PID(); - }; - - std::optional pid; + std::optional pid; std::atomic_bool is_cancelled{false}; diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 04d2f059b39..26d59a57e7f 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -8,6 +8,5 @@ target_include_directories (daemon PUBLIC ..) 
target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) if (USE_SENTRY) - target_link_libraries (daemon PRIVATE curl) target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) endif () diff --git a/base/ext/chrono_io.h b/base/ext/chrono_io.h index 392ec25d526..0b1c47d3874 100644 --- a/base/ext/chrono_io.h +++ b/base/ext/chrono_io.h @@ -1,19 +1,16 @@ #pragma once #include -#include #include -#include #include +#include namespace ext { inline std::string to_string(const std::time_t & time) { - std::stringstream ss; - ss << std::put_time(std::localtime(&time), "%Y-%m-%d %X"); - return ss.str(); + return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone()); } template diff --git a/cmake/find/amqpcpp.cmake b/cmake/find/amqpcpp.cmake new file mode 100644 index 00000000000..147824ff395 --- /dev/null +++ b/cmake/find/amqpcpp.cmake @@ -0,0 +1,20 @@ +SET(ENABLE_AMQPCPP 1) +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt") + message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init --recursive") + set (ENABLE_AMQPCPP 0) +endif () + +if (ENABLE_AMQPCPP) + + set (USE_AMQPCPP 1) + set (AMQPCPP_LIBRARY AMQP-CPP) + + set (AMQPCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include") + + list (APPEND AMQPCPP_INCLUDE_DIR + "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include" + "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP") + +endif() + +message (STATUS "Using AMQP-CPP=${USE_AMQPCPP}: ${AMQPCPP_INCLUDE_DIR} : ${AMQPCPP_LIBRARY}") diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index eadf071141e..84425220f12 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -6,9 +6,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") endif () if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT_UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG)) - option (USE_SENTRY "Use Sentry" ON) - set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) - set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) + option (USE_SENTRY "Use Sentry" ${ENABLE_LIBRARIES}) set (SENTRY_TRANSPORT "curl" CACHE STRING "") set (SENTRY_BACKEND "none" CACHE STRING "") set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "") diff --git a/cmake/sanitize_target_link_libraries.cmake b/cmake/sanitize_target_link_libraries.cmake new file mode 100644 index 00000000000..d66ea338a52 --- /dev/null +++ b/cmake/sanitize_target_link_libraries.cmake @@ -0,0 +1,56 @@ +# When you will try to link target with the directory (that exists), cmake will +# skip this without an error, only the following warning will be reported: +# +# target_link_libraries(main /tmp) +# +# WARNING: Target "main" requests linking to directory "/tmp". Targets may link only to libraries. CMake is dropping the item. +# +# And there is no cmake policy that controls this. +# (I guess the reason that it is allowed is because of FRAMEWORK for OSX). +# +# So to avoid error-prone cmake rules, this can be sanitized. +# There are the following ways: +# - overwrite target_link_libraries()/link_libraries() and check *before* +# calling real macro, but this requires duplicate all supported syntax +# -- too complex +# - overwrite target_link_libraries() and check LINK_LIBRARIES property, this +# works great +# -- but cannot be used with link_libraries() +# - use BUILDSYSTEM_TARGETS property to get list of all targets and sanitize +# -- this will work. 
+ +# https://stackoverflow.com/a/62311397/328260 +function (get_all_targets var) + set (targets) + get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}) + set (${var} ${targets} PARENT_SCOPE) +endfunction() +macro (get_all_targets_recursive targets dir) + get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach (subdir ${subdirectories}) + get_all_targets_recursive (${targets} ${subdir}) + endforeach () + get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + list (APPEND ${targets} ${current_targets}) +endmacro () + +macro (sanitize_link_libraries target) + get_target_property(target_type ${target} TYPE) + if (${target_type} STREQUAL "INTERFACE_LIBRARY") + get_property(linked_libraries TARGET ${target} PROPERTY INTERFACE_LINK_LIBRARIES) + else() + get_property(linked_libraries TARGET ${target} PROPERTY LINK_LIBRARIES) + endif() + foreach (linked_library ${linked_libraries}) + if (TARGET ${linked_library}) + # just in case, skip if TARGET + elseif (IS_DIRECTORY ${linked_library}) + message(FATAL_ERROR "${target} requested to link with directory: ${linked_library}") + endif() + endforeach() +endmacro() + +get_all_targets (all_targets) +foreach (target ${all_targets}) + sanitize_link_libraries(${target}) +endforeach() diff --git a/contrib/AMQP-CPP b/contrib/AMQP-CPP new file mode 160000 index 00000000000..1c08399ab0a --- /dev/null +++ b/contrib/AMQP-CPP @@ -0,0 +1 @@ +Subproject commit 1c08399ab0ab9e4042ef8e2bbe9e208e5dcbc13b diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f2222797bff..e0cb8ad760a 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -106,6 +106,12 @@ if (ENABLE_LDAP AND USE_INTERNAL_LDAP_LIBRARY) add_subdirectory (openldap-cmake) endif () +# Should go before: +# - mariadb-connector-c +# - aws-s3-cmake +# - sentry-native +add_subdirectory (curl-cmake) + function(mysql_support) set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC) set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC) @@ -263,23 +269,6 @@ if (USE_INTERNAL_GRPC_LIBRARY) add_subdirectory(grpc-cmake) endif () -if (USE_INTERNAL_AWS_S3_LIBRARY OR USE_SENTRY) - set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) - set (save_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) - set (save_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) - set (save_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set (save_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}) - add_subdirectory(curl-cmake) - set (CMAKE_C_FLAGS ${save_CMAKE_C_FLAGS}) - set (CMAKE_REQUIRED_LIBRARIES ${save_CMAKE_REQUIRED_LIBRARIES}) - set (CMAKE_CMAKE_REQUIRED_INCLUDES ${save_CMAKE_REQUIRED_INCLUDES}) - set (CMAKE_REQUIRED_FLAGS ${save_CMAKE_REQUIRED_FLAGS}) - set (CMAKE_CMAKE_MODULE_PATH ${save_CMAKE_MODULE_PATH}) - - # The library is large - avoid bloat. 
- target_compile_options (curl PRIVATE -g0) -endif () - if (USE_INTERNAL_AWS_S3_LIBRARY) add_subdirectory(aws-s3-cmake) @@ -301,6 +290,10 @@ if (USE_FASTOPS) add_subdirectory (fastops-cmake) endif() +if (USE_AMQPCPP) + add_subdirectory (amqpcpp-cmake) +endif() + if (USE_CASSANDRA) add_subdirectory (libuv) add_subdirectory (cassandra) diff --git a/contrib/FastMemcpy/FastMemcpy.h b/contrib/FastMemcpy/FastMemcpy.h index dd89a55dbe9..5dcbfcf1656 100644 --- a/contrib/FastMemcpy/FastMemcpy.h +++ b/contrib/FastMemcpy/FastMemcpy.h @@ -86,7 +86,10 @@ static INLINE void memcpy_sse2_128(void *dst, const void *src) { //--------------------------------------------------------------------- // tiny memory copy with jump table optimized //--------------------------------------------------------------------- -static INLINE void *memcpy_tiny(void *dst, const void *src, size_t size) { +/// Attribute is used to avoid an error with undefined behaviour sanitizer +/// ../contrib/FastMemcpy/FastMemcpy.h:91:56: runtime error: applying zero offset to null pointer +/// Found by 01307_orc_output_format.sh, cause - ORCBlockInputFormat and external ORC library. +__attribute__((__no_sanitize__("undefined"))) static INLINE void *memcpy_tiny(void *dst, const void *src, size_t size) { unsigned char *dd = ((unsigned char*)dst) + size; const unsigned char *ss = ((const unsigned char*)src) + size; diff --git a/contrib/amqpcpp-cmake/CMakeLists.txt b/contrib/amqpcpp-cmake/CMakeLists.txt new file mode 100644 index 00000000000..452a5f7f6aa --- /dev/null +++ b/contrib/amqpcpp-cmake/CMakeLists.txt @@ -0,0 +1,44 @@ +set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP) + +set (SRCS + ${LIBRARY_DIR}/src/array.cpp + ${LIBRARY_DIR}/src/channel.cpp + ${LIBRARY_DIR}/src/channelimpl.cpp + ${LIBRARY_DIR}/src/connectionimpl.cpp + ${LIBRARY_DIR}/src/deferredcancel.cpp + ${LIBRARY_DIR}/src/deferredconfirm.cpp + ${LIBRARY_DIR}/src/deferredconsumer.cpp + ${LIBRARY_DIR}/src/deferredextreceiver.cpp + ${LIBRARY_DIR}/src/deferredget.cpp + ${LIBRARY_DIR}/src/deferredpublisher.cpp + ${LIBRARY_DIR}/src/deferredreceiver.cpp + ${LIBRARY_DIR}/src/field.cpp + ${LIBRARY_DIR}/src/flags.cpp + ${LIBRARY_DIR}/src/linux_tcp/openssl.cpp + ${LIBRARY_DIR}/src/linux_tcp/tcpconnection.cpp + ${LIBRARY_DIR}/src/receivedframe.cpp + ${LIBRARY_DIR}/src/table.cpp + ${LIBRARY_DIR}/src/watchable.cpp +) + +add_library(amqp-cpp ${SRCS}) + +target_compile_options (amqp-cpp + PUBLIC + -Wno-old-style-cast + -Wno-inconsistent-missing-destructor-override + -Wno-deprecated + -Wno-unused-parameter + -Wno-shadow + -Wno-tautological-type-limit-compare + -Wno-extra-semi +# NOTE: disable all warnings at last because the warning: + # "conversion function converting 'XXX' to itself will never be used" + # doesn't have it's own diagnostic flag yet. 
+ -w +) + +target_include_directories (amqp-cpp PUBLIC ${LIBRARY_DIR}/include) + +target_link_libraries (amqp-cpp PUBLIC ssl) + diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index d0f6a7773b0..3c3226cae9e 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -1,152 +1,187 @@ -set (CURL_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl) -set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) -set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) +option (ENABLE_CURL "Enable curl" ${ENABLE_LIBRARIES}) -set (SRCS - ${CURL_DIR}/lib/file.c - ${CURL_DIR}/lib/timeval.c - ${CURL_DIR}/lib/base64.c - ${CURL_DIR}/lib/hostip.c - ${CURL_DIR}/lib/progress.c - ${CURL_DIR}/lib/formdata.c - ${CURL_DIR}/lib/cookie.c - ${CURL_DIR}/lib/http.c - ${CURL_DIR}/lib/sendf.c - ${CURL_DIR}/lib/url.c - ${CURL_DIR}/lib/dict.c - ${CURL_DIR}/lib/if2ip.c - ${CURL_DIR}/lib/speedcheck.c - ${CURL_DIR}/lib/ldap.c - ${CURL_DIR}/lib/version.c - ${CURL_DIR}/lib/getenv.c - ${CURL_DIR}/lib/escape.c - ${CURL_DIR}/lib/mprintf.c - ${CURL_DIR}/lib/telnet.c - ${CURL_DIR}/lib/netrc.c - ${CURL_DIR}/lib/getinfo.c - ${CURL_DIR}/lib/transfer.c - ${CURL_DIR}/lib/strcase.c - ${CURL_DIR}/lib/easy.c - ${CURL_DIR}/lib/security.c - ${CURL_DIR}/lib/curl_fnmatch.c - ${CURL_DIR}/lib/fileinfo.c - ${CURL_DIR}/lib/wildcard.c - ${CURL_DIR}/lib/krb5.c - ${CURL_DIR}/lib/memdebug.c - ${CURL_DIR}/lib/http_chunks.c - ${CURL_DIR}/lib/strtok.c - ${CURL_DIR}/lib/connect.c - ${CURL_DIR}/lib/llist.c - ${CURL_DIR}/lib/hash.c - ${CURL_DIR}/lib/multi.c - ${CURL_DIR}/lib/content_encoding.c - ${CURL_DIR}/lib/share.c - ${CURL_DIR}/lib/http_digest.c - ${CURL_DIR}/lib/md4.c - ${CURL_DIR}/lib/md5.c - ${CURL_DIR}/lib/http_negotiate.c - ${CURL_DIR}/lib/inet_pton.c - ${CURL_DIR}/lib/strtoofft.c - ${CURL_DIR}/lib/strerror.c - ${CURL_DIR}/lib/amigaos.c - ${CURL_DIR}/lib/hostasyn.c - ${CURL_DIR}/lib/hostip4.c - ${CURL_DIR}/lib/hostip6.c - ${CURL_DIR}/lib/hostsyn.c - ${CURL_DIR}/lib/inet_ntop.c - ${CURL_DIR}/lib/parsedate.c - ${CURL_DIR}/lib/select.c - ${CURL_DIR}/lib/splay.c - ${CURL_DIR}/lib/strdup.c - ${CURL_DIR}/lib/socks.c - ${CURL_DIR}/lib/curl_addrinfo.c - ${CURL_DIR}/lib/socks_gssapi.c - ${CURL_DIR}/lib/socks_sspi.c - ${CURL_DIR}/lib/curl_sspi.c - ${CURL_DIR}/lib/slist.c - ${CURL_DIR}/lib/nonblock.c - ${CURL_DIR}/lib/curl_memrchr.c - ${CURL_DIR}/lib/imap.c - ${CURL_DIR}/lib/pop3.c - ${CURL_DIR}/lib/smtp.c - ${CURL_DIR}/lib/pingpong.c - ${CURL_DIR}/lib/rtsp.c - ${CURL_DIR}/lib/curl_threads.c - ${CURL_DIR}/lib/warnless.c - ${CURL_DIR}/lib/hmac.c - ${CURL_DIR}/lib/curl_rtmp.c - ${CURL_DIR}/lib/openldap.c - ${CURL_DIR}/lib/curl_gethostname.c - ${CURL_DIR}/lib/gopher.c - ${CURL_DIR}/lib/idn_win32.c - ${CURL_DIR}/lib/http_proxy.c - ${CURL_DIR}/lib/non-ascii.c - ${CURL_DIR}/lib/asyn-thread.c - ${CURL_DIR}/lib/curl_gssapi.c - ${CURL_DIR}/lib/http_ntlm.c - ${CURL_DIR}/lib/curl_ntlm_wb.c - ${CURL_DIR}/lib/curl_ntlm_core.c - ${CURL_DIR}/lib/curl_sasl.c - ${CURL_DIR}/lib/rand.c - ${CURL_DIR}/lib/curl_multibyte.c - ${CURL_DIR}/lib/hostcheck.c - ${CURL_DIR}/lib/conncache.c - ${CURL_DIR}/lib/dotdot.c - ${CURL_DIR}/lib/x509asn1.c - ${CURL_DIR}/lib/http2.c - ${CURL_DIR}/lib/smb.c - ${CURL_DIR}/lib/curl_endian.c - ${CURL_DIR}/lib/curl_des.c - ${CURL_DIR}/lib/system_win32.c - ${CURL_DIR}/lib/mime.c - ${CURL_DIR}/lib/sha256.c - ${CURL_DIR}/lib/setopt.c - ${CURL_DIR}/lib/curl_path.c - ${CURL_DIR}/lib/curl_ctype.c - ${CURL_DIR}/lib/curl_range.c - ${CURL_DIR}/lib/psl.c - ${CURL_DIR}/lib/doh.c - 
${CURL_DIR}/lib/urlapi.c - ${CURL_DIR}/lib/curl_get_line.c - ${CURL_DIR}/lib/altsvc.c - ${CURL_DIR}/lib/socketpair.c - ${CURL_DIR}/lib/vauth/vauth.c - ${CURL_DIR}/lib/vauth/cleartext.c - ${CURL_DIR}/lib/vauth/cram.c - ${CURL_DIR}/lib/vauth/digest.c - ${CURL_DIR}/lib/vauth/digest_sspi.c - ${CURL_DIR}/lib/vauth/krb5_gssapi.c - ${CURL_DIR}/lib/vauth/krb5_sspi.c - ${CURL_DIR}/lib/vauth/ntlm.c - ${CURL_DIR}/lib/vauth/ntlm_sspi.c - ${CURL_DIR}/lib/vauth/oauth2.c - ${CURL_DIR}/lib/vauth/spnego_gssapi.c - ${CURL_DIR}/lib/vauth/spnego_sspi.c - ${CURL_DIR}/lib/vtls/openssl.c - ${CURL_DIR}/lib/vtls/gtls.c - ${CURL_DIR}/lib/vtls/vtls.c - ${CURL_DIR}/lib/vtls/nss.c - ${CURL_DIR}/lib/vtls/polarssl.c - ${CURL_DIR}/lib/vtls/polarssl_threadlock.c - ${CURL_DIR}/lib/vtls/wolfssl.c - ${CURL_DIR}/lib/vtls/schannel.c - ${CURL_DIR}/lib/vtls/schannel_verify.c - ${CURL_DIR}/lib/vtls/sectransp.c - ${CURL_DIR}/lib/vtls/gskit.c - ${CURL_DIR}/lib/vtls/mbedtls.c - ${CURL_DIR}/lib/vtls/mesalink.c - ${CURL_DIR}/lib/vtls/bearssl.c - ${CURL_DIR}/lib/vquic/ngtcp2.c - ${CURL_DIR}/lib/vquic/quiche.c - ${CURL_DIR}/lib/vssh/libssh2.c - ${CURL_DIR}/lib/vssh/libssh.c -) +if (ENABLE_CURL) + option (USE_INTERNAL_CURL "Use internal curl library" ${NOT_UNBUNDLED}) -add_library (curl ${SRCS}) + if (USE_INTERNAL_CURL) + set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") -target_compile_definitions(curl PRIVATE HAVE_CONFIG_H BUILDING_LIBCURL CURL_HIDDEN_SYMBOLS libcurl_EXPORTS) -target_include_directories(curl PUBLIC ${CURL_DIR}/include ${CURL_DIR}/lib .) + set (SRCS + ${LIBRARY_DIR}/lib/file.c + ${LIBRARY_DIR}/lib/timeval.c + ${LIBRARY_DIR}/lib/base64.c + ${LIBRARY_DIR}/lib/hostip.c + ${LIBRARY_DIR}/lib/progress.c + ${LIBRARY_DIR}/lib/formdata.c + ${LIBRARY_DIR}/lib/cookie.c + ${LIBRARY_DIR}/lib/http.c + ${LIBRARY_DIR}/lib/sendf.c + ${LIBRARY_DIR}/lib/url.c + ${LIBRARY_DIR}/lib/dict.c + ${LIBRARY_DIR}/lib/if2ip.c + ${LIBRARY_DIR}/lib/speedcheck.c + ${LIBRARY_DIR}/lib/ldap.c + ${LIBRARY_DIR}/lib/version.c + ${LIBRARY_DIR}/lib/getenv.c + ${LIBRARY_DIR}/lib/escape.c + ${LIBRARY_DIR}/lib/mprintf.c + ${LIBRARY_DIR}/lib/telnet.c + ${LIBRARY_DIR}/lib/netrc.c + ${LIBRARY_DIR}/lib/getinfo.c + ${LIBRARY_DIR}/lib/transfer.c + ${LIBRARY_DIR}/lib/strcase.c + ${LIBRARY_DIR}/lib/easy.c + ${LIBRARY_DIR}/lib/security.c + ${LIBRARY_DIR}/lib/curl_fnmatch.c + ${LIBRARY_DIR}/lib/fileinfo.c + ${LIBRARY_DIR}/lib/wildcard.c + ${LIBRARY_DIR}/lib/krb5.c + ${LIBRARY_DIR}/lib/memdebug.c + ${LIBRARY_DIR}/lib/http_chunks.c + ${LIBRARY_DIR}/lib/strtok.c + ${LIBRARY_DIR}/lib/connect.c + ${LIBRARY_DIR}/lib/llist.c + ${LIBRARY_DIR}/lib/hash.c + ${LIBRARY_DIR}/lib/multi.c + ${LIBRARY_DIR}/lib/content_encoding.c + ${LIBRARY_DIR}/lib/share.c + ${LIBRARY_DIR}/lib/http_digest.c + ${LIBRARY_DIR}/lib/md4.c + ${LIBRARY_DIR}/lib/md5.c + ${LIBRARY_DIR}/lib/http_negotiate.c + ${LIBRARY_DIR}/lib/inet_pton.c + ${LIBRARY_DIR}/lib/strtoofft.c + ${LIBRARY_DIR}/lib/strerror.c + ${LIBRARY_DIR}/lib/amigaos.c + ${LIBRARY_DIR}/lib/hostasyn.c + ${LIBRARY_DIR}/lib/hostip4.c + ${LIBRARY_DIR}/lib/hostip6.c + ${LIBRARY_DIR}/lib/hostsyn.c + ${LIBRARY_DIR}/lib/inet_ntop.c + ${LIBRARY_DIR}/lib/parsedate.c + ${LIBRARY_DIR}/lib/select.c + ${LIBRARY_DIR}/lib/splay.c + ${LIBRARY_DIR}/lib/strdup.c + ${LIBRARY_DIR}/lib/socks.c + ${LIBRARY_DIR}/lib/curl_addrinfo.c + ${LIBRARY_DIR}/lib/socks_gssapi.c + ${LIBRARY_DIR}/lib/socks_sspi.c + ${LIBRARY_DIR}/lib/curl_sspi.c + ${LIBRARY_DIR}/lib/slist.c + ${LIBRARY_DIR}/lib/nonblock.c + ${LIBRARY_DIR}/lib/curl_memrchr.c + ${LIBRARY_DIR}/lib/imap.c + 
${LIBRARY_DIR}/lib/pop3.c + ${LIBRARY_DIR}/lib/smtp.c + ${LIBRARY_DIR}/lib/pingpong.c + ${LIBRARY_DIR}/lib/rtsp.c + ${LIBRARY_DIR}/lib/curl_threads.c + ${LIBRARY_DIR}/lib/warnless.c + ${LIBRARY_DIR}/lib/hmac.c + ${LIBRARY_DIR}/lib/curl_rtmp.c + ${LIBRARY_DIR}/lib/openldap.c + ${LIBRARY_DIR}/lib/curl_gethostname.c + ${LIBRARY_DIR}/lib/gopher.c + ${LIBRARY_DIR}/lib/idn_win32.c + ${LIBRARY_DIR}/lib/http_proxy.c + ${LIBRARY_DIR}/lib/non-ascii.c + ${LIBRARY_DIR}/lib/asyn-thread.c + ${LIBRARY_DIR}/lib/curl_gssapi.c + ${LIBRARY_DIR}/lib/http_ntlm.c + ${LIBRARY_DIR}/lib/curl_ntlm_wb.c + ${LIBRARY_DIR}/lib/curl_ntlm_core.c + ${LIBRARY_DIR}/lib/curl_sasl.c + ${LIBRARY_DIR}/lib/rand.c + ${LIBRARY_DIR}/lib/curl_multibyte.c + ${LIBRARY_DIR}/lib/hostcheck.c + ${LIBRARY_DIR}/lib/conncache.c + ${LIBRARY_DIR}/lib/dotdot.c + ${LIBRARY_DIR}/lib/x509asn1.c + ${LIBRARY_DIR}/lib/http2.c + ${LIBRARY_DIR}/lib/smb.c + ${LIBRARY_DIR}/lib/curl_endian.c + ${LIBRARY_DIR}/lib/curl_des.c + ${LIBRARY_DIR}/lib/system_win32.c + ${LIBRARY_DIR}/lib/mime.c + ${LIBRARY_DIR}/lib/sha256.c + ${LIBRARY_DIR}/lib/setopt.c + ${LIBRARY_DIR}/lib/curl_path.c + ${LIBRARY_DIR}/lib/curl_ctype.c + ${LIBRARY_DIR}/lib/curl_range.c + ${LIBRARY_DIR}/lib/psl.c + ${LIBRARY_DIR}/lib/doh.c + ${LIBRARY_DIR}/lib/urlapi.c + ${LIBRARY_DIR}/lib/curl_get_line.c + ${LIBRARY_DIR}/lib/altsvc.c + ${LIBRARY_DIR}/lib/socketpair.c + ${LIBRARY_DIR}/lib/vauth/vauth.c + ${LIBRARY_DIR}/lib/vauth/cleartext.c + ${LIBRARY_DIR}/lib/vauth/cram.c + ${LIBRARY_DIR}/lib/vauth/digest.c + ${LIBRARY_DIR}/lib/vauth/digest_sspi.c + ${LIBRARY_DIR}/lib/vauth/krb5_gssapi.c + ${LIBRARY_DIR}/lib/vauth/krb5_sspi.c + ${LIBRARY_DIR}/lib/vauth/ntlm.c + ${LIBRARY_DIR}/lib/vauth/ntlm_sspi.c + ${LIBRARY_DIR}/lib/vauth/oauth2.c + ${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c + ${LIBRARY_DIR}/lib/vauth/spnego_sspi.c + ${LIBRARY_DIR}/lib/vtls/openssl.c + ${LIBRARY_DIR}/lib/vtls/gtls.c + ${LIBRARY_DIR}/lib/vtls/vtls.c + ${LIBRARY_DIR}/lib/vtls/nss.c + ${LIBRARY_DIR}/lib/vtls/polarssl.c + ${LIBRARY_DIR}/lib/vtls/polarssl_threadlock.c + ${LIBRARY_DIR}/lib/vtls/wolfssl.c + ${LIBRARY_DIR}/lib/vtls/schannel.c + ${LIBRARY_DIR}/lib/vtls/schannel_verify.c + ${LIBRARY_DIR}/lib/vtls/sectransp.c + ${LIBRARY_DIR}/lib/vtls/gskit.c + ${LIBRARY_DIR}/lib/vtls/mbedtls.c + ${LIBRARY_DIR}/lib/vtls/mesalink.c + ${LIBRARY_DIR}/lib/vtls/bearssl.c + ${LIBRARY_DIR}/lib/vquic/ngtcp2.c + ${LIBRARY_DIR}/lib/vquic/quiche.c + ${LIBRARY_DIR}/lib/vssh/libssh2.c + ${LIBRARY_DIR}/lib/vssh/libssh.c + ) -target_compile_definitions(curl PRIVATE OS="${CMAKE_SYSTEM_NAME}") + add_library (curl ${SRCS}) -target_link_libraries(curl PRIVATE ssl) + target_compile_definitions (curl PRIVATE + HAVE_CONFIG_H + BUILDING_LIBCURL + CURL_HIDDEN_SYMBOLS + libcurl_EXPORTS + OS="${CMAKE_SYSTEM_NAME}" + ) + target_include_directories (curl PUBLIC + ${LIBRARY_DIR}/include + ${LIBRARY_DIR}/lib + . # curl_config.h + ) + + target_link_libraries (curl PRIVATE ssl) + + # The library is large - avoid bloat (XXX: is it?) 
+ target_compile_options (curl PRIVATE -g0) + + # find_package(CURL) compatibility for the following packages that uses + # find_package(CURL)/include(FindCURL): + # - mariadb-connector-c + # - aws-s3-cmake + # - sentry-native + set (CURL_FOUND ON CACHE BOOL "") + set (CURL_ROOT_DIR ${LIBRARY_DIR} CACHE PATH "") + set (CURL_INCLUDE_DIR ${LIBRARY_DIR}/include CACHE PATH "") + set (CURL_INCLUDE_DIRS ${LIBRARY_DIR}/include CACHE PATH "") + set (CURL_LIBRARY curl CACHE STRING "") + set (CURL_LIBRARIES ${CURL_LIBRARY} CACHE STRING "") + set (CURL_VERSION_STRING 7.67.0 CACHE STRING "") + add_library (CURL::libcurl ALIAS ${CURL_LIBRARY}) + else () + find_package (CURL REQUIRED) + endif () +endif () + +message (STATUS "Using curl: ${CURL_INCLUDE_DIRS} : ${CURL_LIBRARIES}") diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 79b351c3721..13f7ea3326b 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -22,9 +22,14 @@ if (ENABLE_JEMALLOC) # # By enabling percpu_arena number of arenas limited to number of CPUs and hence # this problem should go away. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0") + # + # muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to + # avoid spurious latencies and additional work associated with + # MADV_DONTNEED. See + # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000") else() - set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0") + set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:10000") endif() # CACHE variable is empty, to allow changing defaults without necessity # to purge cache diff --git a/contrib/libdivide/libdivide.h b/contrib/libdivide/libdivide.h index a153e7f9c5e..81057b7b43d 100644 --- a/contrib/libdivide/libdivide.h +++ b/contrib/libdivide/libdivide.h @@ -76,7 +76,7 @@ do { \ fprintf(stderr, "libdivide.h:%d: %s(): Error: %s\n", \ __LINE__, LIBDIVIDE_FUNCTION, msg); \ - exit(-1); \ + abort(); \ } while (0) #if defined(LIBDIVIDE_ASSERTIONS_ON) @@ -85,7 +85,7 @@ if (!(x)) { \ fprintf(stderr, "libdivide.h:%d: %s(): Assertion failed: %s\n", \ __LINE__, LIBDIVIDE_FUNCTION, #x); \ - exit(-1); \ + abort(); \ } \ } while (0) #else @@ -290,10 +290,17 @@ static inline int32_t libdivide_count_leading_zeros32(uint32_t val) { } return 0; #else - int32_t result = 0; - uint32_t hi = 1U << 31; - for (; ~val & hi; hi >>= 1) { - result++; + if (val == 0) + return 32; + int32_t result = 8; + uint32_t hi = 0xFFU << 24; + while ((val & hi) == 0) { + hi >>= 8; + result += 8; + } + while (val & hi) { + result -= 1; + hi <<= 1; } return result; #endif diff --git a/contrib/poco b/contrib/poco index be2ab90ba5d..74c93443342 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit be2ab90ba5dccd46919a116e3fe4fa77bb85063b +Subproject commit 74c93443342f6028fa6402057684733b316aa737 diff --git a/contrib/poco-cmake/Data/ODBC/CMakeLists.txt b/contrib/poco-cmake/Data/ODBC/CMakeLists.txt index a0e4f83a7cc..b53b58b0d54 100644 --- a/contrib/poco-cmake/Data/ODBC/CMakeLists.txt +++ b/contrib/poco-cmake/Data/ODBC/CMakeLists.txt @@ -24,7 +24,7 @@ if (ENABLE_ODBC) target_include_directories (_poco_data_odbc SYSTEM PUBLIC ${LIBRARY_DIR}/Data/ODBC/include) target_link_libraries (_poco_data_odbc PUBLIC Poco::Data unixodbc) else () - add_library (Poco::Data::ODBC UNKNOWN IMPORTED) + add_library 
(Poco::Data::ODBC UNKNOWN IMPORTED GLOBAL) find_library(LIBRARY_POCO_DATA_ODBC PocoDataODBC) find_path(INCLUDE_POCO_DATA_ODBC Poco/Data/ODBC/ODBC.h) diff --git a/contrib/unixodbc-cmake/CMakeLists.txt b/contrib/unixodbc-cmake/CMakeLists.txt index 6d1922075a6..658fa3329d3 100644 --- a/contrib/unixodbc-cmake/CMakeLists.txt +++ b/contrib/unixodbc-cmake/CMakeLists.txt @@ -307,7 +307,7 @@ if (ENABLE_ODBC) set_target_properties (unixodbc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_ODBC}) endif () - target_compile_definitions (unixodbc PUBLIC USE_ODBC=1) + target_compile_definitions (unixodbc INTERFACE USE_ODBC=1) message (STATUS "Using unixodbc") else () diff --git a/docker/bare/Dockerfile b/docker/bare/Dockerfile new file mode 100644 index 00000000000..d0ee8661cad --- /dev/null +++ b/docker/bare/Dockerfile @@ -0,0 +1,2 @@ +FROM scratch +ADD root / diff --git a/docker/bare/README.md b/docker/bare/README.md new file mode 100644 index 00000000000..7b5ab6f5ea9 --- /dev/null +++ b/docker/bare/README.md @@ -0,0 +1,37 @@ +## The bare minimum ClickHouse Docker image. + +It is intended as a showcase to check the amount of implicit dependencies of ClickHouse on the OS in addition to the OS kernel. + +Example usage: + +``` +./prepare +docker build --tag clickhouse-bare . +``` + +Run clickhouse-local: +``` +docker run -it --rm --network host clickhouse-bare /clickhouse local --query "SELECT 1" +``` + +Run clickhouse-client in interactive mode: +``` +docker run -it --rm --network host clickhouse-bare /clickhouse client +``` + +Run clickhouse-server: +``` +docker run -it --rm --network host clickhouse-bare /clickhouse server +``` + +It can also be run in chroot instead of Docker (first edit the `prepare` script to enable `proc`): + +``` +sudo chroot . /clickhouse server +``` + +## What does it miss? + +- creation of `clickhouse` user to run the server; +- VOLUME for server; +- most of the details, see other docker images for comparison. diff --git a/docker/bare/prepare b/docker/bare/prepare new file mode 100755 index 00000000000..10d791cac73 --- /dev/null +++ b/docker/bare/prepare @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e + +SRC_DIR=../.. +BUILD_DIR=${SRC_DIR}/build + +# BTW, .so files are acceptable from any Linux distribution for the last 12 years (at least). +# See https://presentations.clickhouse.tech/cpp_russia_2020/ for the details. + +mkdir root +pushd root +mkdir lib lib64 etc tmp root +cp ${BUILD_DIR}/programs/clickhouse . +cp ${SRC_DIR}/programs/server/{config,users}.xml . 
+cp /lib/x86_64-linux-gnu/{libc.so.6,libdl.so.2,libm.so.6,libpthread.so.0,librt.so.1,libnss_dns.so.2,libresolv.so.2} lib +cp /lib64/ld-linux-x86-64.so.2 lib64 +cp /etc/resolv.conf ./etc +strip clickhouse + +# This is needed for chroot but not needed for Docker: + +# mkdir proc +# sudo mount --bind /proc proc diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index c4683d35e99..efa85c2a366 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -6,6 +6,7 @@ ARG version=20.6.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ + ca-certificates \ dirmngr \ gnupg \ && mkdir -p /etc/apt/sources.list.d \ diff --git a/docker/images.json b/docker/images.json index c1174327097..23f8cc0d9fd 100644 --- a/docker/images.json +++ b/docker/images.json @@ -11,7 +11,8 @@ "docker/packager/binary": { "name": "yandex/clickhouse-binary-builder", "dependent": [ - "docker/test/split_build_smoke_test" + "docker/test/split_build_smoke_test", + "docker/test/pvs" ] }, "docker/test/coverage": { diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 070e1f8c2db..30a576a5d76 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -18,7 +18,7 @@ ccache --zero-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: rm -f CMakeCache.txt cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS .. -ninja -v clickhouse-bundle +ninja clickhouse-bundle mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output find . -name '*.so' -print -exec mv '{}' /output \; diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 4f1be197668..48e90d16f5d 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -68,6 +68,7 @@ RUN apt-get --allow-unauthenticated update -y \ libre2-dev \ libjemalloc-dev \ libmsgpack-dev \ + libcurl4-openssl-dev \ opencl-headers \ ocl-icd-libopencl1 \ intel-opencl-icd \ diff --git a/docker/packager/packager b/docker/packager/packager index ccb01a4df92..c3e0778e10a 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -31,7 +31,7 @@ def pull_image(image_name): def build_image(image_name, filepath): subprocess.check_call("docker build --network=host -t {} -f {} .".format(image_name, filepath), shell=True) -def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir): +def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version): env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -46,7 +46,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache ch_root=ch_root, ccache_dir=ccache_dir, env=env_part, - img_name=image_name, + img_name=image_name + ":" + docker_image_version, interactive=interactive ) @@ -189,6 +189,7 @@ if __name__ == "__main__": parser.add_argument("--alien-pkgs", nargs='+', default=[]) parser.add_argument("--with-coverage", action="store_true") parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="") + parser.add_argument("--docker-image-version", default="latest") args = parser.parse_args() if not os.path.isabs(args.output_dir): @@ -212,12 +213,14 @@ if __name__ == "__main__": logging.info("Should place {} to output".format(args.with_binaries)) dockerfile = os.path.join(ch_root, "docker/packager", image_type, 
"Dockerfile") + image_with_version = image_name + ":" + args.docker_image_version if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image: - if not pull_image(image_name) or args.force_build_image: - build_image(image_name, dockerfile) + if not pull_image(image_with_version) or args.force_build_image: + build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy, args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries) - run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) + + run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir, args.docker_image_version) logging.info("Output placed into {}".format(args.output_dir)) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 132a5d89959..1d23cda5fb3 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG version=20.6.1.* @@ -7,19 +7,21 @@ ARG gosu_ver=1.10 RUN apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ + ca-certificates \ dirmngr \ gnupg \ && mkdir -p /etc/apt/sources.list.d \ && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ && apt-get update \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --allow-unauthenticated --yes --no-install-recommends \ clickhouse-common-static=$version \ clickhouse-client=$version \ clickhouse-server=$version \ locales \ - ca-certificates \ wget \ && rm -rf \ /var/lib/apt/lists/* \ diff --git a/docker/test/integration/README.md b/docker/test/integration/README.md index 4aa10d6db80..a11cf059655 100644 --- a/docker/test/integration/README.md +++ b/docker/test/integration/README.md @@ -1,6 +1,6 @@ ## Docker containers for integration tests - `base` container with required packages - `runner` container with that runs integration tests in docker -- `compose` contains docker_compose YaML files that are used in tests +- `runnner/compose` contains docker\_compose YaML files that are used in tests -How to run integration tests is described in tests/integration/README.md \ No newline at end of file +How to run integration tests is described in tests/integration/README.md diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 9c1fe66cf7b..7608666ecc2 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -26,6 +26,7 @@ RUN apt-get update \ liblua5.1-dev \ luajit \ libssl-dev \ + libcurl4-openssl-dev \ gdb \ && rm -rf \ /var/lib/apt/lists/* \ @@ -62,6 +63,7 @@ RUN set -eux; \ COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ +COPY compose/ /compose/ RUN set -x \ && addgroup --system dockremap \ diff --git a/docker/test/integration/compose/docker_compose_cassandra.yml b/docker/test/integration/runner/compose/docker_compose_cassandra.yml similarity index 100% rename from 
docker/test/integration/compose/docker_compose_cassandra.yml rename to docker/test/integration/runner/compose/docker_compose_cassandra.yml diff --git a/docker/test/integration/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_hdfs.yml rename to docker/test/integration/runner/compose/docker_compose_hdfs.yml diff --git a/docker/test/integration/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_kafka.yml rename to docker/test/integration/runner/compose/docker_compose_kafka.yml diff --git a/docker/test/integration/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_minio.yml rename to docker/test/integration/runner/compose/docker_compose_minio.yml diff --git a/docker/test/integration/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_mongo.yml rename to docker/test/integration/runner/compose/docker_compose_mongo.yml diff --git a/docker/test/integration/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_mysql.yml rename to docker/test/integration/runner/compose/docker_compose_mysql.yml diff --git a/docker/test/integration/compose/docker_compose_net.yml b/docker/test/integration/runner/compose/docker_compose_net.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_net.yml rename to docker/test/integration/runner/compose/docker_compose_net.yml diff --git a/docker/test/integration/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_postgres.yml rename to docker/test/integration/runner/compose/docker_compose_postgres.yml diff --git a/docker/test/integration/runner/compose/docker_compose_rabbitmq.yml b/docker/test/integration/runner/compose/docker_compose_rabbitmq.yml new file mode 100644 index 00000000000..1e9c3777505 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_rabbitmq.yml @@ -0,0 +1,12 @@ +version: '2.3' + +services: + rabbitmq1: + image: rabbitmq:3-management + hostname: rabbitmq1 + ports: + - "5672:5672" + - "15672:15672" + environment: + RABBITMQ_DEFAULT_USER: "root" + RABBITMQ_DEFAULT_PASS: "clickhouse" diff --git a/docker/test/integration/compose/docker_compose_redis.yml b/docker/test/integration/runner/compose/docker_compose_redis.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_redis.yml rename to docker/test/integration/runner/compose/docker_compose_redis.yml diff --git a/docker/test/integration/compose/docker_compose_zookeeper.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_zookeeper.yml rename to docker/test/integration/runner/compose/docker_compose_zookeeper.yml diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 1899a440074..d3debe82c56 100755 --- 
a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -157,7 +157,11 @@ function run_tests TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n") # the grep is to filter out set -x output and keep only time output - { time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; } 2>&1 >/dev/null | grep -v ^+ >> "wall-clock-times.tsv" || continue + { \ + time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 \ + -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; \ + } 2>&1 >/dev/null | grep -v ^+ >> "wall-clock-times.tsv" \ + || echo "Test $test_name failed with error code $?" >> "$test_name-err.log" done unset TIMEFORMAT @@ -274,10 +278,11 @@ for test_file in $(find . -maxdepth 1 -name "*-raw.tsv" -print) do test_name=$(basename "$test_file" "-raw.tsv") sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv" - sed -n "s/^client-time/$test_name/p" < "$test_file" >> "analyze/client-times.tsv" - sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "analyze/report-thresholds.tsv" - sed -n "s/^skipped/$test_name/p" < "$test_file" >> "analyze/skipped-tests.tsv" - sed -n "s/^display-name/$test_name/p" < "$test_file" >> "analyze/query-display-names.tsv" + sed -n "s/^client-time\t/$test_name\t/p" < "$test_file" >> "analyze/client-times.tsv" + sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv" + sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv" + sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv" + sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv" done unset IFS @@ -286,6 +291,18 @@ clickhouse-local --query " create view query_runs as select * from file('analyze/query-runs.tsv', TSV, 'test text, query_index int, query_id text, version UInt8, time float'); +create view partial_queries as select test, query_index + from file('analyze/partial-queries.tsv', TSV, + 'test text, query_index int, servers Array(int)'); + +create table partial_query_times engine File(TSVWithNamesAndTypes, + 'analyze/partial-query-times.tsv') + as select test, query_index, stddevPop(time) time_stddev, median(time) time_median + from query_runs + where (test, query_index) in partial_queries + group by test, query_index + ; + create view left_query_log as select * from file('left-query-log.tsv', TSVWithNamesAndTypes, '$(cat "left-query-log.tsv.columns")'); @@ -329,6 +346,7 @@ create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics- right join query_runs on query_logs.query_id = query_runs.query_id and query_logs.version = query_runs.version + where (test, query_index) not in partial_queries ; create table query_run_metrics engine File( @@ -350,6 +368,7 @@ create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-n # query. We also don't have lateral joins. So I just put all runs of each # query into a separate file, and then compute randomization distribution # for each file. I do this in parallel using GNU parallel. 
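The "randomization distribution" mentioned in the comment above is essentially a permutation test: pool the run times of one query from both servers, repeatedly shuffle the server labels, and count how often a relabelled difference is at least as large as the observed one. A minimal sketch of that idea in Python, with made-up run times, is below; it only illustrates the principle, while the script itself computes the statistics with clickhouse-local over the per-query files and runs them in parallel with GNU parallel:

```
# Permutation test for a single query: is the observed difference between the
# two servers larger than what random relabelling of the same runs produces?
# The run times below are hypothetical; the real ones come from the per-query TSV files.
import random

left_times = [0.101, 0.098, 0.105, 0.102]   # runs on the "left" server
right_times = [0.121, 0.119, 0.124, 0.118]  # runs on the "right" server

def mean(xs):
    return sum(xs) / len(xs)

observed = abs(mean(right_times) - mean(left_times))

pooled = left_times + right_times
n_left = len(left_times)
trials = 10000
exceed = 0
for _ in range(trials):
    random.shuffle(pooled)                  # forget which server produced which run
    diff = abs(mean(pooled[n_left:]) - mean(pooled[:n_left]))
    if diff >= observed:
        exceed += 1

# A small fraction means the observed difference is unlikely to be noise.
print(f"fraction of shuffles at least as extreme: {exceed / trials:.4f}")
```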
+( set +x # do not bloat the log IFS=$'\n' for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq) do @@ -366,6 +385,7 @@ do done wait unset IFS +) parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log } @@ -389,12 +409,20 @@ create view query_display_names as select * from 'test text, query_index int, query_display_name text') ; +create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') + as select floor(time_median, 3) m, floor(time_stddev / time_median, 3) v, + test, query_index, query_display_name + from file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes, + 'test text, query_index int, time_stddev float, time_median float') t + join query_display_names using (test, query_index) + order by test, query_index + ; + -- WITH, ARRAY JOIN and CROSS JOIN do not like each other: -- https://github.com/ClickHouse/ClickHouse/issues/11868 -- https://github.com/ClickHouse/ClickHouse/issues/11757 -- Because of this, we make a view with arrays first, and then apply all the -- array joins. - create view query_metric_stat_arrays as with (select * from file('analyze/query-run-metric-names.tsv', TSV, 'n Array(String)')) as metric_name @@ -766,9 +794,16 @@ done wait unset IFS -# Remember that grep sets error code when nothing is found, hence the bayan -# operator. -grep -H -m2 -i '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||: +# Prefer to grep for clickhouse_driver exception messages, but if there are none, +# just show a couple of lines from the log. +for log in *-err.log +do + test=$(basename "$log" "-err.log") + { + grep -H -m2 -i '\(Exception\|Error\):[^:]' "$log" \ + || head -2 "$log" + } | sed "s/^/$test\t/" >> run-errors.tsv ||: +done } function report_metrics @@ -858,10 +893,6 @@ case "$stage" in cat "/proc/$pid/smaps" > "$pid-smaps.txt" ||: done - # Sleep for five minutes to see how the servers enter a quiescent state (e.g. - # how fast the memory usage drops). - sleep 300 - # We had a bug where getting profiles froze sometimes, so try to save some # logs if this happens again. Give the servers some time to collect all info, # then trace and kill. Start in a subshell, so that both function don't diff --git a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml index 5dcc3c51eca..6f1726ab36b 100644 --- a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml @@ -20,4 +20,6 @@ 1000000000 + + 10 diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index befc23ad041..93888a9ab02 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -24,14 +24,32 @@ dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_e function download { + # Historically there were various path for the performance test package. + # Test all of them. 
+ for path in "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/"{,clickhouse_build_check/}"performance/performance.tgz" + do + if curl --fail --head "$path" + then + left_path="$path" + fi + done + + for path in "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/"{,clickhouse_build_check/}"performance/performance.tgz" + do + if curl --fail --head "$path" + then + right_path="$path" + fi + done + # might have the same version on left and right - if ! [ "$left_sha" = "$right_sha" ] + if ! [ "$left_path" = "$right_path" ] then - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv & - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv & + wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv & + wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv & else mkdir right ||: - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right & + wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right & fi for dataset_name in $datasets diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 6dfd2f9c454..64336d0a038 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -50,10 +50,18 @@ function find_reference_sha # FIXME sometimes we have testing tags on commits without published builds -- # normally these are documentation commits. Loop to skip them. - if curl --fail --head "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz" - then - break - fi + # Historically there were various path for the performance test package. + # Test all of them. + unset found + for path in "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/"{,clickhouse_build_check/}"performance/performance.tgz" + do + if curl --fail --head "$path" + then + found="$path" + break + fi + done + if [ -n "$found" ] ; then break; fi start_ref="$REF_SHA~" done diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 9a6081b751c..e8323fbcca0 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -7,6 +7,7 @@ import clickhouse_driver import xml.etree.ElementTree as et import argparse import pprint +import re import string import time import traceback @@ -102,10 +103,11 @@ for s in servers: # connection loses the changes in settings. drop_query_templates = [q.text for q in root.findall('drop_query')] drop_queries = substitute_parameters(drop_query_templates) -for c in connections: +for conn_index, c in enumerate(connections): for q in drop_queries: try: c.execute(q) + print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') except: pass @@ -117,10 +119,12 @@ for c in connections: # configurable). So the end result is uncertain, but hopefully we'll be able to # run at least some queries. 
settings = root.findall('settings/*') -for c in connections: +for conn_index, c in enumerate(connections): for s in settings: try: - c.execute("set {} = '{}'".format(s.tag, s.text)) + q = f"set {s.tag} = '{s.text}'" + c.execute(q) + print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') except: print(traceback.format_exc(), file=sys.stderr) @@ -139,16 +143,28 @@ for t in tables: # Run create queries create_query_templates = [q.text for q in root.findall('create_query')] create_queries = substitute_parameters(create_query_templates) -for c in connections: + +# Disallow temporary tables, because the clickhouse_driver reconnects on errors, +# and temporary tables are destroyed. We want to be able to continue after some +# errors. +for q in create_queries: + if re.search('create temporary table', q, flags=re.IGNORECASE): + print(f"Temporary tables are not allowed in performance tests: '{q}'", + file = sys.stderr) + sys.exit(1) + +for conn_index, c in enumerate(connections): for q in create_queries: c.execute(q) + print(f'create\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') # Run fill queries fill_query_templates = [q.text for q in root.findall('fill_query')] fill_queries = substitute_parameters(fill_query_templates) -for c in connections: +for conn_index, c in enumerate(connections): for q in fill_queries: c.execute(q) + print(f'fill\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') # Run test queries for query_index, q in enumerate(test_queries): @@ -165,31 +181,47 @@ for query_index, q in enumerate(test_queries): # Prewarm: run once on both servers. Helps to bring the data into memory, # precompile the queries, etc. - try: - for conn_index, c in enumerate(connections): + # A query might not run on the old server if it uses a function added in the + # new one. We want to run them on the new server only, so that the PR author + # can ensure that the test works properly. Remember the errors we had on + # each server. + query_error_on_connection = [None] * len(connections); + for conn_index, c in enumerate(connections): + try: prewarm_id = f'{query_prefix}.prewarm0' res = c.execute(q, query_id = prewarm_id) print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') - except KeyboardInterrupt: - raise - except: - # If prewarm fails for some query -- skip it, and try to test the others. - # This might happen if the new test introduces some function that the - # old server doesn't support. Still, report it as an error. - # FIXME the driver reconnects on error and we lose settings, so this might - # lead to further errors or unexpected behavior. - print(traceback.format_exc(), file=sys.stderr) + except KeyboardInterrupt: + raise + except: + # FIXME the driver reconnects on error and we lose settings, so this + # might lead to further errors or unexpected behavior. + query_error_on_connection[conn_index] = traceback.format_exc(); + continue + + # If prewarm fails for the query on both servers -- report the error, skip + # the query and continue testing the next query. + if query_error_on_connection.count(None) == 0: + print(query_error_on_connection[0], file = sys.stderr) continue + # If prewarm fails on one of the servers, run the query on the rest of them. + # Useful for queries that use new functions added in the new server version. 
+ if query_error_on_connection.count(None) < len(query_error_on_connection): + no_error = [i for i, e in enumerate(query_error_on_connection) if not e] + print(f'partial\t{query_index}\t{no_error}') + # Now, perform measured runs. - # Track the time spent by the client to process this query, so that we can notice - # out the queries that take long to process on the client side, e.g. by sending - # excessive data. + # Track the time spent by the client to process this query, so that we can + # notice the queries that take long to process on the client side, e.g. by + # sending excessive data. start_seconds = time.perf_counter() server_seconds = 0 for run in range(0, args.runs): run_id = f'{query_prefix}.run{run}' for conn_index, c in enumerate(connections): + if query_error_on_connection[conn_index]: + continue res = c.execute(q, query_id = run_id) print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') server_seconds += c.last_query.elapsed @@ -198,8 +230,8 @@ for query_index, q in enumerate(test_queries): print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') # Run drop queries -drop_query_templates = [q.text for q in root.findall('drop_query')] drop_queries = substitute_parameters(drop_query_templates) -for c in connections: +for conn_index, c in enumerate(connections): for q in drop_queries: c.execute(q) + print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 227722a8bea..fffac4bbc6b 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -7,6 +7,7 @@ import csv import itertools import json import os +import os.path import pprint import sys import traceback @@ -23,6 +24,7 @@ faster_queries = 0 slower_queries = 0 unstable_queries = 0 very_unstable_queries = 0 +unstable_partial_queries = 0 # max seconds to run one query by itself, not counting preparation allowed_single_run_time = 2 @@ -194,6 +196,31 @@ if args.report == 'main': ['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'], slow_on_client_rows) + def print_partial(): + rows = tsvRows('report/partial-queries-report.tsv') + if not rows: + return + global unstable_partial_queries, slow_average_tests + print(tableStart('Partial queries')) + columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query'] + print(tableHeader(columns)) + attrs = ['' for c in columns] + for row in rows: + if float(row[1]) > 0.10: + attrs[1] = f'style="background: {color_bad}"' + unstable_partial_queries += 1 + else: + attrs[1] = '' + if float(row[0]) > allowed_single_run_time: + attrs[0] = f'style="background: {color_bad}"' + slow_average_tests += 1 + else: + attrs[0] = '' + print(tableRow(row, attrs)) + print(tableEnd()) + + print_partial() + def print_changes(): rows = tsvRows('report/changed-perf.tsv') if not rows: @@ -324,6 +351,9 @@ if args.report == 'main': print_test_times() def print_benchmark_results(): + if not os.path.isfile('benchmark/website-left.json'): + return + json_reports = [json.load(open(f'benchmark/website-{x}.json')) for x in ['left', 'right']] stats = [next(iter(x.values()))["statistics"] for x in json_reports] qps = [x["QPS"] for x in stats] @@ -417,6 +447,11 @@ if args.report == 'main': status = 'failure' message_array.append(str(slower_queries) + ' slower') + if unstable_partial_queries: + unstable_queries += unstable_partial_queries + error_tests += unstable_partial_queries + status = 
'failure' + if unstable_queries: message_array.append(str(unstable_queries) + ' unstable') diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 5a6aea5d320..8d9fb784a3e 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -25,7 +25,7 @@ ENV PKG_VERSION="pvs-studio-7.08.39365.50-amd64.deb" RUN wget "https://files.viva64.com/$PKG_VERSION" RUN sudo dpkg -i "$PKG_VERSION" -CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ +CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . && ninja re2_st && \ pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \ diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 3aff49bf5a1..bdf397a70e0 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -53,4 +53,4 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ && clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" \ && clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" \ && clickhouse-client --query "SHOW TABLES FROM test" \ - && clickhouse-test --testname --shard --zookeeper --no-stateless $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + && clickhouse-test --testname --shard --zookeeper --no-stateless --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index b946f5b187d..13b69c73b89 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -105,7 +105,7 @@ LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABL LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 41a53f8a3f5..eee493d4430 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -83,4 +83,4 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/; fi; \ ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml; \ service zookeeper start; sleep 5; \ - service clickhouse-server start && sleep 5 && clickhouse-test --testname --shard --zookeeper $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 
2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + service clickhouse-server start && sleep 5 && clickhouse-test --testname --shard --zookeeper --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 185dc95c783..37ad286e004 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -76,7 +76,7 @@ start_clickhouse sleep 10 -LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 92dac8bc2d1..46fa3e95f55 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -17,13 +17,13 @@ def run_perf_test(cmd, xmls_path, output_folder): def run_func_test(cmd, output_prefix, num_processes, skip_tests_option): output_paths = [os.path.join(output_prefix, "stress_test_run_{}.txt".format(i)) for i in range(num_processes)] f = open(output_paths[0], 'w') - main_command = "{} {}".format(cmd, skip_tests_option) + main_command = "{} --use-skip-list {}".format(cmd, skip_tests_option) logging.info("Run func tests main cmd '%s'", main_command) pipes = [Popen(main_command, shell=True, stdout=f, stderr=f)] for output_path in output_paths[1:]: time.sleep(0.5) f = open(output_path, 'w') - full_command = "{} --order=random {}".format(cmd, skip_tests_option) + full_command = "{} --use-skip-list --order=random {}".format(cmd, skip_tests_option) logging.info("Run func tests '%s'", full_command) p = Popen(full_command, shell=True, stdout=f, stderr=f) pipes.append(p) diff --git a/docs/en/commercial/index.md b/docs/en/commercial/index.md index f9065c7cd50..a8358f48b97 100644 --- a/docs/en/commercial/index.md +++ b/docs/en/commercial/index.md @@ -1,7 +1,18 @@ --- toc_folder_title: Commercial toc_priority: 70 -toc_title: Commercial +toc_title: Introduction --- +# ClickHouse Commercial Services +This section is a directory of commercial service providers specializing in ClickHouse. They are independent companies not necessarily affiliated with Yandex. + +Service categories: + +- [Cloud](cloud.md) +- [Support](support.md) + + +!!! note "For service providers" + If you happen to represent one of them, feel free to open a pull request adding your company to the respective section (or even adding a new section if the service doesn't fit into existing categories). The easiest way to open a pull-request for documentation page is by using a “pencil” edit button in the top-right corner. If your service available in some local market, make sure to mention it in a localized documentation page as well (or at least point it out in a pull-request description). diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index 4c98ec12541..c2f7a86fc9f 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -120,7 +120,7 @@ There are ordinary functions and aggregate functions. 
For aggregate functions, s Ordinary functions don’t change the number of rows – they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`’s of data to implement vectorized query execution. -There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#function-runningaccumulate), that exploit block processing and violate the independence of rows. +There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate), that exploit block processing and violate the independence of rows. ClickHouse has strong typing, so there’s no implicit type conversion. If a function doesn't support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function. diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 847b6fa90ee..565d67ce010 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -1,5 +1,5 @@ --- -toc_priority: 36 +toc_priority: 4 toc_title: HDFS --- diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index dd005919ad1..cfe94c8d305 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -3,4 +3,14 @@ toc_folder_title: Integrations toc_priority: 30 --- +# Table Engines for Integrations +ClickHouse provides various means for integrating with external systems, including table engines. As with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. From a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like external dictionaries or table functions, which require the use of custom query methods on each use.
+ +List of supported integrations: + +- [ODBC](odbc.md) +- [JDBC](jdbc.md) +- [MySQL](mysql.md) +- [HDFS](hdfs.md) +- [Kafka](kafka.md) diff --git a/docs/en/engines/table-engines/integrations/jdbc.md b/docs/en/engines/table-engines/integrations/jdbc.md index 08ddc19520a..2144be9f1e3 100644 --- a/docs/en/engines/table-engines/integrations/jdbc.md +++ b/docs/en/engines/table-engines/integrations/jdbc.md @@ -1,5 +1,5 @@ --- -toc_priority: 34 +toc_priority: 2 toc_title: JDBC --- diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index cff9ab3a0c4..3324386e1c5 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -1,5 +1,5 @@ --- -toc_priority: 32 +toc_priority: 5 toc_title: Kafka --- diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index c98d492322f..805cb4817a5 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -1,9 +1,9 @@ --- -toc_priority: 33 +toc_priority: 3 toc_title: MySQL --- -# Mysql {#mysql} +# MySQL {#mysql} The MySQL engine allows you to perform `SELECT` queries on data that is stored on a remote MySQL server. diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index 04387760fb4..b2924df4831 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -1,5 +1,5 @@ --- -toc_priority: 35 +toc_priority: 1 toc_title: ODBC --- diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 3446f820a71..12e60512e47 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -50,6 +50,8 @@ Clusters are set like this: false + + 1 example01-01-1 9000 diff --git a/docs/en/engines/table-engines/special/index.md b/docs/en/engines/table-engines/special/index.md index b1789f34347..2c796a109da 100644 --- a/docs/en/engines/table-engines/special/index.md +++ b/docs/en/engines/table-engines/special/index.md @@ -3,4 +3,12 @@ toc_folder_title: Special toc_priority: 31 --- +# Special Table Engines +There are three main categories of table engines: + +- [MergeTree engine family](../../../engines/table-engines/mergetree-family/index.md) for main production use. +- [Log engine family](../../../engines/table-engines/log-family/index.md) for small temporary data. +- [Table engines for integrations](../../../engines/table-engines/integrations/index.md). + +The remaining engines are unique in their purpose and are not grouped into families yet, thus they are placed in this “special” category. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 3be3490cffe..ae96cb6dda4 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1055,11 +1055,11 @@ Each Avro message embeds a schema id that can be resolved to the actual schema w Schemas are cached once resolved. -Schema Registry URL is configured with [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#settings-format_avro_schema_registry_url) +Schema Registry URL is configured with [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#format_avro_schema_registry_url). 
### Data Types Matching {#data_types-matching-1} -Same as [Avro](#data-format-avro) +Same as [Avro](#data-format-avro). ### Usage {#usage} @@ -1093,7 +1093,7 @@ SELECT * FROM topic1_stream; ``` !!! note "Warning" - Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. + Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. ## Parquet {#data-format-parquet} diff --git a/docs/en/interfaces/third-party/index.md b/docs/en/interfaces/third-party/index.md index 84291a199eb..9d7d9106e63 100644 --- a/docs/en/interfaces/third-party/index.md +++ b/docs/en/interfaces/third-party/index.md @@ -3,4 +3,15 @@ toc_folder_title: Third-Party toc_priority: 24 --- +# Third-Party Interfaces +This is a collection of links to third-party tools that provide some sort of interface to ClickHouse. It can be either visual interface, command-line interface or an API: + +- [Client libraries](client-libraries.md) +- [Integrations](integrations.md) +- [GUI](gui.md) +- [Proxies](proxy.md) + + +!!! note "Note" + Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 3ebadd6d002..01669e012d6 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -50,7 +50,8 @@ toc_title: Adopters | Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | | QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | | Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | | Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md index 78db369e8e8..f7e1b98d6e3 100644 --- a/docs/en/operations/access-rights.md +++ b/docs/en/operations/access-rights.md @@ -34,7 +34,7 @@ By default, the ClickHouse server provides the `default` user account which is n 
If you just started using ClickHouse, consider the following scenario: 1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user. -2. Log in to the `default` user account and create all the required users. Don’t forget to create an administrator account (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`). +2. Log in to the `default` user account and create all the required users. Don’t forget to create an administrator account (`GRANT ALL ON *.* TO admin_user_account WITH GRANT OPTION`). 3. [Restrict permissions](../operations/settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it. ### Properties of Current Solution {#access-control-properties} diff --git a/docs/en/operations/optimizing-performance/index.md b/docs/en/operations/optimizing-performance/index.md index 6e6cef109c1..ca7b96268d8 100644 --- a/docs/en/operations/optimizing-performance/index.md +++ b/docs/en/operations/optimizing-performance/index.md @@ -1,6 +1,9 @@ --- toc_folder_title: Optimizing Performance toc_priority: 52 +toc_hidden: true --- +# Optimizing Performance +- [Sampling query profiler](sampling-query-profiler.md) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index f90b418b4a9..f1ffc011776 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -398,6 +398,27 @@ The cache is shared for the server and memory is allocated as needed. The cache 5368709120 ``` + +## max_server_memory_usage {#max_server_memory_usage} + +Limits total RAM usage by the ClickHouse server. You can specify it only for the default profile. + +Possible values: + +- Positive integer. +- 0 — Unlimited. + +Default value: `0`. + +**Additional Info** + +On hosts with low RAM and swap, you possibly need setting `max_server_memory_usage_to_ram_ratio > 1`. + +**See also** + +- [max_memory_usage](../settings/query-complexity.md#settings_max_memory_usage) + + ## max\_concurrent\_queries {#max-concurrent-queries} The maximum number of simultaneously processed requests. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 812056785da..42ac9573b91 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -36,7 +36,7 @@ Memory usage is not monitored for the states of certain aggregate functions. Memory usage is not fully tracked for states of the aggregate functions `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` from `String` and `Array` arguments. -Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and `max_memory_usage_for_all_queries`. +Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and [max_server_memory_usage](../server-configuration-parameters/settings.md#max_server_memory_usage). ## max\_memory\_usage\_for\_user {#max-memory-usage-for-user} @@ -46,18 +46,10 @@ Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHo See also the description of [max\_memory\_usage](#settings_max_memory_usage). -## max\_memory\_usage\_for\_all\_queries {#max-memory-usage-for-all-queries} - -The maximum amount of RAM to use for running all queries on a single server. 
- -Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Settings.h#L289). By default, the amount is not restricted (`max_memory_usage_for_all_queries = 0`). - -See also the description of [max\_memory\_usage](#settings_max_memory_usage). ## max\_rows\_to\_read {#max-rows-to-read} The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be broken a little. -When running a query in multiple threads, the following restrictions apply to each thread separately. A maximum number of rows that can be read from a table when running a query. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index bbb878995d6..7d1f9a72a21 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -727,6 +727,17 @@ The INSERT query also contains data for INSERT that is processed by a separate s Default value: 256 KiB. +## max\_parser\_depth {#max_parser_depth} + +Limits maximum recursion depth in the recursive descent parser. Allows to control stack size. + +Possible values: + +- Positive integer. +- 0 — Recursion depth is unlimited. + +Default value: 1000. + ## interactive\_delay {#interactive-delay} The interval in microseconds for checking whether request execution has been cancelled and sending the progress. @@ -1368,13 +1379,11 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB) Default value: 32768 (32 KiB) -## format\_avro\_schema\_registry\_url {#settings-format_avro_schema_registry_url} +## format\_avro\_schema\_registry\_url {#format_avro_schema_registry_url} -Sets Confluent Schema Registry URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format +Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format. -Type: URL - -Default value: Empty +Default value: `Empty`. ## background\_pool\_size {#background_pool_size} @@ -1418,6 +1427,23 @@ Possible values: Default value: 16. +## always_fetch_merged_part {#always_fetch_merged_part} + +Prohibits data parts merging in [Replicated*MergeTree](../../engines/table-engines/mergetree-family/replication.md)-engine tables. + +When merging is prohibited, the replica never merges parts and always downloads merged parts from other replicas. If there is no required data yet, the replica waits for it. CPU and disk load on the replica server decreases, but the network load on cluster increases. This setting can be useful on servers with relatively weak CPUs or slow disks, such as servers for backups storage. + +Possible values: + +- 0 — `Replicated*MergeTree`-engine tables merge data parts at the replica. +- 1 — `Replicated*MergeTree`-engine tables don't merge data parts at the replica. The tables download merged data parts from other replicas. + +Default value: 0. + +**See Also** + +- [Data Replication](../../engines/table-engines/mergetree-family/replication.md) + ## background\_distributed\_schedule\_pool\_size {#background_distributed_schedule_pool_size} Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at ClickHouse server start and can’t be changed in a user session. 
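The settings documented above (for example `max_parser_depth` and `always_fetch_merged_part`) are described in prose only. Below is a minimal, illustrative sketch of how a session-level setting such as `max_parser_depth` could be exercised from `clickhouse-client`; it is not taken from the patch, and the error name mentioned in the comment is an assumption.

```sql
-- Illustrative sketch: lower the parser recursion limit for the current session only.
SET max_parser_depth = 10;

-- Verify the effective value through the system.settings table.
SELECT name, value FROM system.settings WHERE name = 'max_parser_depth';

-- Ordinary queries keep working; an expression nested deeper than 10 levels
-- would be rejected by the parser (typically with a "too deep recursion" error).
SELECT 1 + 1;
```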
diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index b18daedf3d6..c42108ee40e 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -120,6 +120,7 @@ zoo.cfg: tickTime=2000 # The number of ticks that the initial # synchronization phase can take +# This value is not quite motivated initLimit=30000 # The number of ticks that can pass between # sending a request and getting an acknowledgement @@ -127,6 +128,9 @@ syncLimit=10 maxClientCnxns=2000 +# It is the maximum value that client may request and the server will accept. +# It is Ok to have high maxSessionTimeout on server to allow clients to work with high session timeout if they want. +# But we request session timeout of 30 seconds by default (you can change it with session_timeout_ms in ClickHouse config). maxSessionTimeout=60000000 # the directory where the snapshot is stored. dataDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '}}' }}/data diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 6d70637236b..554969b80a3 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -33,7 +33,7 @@ To work with these states, use: - [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine. - [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function. -- [runningAccumulate](../../sql-reference/functions/other-functions.md#function-runningaccumulate) function. +- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function. - [-Merge](#aggregate_functions_combinators-merge) combinator. - [-MergeState](#aggregate_functions_combinators-mergestate) combinator. diff --git a/docs/en/sql-reference/data-types/domains/index.md b/docs/en/sql-reference/data-types/domains/index.md index ddcb5b21d82..30aca1eb059 100644 --- a/docs/en/sql-reference/data-types/domains/index.md +++ b/docs/en/sql-reference/data-types/domains/index.md @@ -1,6 +1,31 @@ --- -toc_folder_title: Domains toc_priority: 56 +toc_folder_title: Domains +toc_title: Overview --- +# Domains {#domains} +Domains are special-purpose types that add some extra features atop of existing base type, but leaving on-wire and on-disc format of the underlying data type intact. At the moment, ClickHouse does not support user-defined domains. + +You can use domains anywhere corresponding base type can be used, for example: + +- Create a column of a domain type +- Read/write values from/to domain column +- Use it as an index if a base type can be used as an index +- Call functions with values of domain column + +### Extra Features of Domains {#extra-features-of-domains} + +- Explicit column type name in `SHOW CREATE TABLE` or `DESCRIBE TABLE` +- Input from human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)` +- Output to human-friendly format for `SELECT domain_column FROM domain_table` +- Loading data from an external source in the human-friendly format: `INSERT INTO domain_table FORMAT CSV ...` + +### Limitations {#limitations} + +- Can’t convert index column of base type to domain type via `ALTER TABLE`. +- Can’t implicitly convert string values into domain values when inserting data from another column or table. +- Domain adds no constrains on stored values. 
+ +[Original article](https://clickhouse.tech/docs/en/data_types/domains/) diff --git a/docs/en/sql-reference/data-types/domains/overview.md b/docs/en/sql-reference/data-types/domains/overview.md deleted file mode 100644 index aea7307d048..00000000000 --- a/docs/en/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -toc_priority: 58 -toc_title: Overview ---- - -# Domains {#domains} - -Domains are special-purpose types that add some extra features atop of existing base type, but leaving on-wire and on-disc format of the underlying data type intact. At the moment, ClickHouse does not support user-defined domains. - -You can use domains anywhere corresponding base type can be used, for example: - -- Create a column of a domain type -- Read/write values from/to domain column -- Use it as an index if a base type can be used as an index -- Call functions with values of domain column - -### Extra Features of Domains {#extra-features-of-domains} - -- Explicit column type name in `SHOW CREATE TABLE` or `DESCRIBE TABLE` -- Input from human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)` -- Output to human-friendly format for `SELECT domain_column FROM domain_table` -- Loading data from an external source in the human-friendly format: `INSERT INTO domain_table FORMAT CSV ...` - -### Limitations {#limitations} - -- Can’t convert index column of base type to domain type via `ALTER TABLE`. -- Can’t implicitly convert string values into domain values when inserting data from another column or table. -- Domain adds no constrains on stored values. - -[Original article](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 3cbc7c73543..4eb316e0455 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -149,6 +149,63 @@ Rounds down a date with time to the start of the hour. Rounds down a date with time to the start of the minute. +## toStartOfSecond {#tostartofsecond} + +Truncates sub-seconds. + +**Syntax** + +``` sql +toStartOfSecond(value[, timezone]) +``` + +**Parameters** + +- `value` — Date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). + +**Returned value** + +- Input value without sub-seconds. + +Type: [DateTime64](../data-types/datetime64.md). + +**Examples** + +Query without timezone: + +``` sql +WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 +SELECT toStartOfSecond(dt64); +``` + +Result: + +``` text +┌───toStartOfSecond(dt64)─┐ +│ 2020-01-01 10:20:30.000 │ +└─────────────────────────┘ +``` + +Query with timezone: + +``` sql +WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 +SELECT toStartOfSecond(dt64, 'Europe/Moscow'); +``` + +Result: + +``` text +┌─toStartOfSecond(dt64, 'Europe/Moscow')─┐ +│ 2020-01-01 13:20:30.000 │ +└────────────────────────────────────────┘ +``` + +**See also** + +- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter. 
+ ## toStartOfFiveMinute {#tostartoffiveminute} Rounds down a date with time to the start of the five-minute interval. diff --git a/docs/en/sql-reference/functions/geo.md b/docs/en/sql-reference/functions/geo.md index 65925f8a64b..0e8deb7a2c7 100644 --- a/docs/en/sql-reference/functions/geo.md +++ b/docs/en/sql-reference/functions/geo.md @@ -267,7 +267,7 @@ SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos ## h3GetBaseCell {#h3getbasecell} -Returns the base cell number of the index. +Returns the base cell number of the H3 index. **Syntax** @@ -275,20 +275,22 @@ Returns the base cell number of the index. h3GetBaseCell(index) ``` -**Parameters** +**Parameter** - `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- Hexagon base cell number. Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- Hexagon base cell number. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). **Example** Query: ``` sql -SELECT h3GetBaseCell(612916788725809151) as basecell +SELECT h3GetBaseCell(612916788725809151) as basecell; ``` Result: @@ -301,7 +303,7 @@ Result: ## h3HexAreaM2 {#h3hexaream2} -Average hexagon area in square meters at the given resolution. +Returns average hexagon area in square meters at the given resolution. **Syntax** @@ -309,20 +311,22 @@ Average hexagon area in square meters at the given resolution. h3HexAreaM2(resolution) ``` -**Parameters** +**Parameter** - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- Area in m². Type: [Float64](../../sql-reference/data-types/float.md). +- Area in square meters. + +Type: [Float64](../../sql-reference/data-types/float.md). **Example** Query: ``` sql -SELECT h3HexAreaM2(13) as area +SELECT h3HexAreaM2(13) as area; ``` Result: @@ -335,7 +339,7 @@ Result: ## h3IndexesAreNeighbors {#h3indexesareneighbors} -Returns whether or not the provided H3Indexes are neighbors. +Returns whether or not the provided H3 indexes are neighbors. **Syntax** @@ -348,16 +352,19 @@ h3IndexesAreNeighbors(index1, index2) - `index1` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). - `index2` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- Returns `1` if the indexes are neighbors, `0` otherwise. Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` — Indexes are neighbours. +- `0` — Indexes are not neighbours. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). **Example** Query: ``` sql -SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n +SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n; ``` Result: @@ -370,7 +377,7 @@ Result: ## h3ToChildren {#h3tochildren} -Returns an array with the child indexes of the given index. +Returns an array of child indexes for the given H3 index. **Syntax** @@ -385,14 +392,16 @@ h3ToChildren(index, resolution) **Returned values** -- Array with the child H3 indexes. Array of type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Array of the child H3-indexes. + +Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). 
**Example** Query: ``` sql -SELECT h3ToChildren(599405990164561919, 6) AS children +SELECT h3ToChildren(599405990164561919, 6) AS children; ``` Result: @@ -405,7 +414,7 @@ Result: ## h3ToParent {#h3toparent} -Returns the parent (coarser) index containing the given index. +Returns the parent (coarser) index containing the given H3 index. **Syntax** @@ -418,16 +427,18 @@ h3ToParent(index, resolution) - `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- Parent H3 index. Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Parent H3 index. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md). **Example** Query: ``` sql -SELECT h3ToParent(599405990164561919, 3) as parent +SELECT h3ToParent(599405990164561919, 3) as parent; ``` Result: @@ -440,26 +451,28 @@ Result: ## h3ToString {#h3tostring} -Converts the H3Index representation of the index to the string representation. +Converts the `H3Index` representation of the index to the string representation. ``` sql h3ToString(index) ``` -**Parameters** +**Parameter** - `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- String representation of the H3 index. Type: [String](../../sql-reference/data-types/string.md). +- String representation of the H3 index. + +Type: [String](../../sql-reference/data-types/string.md). **Example** Query: ``` sql -SELECT h3ToString(617420388352917503) as h3_string +SELECT h3ToString(617420388352917503) as h3_string; ``` Result: @@ -472,17 +485,19 @@ Result: ## stringToH3 {#stringtoh3} -Converts the string representation to H3Index (UInt64) representation. +Converts the string representation to the `H3Index` (UInt64) representation. + +**Syntax** ``` sql stringToH3(index_str) ``` -**Parameters** +**Parameter** - `index_str` — String representation of the H3 index. Type: [String](../../sql-reference/data-types/string.md). -**Returned values** +**Returned value** - Hexagon index number. Returns 0 on error. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -491,7 +506,7 @@ stringToH3(index_str) Query: ``` sql -SELECT stringToH3('89184926cc3ffff') as index +SELECT stringToH3('89184926cc3ffff') as index; ``` Result: @@ -504,7 +519,7 @@ Result: ## h3GetResolution {#h3getresolution} -Returns the resolution of the index. +Returns the resolution of the H3 index. **Syntax** @@ -512,11 +527,11 @@ Returns the resolution of the index. h3GetResolution(index) ``` -**Parameters** +**Parameter** - `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** - Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md). 
@@ -525,7 +540,7 @@ h3GetResolution(index) Query: ``` sql -SELECT h3GetResolution(617420388352917503) as res +SELECT h3GetResolution(617420388352917503) as res; ``` Result: @@ -536,4 +551,4 @@ Result: └─────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/geo/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index e979aff2ee7..55ddc5c029c 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1054,11 +1054,110 @@ Result: Takes state of aggregate function. Returns result of aggregation (finalized state). -## runningAccumulate {#function-runningaccumulate} +## runningAccumulate {#runningaccumulate} -Takes the states of the aggregate function and returns a column with values, are the result of the accumulation of these states for a set of block lines, from the first to the current line. -For example, takes state of aggregate function (example runningAccumulate(uniqState(UserID))), and for each row of block, return result of aggregate function on merge of states of all previous rows and current row. -So, result of function depends on partition of data to blocks and on order of data in block. +Accumulates states of an aggregate function for each row of a data block. + +!!! warning "Warning" + The state is reset for each new data block. + +**Syntax** + +```sql +runningAccumulate(agg_state[, grouping]); +``` + +**Parameters** + +- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. + +**Returned value** + +- Each resulting row contains a result of the aggregate function, accumulated for all the input rows from 0 to the current position. `runningAccumulate` resets states for each new data block or when the `grouping` value changes. + +Type depends on the aggregate function used. + +**Examples** + +Consider how you can use `runningAccumulate` to find the cumulative sum of numbers without and with grouping. + +Query: + +```sql +SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k) AS sum_k FROM numbers(10) GROUP BY k ORDER BY k); +``` + +Result: + +```text +┌─k─┬─res─┐ +│ 0 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 3 │ +│ 3 │ 6 │ +│ 4 │ 10 │ +│ 5 │ 15 │ +│ 6 │ 21 │ +│ 7 │ 28 │ +│ 8 │ 36 │ +│ 9 │ 45 │ +└───┴─────┘ +``` + +The subquery generates `sumState` for every number from `0` to `9`. `sumState` returns the state of the [sum](../aggregate-functions/reference/sum.md) function that contains the sum of a single number. + +The whole query does the following: + +1. For the first row, `runningAccumulate` takes `sumState(0)` and returns `0`. +2. For the second row, the function merges `sumState(0)` and `sumState(1)` resulting in `sumState(0 + 1)`, and returns `1` as a result. +3. For the third row, the function merges `sumState(0 + 1)` and `sumState(2)` resulting in `sumState(0 + 1 + 2)`, and returns `3` as a result. +4. The actions are repeated until the block ends. 
+ +The following example shows the `grouping` parameter usage: + +Query: + +```sql +SELECT + grouping, + item, + runningAccumulate(state, grouping) AS res +FROM +( + SELECT + toInt8(number / 4) AS grouping, + number AS item, + sumState(number) AS state + FROM numbers(15) + GROUP BY item + ORDER BY item ASC +); +``` + +Result: + +```text +┌─grouping─┬─item─┬─res─┐ +│ 0 │ 0 │ 0 │ +│ 0 │ 1 │ 1 │ +│ 0 │ 2 │ 3 │ +│ 0 │ 3 │ 6 │ +│ 1 │ 4 │ 4 │ +│ 1 │ 5 │ 9 │ +│ 1 │ 6 │ 15 │ +│ 1 │ 7 │ 22 │ +│ 2 │ 8 │ 8 │ +│ 2 │ 9 │ 17 │ +│ 2 │ 10 │ 27 │ +│ 2 │ 11 │ 38 │ +│ 3 │ 12 │ 12 │ +│ 3 │ 13 │ 25 │ +│ 3 │ 14 │ 39 │ +└──────────┴──────┴─────┘ +``` + +As you can see, `runningAccumulate` merges states for each group of rows separately. ## joinGet {#joinget} diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 254dceef29b..929c861281b 100755 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -521,6 +521,80 @@ Result: - [toDate](#todate) - [toDateTime](#todatetime) +## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS} + +This function is similar to [parseDateTimeBestEffort](#parsedatetimebesteffort); the only difference is that it prefers the US date style (`MM/DD/YYYY` etc.) in case of ambiguity. + +**Syntax** + +``` sql +parseDateTimeBestEffortUS(time_string [, time_zone]); +``` + +**Parameters** + +- `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). + +**Supported non-standard formats** + +- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). +- A string with a date and a time component: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. +- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` etc. +- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` is substituted with `2000-01`. +- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. + +**Returned value** + +- `time_string` converted to the `DateTime` data type. + +**Examples** + +Query: + +``` sql +SELECT parseDateTimeBestEffortUS('09/12/2020 12:12:57') +AS parseDateTimeBestEffortUS; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUS─┐ +│ 2020-09-12 12:12:57 │ +└───────────────────────────┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUS('09-12-2020 12:12:57') +AS parseDateTimeBestEffortUS; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUS─┐ +│ 2020-09-12 12:12:57 │ +└───────────────────────────┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUS('09.12.2020 12:12:57') +AS parseDateTimeBestEffortUS; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUS─┐ +│ 2020-09-12 12:12:57 │ +└───────────────────────────┘ +``` + +## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull} + +Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns null when it encounters a date format that cannot be processed.
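As a complement to the examples above, here is a hedged sketch contrasting the US variant with the default parser on an ambiguous date, and showing how the `*OrNull` form behaves on unparsable input. The results shown in the comments are assumptions based on the descriptions above, not output captured from the patch.

```sql
SELECT
    parseDateTimeBestEffort('09/12/2020 12:12:57')   AS default_style, -- ambiguous date, read as DD/MM/YYYY, i.e. 2020-12-09
    parseDateTimeBestEffortUS('09/12/2020 12:12:57') AS us_style,      -- same string read as MM/DD/YYYY, i.e. 2020-09-12
    parseDateTimeBestEffortOrNull('not a date')      AS unparsable;    -- returns NULL instead of throwing an exception
```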
diff --git a/docs/en/sql-reference/statements/index.md b/docs/en/sql-reference/statements/index.md index 034c541a1c4..3a2c09d94cb 100644 --- a/docs/en/sql-reference/statements/index.md +++ b/docs/en/sql-reference/statements/index.md @@ -1,6 +1,19 @@ --- toc_folder_title: Statements toc_priority: 31 +toc_hidden: true --- +# ClickHouse SQL Statements +Statements represent various kinds of actions you can perform using SQL queries. Each kind of statement has its own syntax and usage details that are described separately: + +- [SELECT](select/index.md) +- [INSERT INTO](insert-into.md) +- [CREATE](create.md) +- [ALTER](alter.md) +- [SYSTEM](system.md) +- [SHOW](show.md) +- [GRANT](grant.md) +- [REVOKE](revoke.md) +- [Other](misc.md) diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md new file mode 100644 index 00000000000..2cd5b14caa4 --- /dev/null +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -0,0 +1,39 @@ +--- +toc_priority: 50 +toc_title: cluster +--- + +# cluster, clusterAllReplicas {#cluster-clusterallreplicas} + +Allows to access all shards of an existing cluster which is configured in the `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried. +`clusterAllReplicas` is the same as `cluster`, but all replicas are queried. Each replica in the cluster is used as a separate shard/connection. + +!!! note "Note" + All available clusters are listed in the `system.clusters` table. + + +Signatures: + +``` sql +cluster('cluster_name', db.table) +cluster('cluster_name', db, table) +clusterAllReplicas('cluster_name', db.table) +clusterAllReplicas('cluster_name', db, table) +``` + +`cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. + +Using the `cluster` and `clusterAllReplicas` table functions is less efficient than creating a `Distributed` table because in this case, the server connection is re-established for every request. When processing a large number of queries, always create the `Distributed` table ahead of time, and don’t use the `cluster` and `clusterAllReplicas` table functions. + +The `cluster` and `clusterAllReplicas` table functions can be useful in the following cases: + +- Accessing a specific cluster for data comparison, debugging, and testing. +- Queries to various ClickHouse clusters and replicas for research purposes. +- Infrequent distributed requests that are made manually. + +Connection settings like `host`, `port`, `user`, `password`, `compression`, `secure` are taken from the `remote_servers` config section. See details in [Distributed engine](../../engines/table-engines/special/distributed.md).
+ +**See Also** + +- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [load\_balancing](../../operations/settings/settings.md#settings-load_balancing) diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index a2ba0046cfb..a7bdca6218e 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -12,6 +12,8 @@ Signatures: ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. The port is the TCP port on the remote server. If the port is omitted, it uses `tcp_port` from the server’s config file (by default, 9000). diff --git a/docs/es/operations/settings/query-complexity.md b/docs/es/operations/settings/query-complexity.md index 218952dff1f..d65fb03ad13 100644 --- a/docs/es/operations/settings/query-complexity.md +++ b/docs/es/operations/settings/query-complexity.md @@ -59,7 +59,6 @@ Ver también la descripción de [Método de codificación de datos:](#settings_m ## ¿Qué puedes encontrar en Neodigit {#max-rows-to-read} Las siguientes restricciones se pueden verificar en cada bloque (en lugar de en cada fila). Es decir, las restricciones se pueden romper un poco. -Al ejecutar una consulta en varios subprocesos, las siguientes restricciones se aplican a cada subproceso por separado. Un número máximo de filas que se pueden leer de una tabla al ejecutar una consulta. diff --git a/docs/es/sql-reference/data-types/domains/index.md b/docs/es/sql-reference/data-types/domains/index.md index f4bfc581b91..136058e35c8 100644 --- a/docs/es/sql-reference/data-types/domains/index.md +++ b/docs/es/sql-reference/data-types/domains/index.md @@ -1,8 +1,33 @@ --- machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: Dominio toc_priority: 56 +toc_folder_title: Dominio +toc_title: "Descripci\xF3n" --- +# Dominio {#domains} +Los dominios son tipos de propósito especial que agregan algunas características adicionales encima del tipo base existente, pero dejando intacto el formato en cable y en disco del tipo de datos subyacente. Por el momento, ClickHouse no admite dominios definidos por el usuario. 
+ +Puede usar dominios en cualquier lugar que se pueda usar el tipo base correspondiente, por ejemplo: + +- Crear una columna de un tipo de dominio +- Leer/escribir valores desde/a la columna de dominio +- Úselo como un índice si un tipo base se puede usar como un índice +- Funciones de llamada con valores de la columna de dominio + +### Características adicionales de los dominios {#extra-features-of-domains} + +- Nombre de tipo de columna explícito en `SHOW CREATE TABLE` o `DESCRIBE TABLE` +- Entrada del formato humano-amistoso con `INSERT INTO domain_table(domain_column) VALUES(...)` +- Salida al formato humano-amistoso para `SELECT domain_column FROM domain_table` +- Carga de datos desde una fuente externa en el formato de uso humano: `INSERT INTO domain_table FORMAT CSV ...` + +### Limitacion {#limitations} + +- No se puede convertir la columna de índice del tipo base al tipo de dominio a través de `ALTER TABLE`. +- No se pueden convertir implícitamente valores de cadena en valores de dominio al insertar datos de otra columna o tabla. +- Domain no agrega restricciones en los valores almacenados. + +[Artículo Original](https://clickhouse.tech/docs/en/data_types/domains/) diff --git a/docs/es/sql-reference/data-types/domains/overview.md b/docs/es/sql-reference/data-types/domains/overview.md deleted file mode 100644 index c6fc9057c81..00000000000 --- a/docs/es/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "Descripci\xF3n" ---- - -# Dominio {#domains} - -Los dominios son tipos de propósito especial que agregan algunas características adicionales encima del tipo base existente, pero dejando intacto el formato en cable y en disco del tipo de datos subyacente. Por el momento, ClickHouse no admite dominios definidos por el usuario. - -Puede usar dominios en cualquier lugar que se pueda usar el tipo base correspondiente, por ejemplo: - -- Crear una columna de un tipo de dominio -- Leer/escribir valores desde/a la columna de dominio -- Úselo como un índice si un tipo base se puede usar como un índice -- Funciones de llamada con valores de la columna de dominio - -### Características adicionales de los dominios {#extra-features-of-domains} - -- Nombre de tipo de columna explícito en `SHOW CREATE TABLE` o `DESCRIBE TABLE` -- Entrada del formato humano-amistoso con `INSERT INTO domain_table(domain_column) VALUES(...)` -- Salida al formato humano-amistoso para `SELECT domain_column FROM domain_table` -- Carga de datos desde una fuente externa en el formato de uso humano: `INSERT INTO domain_table FORMAT CSV ...` - -### Limitacion {#limitations} - -- No se puede convertir la columna de índice del tipo base al tipo de dominio a través de `ALTER TABLE`. -- No se pueden convertir implícitamente valores de cadena en valores de dominio al insertar datos de otra columna o tabla. -- Domain no agrega restricciones en los valores almacenados. 
- -[Artículo Original](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/es/sql-reference/table-functions/remote.md b/docs/es/sql-reference/table-functions/remote.md index 8d49348c76e..d614c7b3c0e 100644 --- a/docs/es/sql-reference/table-functions/remote.md +++ b/docs/es/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ Firma: ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port` o simplemente `host`. El host se puede especificar como nombre de servidor o como dirección IPv4 o IPv6. Una dirección IPv6 se especifica entre corchetes. El puerto es el puerto TCP del servidor remoto. Si se omite el puerto, utiliza `tcp_port` del archivo de configuración del servidor (por defecto, 9000). diff --git a/docs/fa/sql-reference/data-types/domains/index.md b/docs/fa/sql-reference/data-types/domains/index.md index 089e1c43eed..a05eea1b59c 100644 --- a/docs/fa/sql-reference/data-types/domains/index.md +++ b/docs/fa/sql-reference/data-types/domains/index.md @@ -1,8 +1,33 @@ --- machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: "\u062F\u0627\u0645\u0646\u0647" toc_priority: 56 +toc_folder_title: "\u062F\u0627\u0645\u0646\u0647" +toc_title: "\u0628\u0631\u0631\u0633\u06CC \u0627\u062C\u0645\u0627\u0644\u06CC" --- +# دامنه {#domains} +دامنه انواع خاصی است که اضافه کردن برخی از ویژگی های اضافی در بالای نوع پایه موجود, اما ترک بر روی سیم و بر روی دیسک فرمت از نوع داده اساسی دست نخورده. درحال حاضر, تاتر می کند دامنه تعریف شده توسط کاربر را پشتیبانی نمی کند. + +شما می توانید دامنه در هر نقطه نوع پایه مربوطه استفاده می شود, مثلا: + +- ایجاد یک ستون از یک نوع دامنه +- خواندن / نوشتن مقادیر از / به ستون دامنه +- اگر یک نوع پایه می تواند به عنوان یک شاخص استفاده می شود به عنوان شاخص استفاده می شود +- توابع تماس با مقادیر ستون دامنه + +### ویژگی های اضافی از دامنه {#extra-features-of-domains} + +- صریح نام نوع ستون در `SHOW CREATE TABLE` یا `DESCRIBE TABLE` +- ورودی از فرمت انسان دوستانه با `INSERT INTO domain_table(domain_column) VALUES(...)` +- خروجی به فرمت انسان دوستانه برای `SELECT domain_column FROM domain_table` +- بارگیری داده ها از یک منبع خارجی در قالب انسان دوستانه: `INSERT INTO domain_table FORMAT CSV ...` + +### محدودیت ها {#limitations} + +- می توانید ستون شاخص از نوع پایه به نوع دامنه از طریق تبدیل کنید `ALTER TABLE`. +- نمی تواند به طور ضمنی تبدیل مقادیر رشته به ارزش دامنه در هنگام قرار دادن داده ها از ستون یا جدول دیگر. +- دامنه می افزاید: هیچ محدودیتی در مقادیر ذخیره شده. 
+ +[مقاله اصلی](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/fa/sql-reference/data-types/domains/overview.md b/docs/fa/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 4507ca850ef..00000000000 --- a/docs/fa/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "\u0628\u0631\u0631\u0633\u06CC \u0627\u062C\u0645\u0627\u0644\u06CC" ---- - -# دامنه {#domains} - -دامنه انواع خاصی است که اضافه کردن برخی از ویژگی های اضافی در بالای نوع پایه موجود, اما ترک بر روی سیم و بر روی دیسک فرمت از نوع داده اساسی دست نخورده. درحال حاضر, تاتر می کند دامنه تعریف شده توسط کاربر را پشتیبانی نمی کند. - -شما می توانید دامنه در هر نقطه نوع پایه مربوطه استفاده می شود, مثلا: - -- ایجاد یک ستون از یک نوع دامنه -- خواندن / نوشتن مقادیر از / به ستون دامنه -- اگر یک نوع پایه می تواند به عنوان یک شاخص استفاده می شود به عنوان شاخص استفاده می شود -- توابع تماس با مقادیر ستون دامنه - -### ویژگی های اضافی از دامنه {#extra-features-of-domains} - -- صریح نام نوع ستون در `SHOW CREATE TABLE` یا `DESCRIBE TABLE` -- ورودی از فرمت انسان دوستانه با `INSERT INTO domain_table(domain_column) VALUES(...)` -- خروجی به فرمت انسان دوستانه برای `SELECT domain_column FROM domain_table` -- بارگیری داده ها از یک منبع خارجی در قالب انسان دوستانه: `INSERT INTO domain_table FORMAT CSV ...` - -### محدودیت ها {#limitations} - -- می توانید ستون شاخص از نوع پایه به نوع دامنه از طریق تبدیل کنید `ALTER TABLE`. -- نمی تواند به طور ضمنی تبدیل مقادیر رشته به ارزش دامنه در هنگام قرار دادن داده ها از ستون یا جدول دیگر. -- دامنه می افزاید: هیچ محدودیتی در مقادیر ذخیره شده. - -[مقاله اصلی](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/fa/sql-reference/table-functions/remote.md b/docs/fa/sql-reference/table-functions/remote.md index 23a6753fd26..dd6e6725be4 100644 --- a/docs/fa/sql-reference/table-functions/remote.md +++ b/docs/fa/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ toc_title: "\u062F\u0648\u0631" ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port` یا فقط `host`. میزبان را می توان به عنوان نام سرور مشخص, و یا به عنوان ایپو4 یا ایپو6 نشانی. نشانی اینترنتی6 در براکت مربع مشخص شده است. پورت پورت تی سی پی بر روی سرور از راه دور است. اگر پورت حذف شده است, با استفاده از `tcp_port` از فایل پیکربندی سرور (به طور پیش فرض, 9000). diff --git a/docs/fr/sql-reference/data-types/domains/index.md b/docs/fr/sql-reference/data-types/domains/index.md index ffe64acb834..7e11f9a8a68 100644 --- a/docs/fr/sql-reference/data-types/domains/index.md +++ b/docs/fr/sql-reference/data-types/domains/index.md @@ -3,6 +3,31 @@ machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_folder_title: Domaine toc_priority: 56 +toc_title: "Aper\xE7u" --- +# Domaine {#domains} +Les domaines sont des types spéciaux qui ajoutent des fonctionnalités supplémentaires au sommet du type de base existant, mais en laissant le format on-wire et on-disc du type de données sous-jacent intact. 
À l'heure actuelle, ClickHouse ne prend pas en charge les domaines définis par l'utilisateur. + +Vous pouvez utiliser des domaines partout type de base correspondant peut être utilisé, par exemple: + +- Créer une colonne d'un type de domaine +- Valeurs de lecture / écriture depuis / vers la colonne de domaine +- L'utiliser comme un indice si un type de base peut être utilisée comme un indice +- Fonctions d'appel avec des valeurs de colonne de domaine + +### Fonctionnalités supplémentaires des domaines {#extra-features-of-domains} + +- Nom de type de colonne explicite dans `SHOW CREATE TABLE` ou `DESCRIBE TABLE` +- Entrée du format convivial avec `INSERT INTO domain_table(domain_column) VALUES(...)` +- Sortie au format convivial pour `SELECT domain_column FROM domain_table` +- Chargement de données à partir d'une source externe dans un format convivial: `INSERT INTO domain_table FORMAT CSV ...` + +### Limitation {#limitations} + +- Impossible de convertir la colonne d'index du type de base en type de domaine via `ALTER TABLE`. +- Impossible de convertir implicitement des valeurs de chaîne en valeurs de domaine lors de l'insertion de données d'une autre colonne ou table. +- Le domaine n'ajoute aucune contrainte sur les valeurs stockées. + +[Article Original](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/fr/sql-reference/data-types/domains/overview.md b/docs/fr/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 149ea84ba7b..00000000000 --- a/docs/fr/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "Aper\xE7u" ---- - -# Domaine {#domains} - -Les domaines sont des types spéciaux qui ajoutent des fonctionnalités supplémentaires au sommet du type de base existant, mais en laissant le format on-wire et on-disc du type de données sous-jacent intact. À l'heure actuelle, ClickHouse ne prend pas en charge les domaines définis par l'utilisateur. - -Vous pouvez utiliser des domaines partout type de base correspondant peut être utilisé, par exemple: - -- Créer une colonne d'un type de domaine -- Valeurs de lecture / écriture depuis / vers la colonne de domaine -- L'utiliser comme un indice si un type de base peut être utilisée comme un indice -- Fonctions d'appel avec des valeurs de colonne de domaine - -### Fonctionnalités supplémentaires des domaines {#extra-features-of-domains} - -- Nom de type de colonne explicite dans `SHOW CREATE TABLE` ou `DESCRIBE TABLE` -- Entrée du format convivial avec `INSERT INTO domain_table(domain_column) VALUES(...)` -- Sortie au format convivial pour `SELECT domain_column FROM domain_table` -- Chargement de données à partir d'une source externe dans un format convivial: `INSERT INTO domain_table FORMAT CSV ...` - -### Limitation {#limitations} - -- Impossible de convertir la colonne d'index du type de base en type de domaine via `ALTER TABLE`. -- Impossible de convertir implicitement des valeurs de chaîne en valeurs de domaine lors de l'insertion de données d'une autre colonne ou table. -- Le domaine n'ajoute aucune contrainte sur les valeurs stockées. 
- -[Article Original](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/fr/sql-reference/table-functions/remote.md b/docs/fr/sql-reference/table-functions/remote.md index 2a2fa7d829d..3e911b61d75 100644 --- a/docs/fr/sql-reference/table-functions/remote.md +++ b/docs/fr/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ Signature: ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port` ou juste `host`. L'hôte peut être spécifié comme nom de serveur ou l'adresse IPv4 ou IPv6. Une adresse IPv6 est indiquée entre crochets. Le port est le port TCP sur le serveur distant. Si le port est omis, il utilise `tcp_port` à partir du fichier de configuration du serveur (par défaut, 9000). diff --git a/docs/ja/sql-reference/data-types/domains/index.md b/docs/ja/sql-reference/data-types/domains/index.md index d476fcefb51..4f8c2b7add8 100644 --- a/docs/ja/sql-reference/data-types/domains/index.md +++ b/docs/ja/sql-reference/data-types/domains/index.md @@ -3,6 +3,31 @@ machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_folder_title: "\u30C9\u30E1\u30A4\u30F3" toc_priority: 56 +toc_title: "\u6982\u8981" --- +# ドメイン {#domains} +ドメインは、既存の基本型の上にいくつかの余分な機能を追加する特殊な目的の型ですが、基になるデータ型のオンワイヤおよびオンディスク形式は 現時点では、ClickHouseはユーザー定義ドメインをサポートしていません。 + +たとえば、対応する基本タイプを使用できる任意の場所でドメインを使用できます: + +- ドメイン型の列を作成する +- ドメイン列から/への読み取り/書き込み値 +- 基本型をインデックスとして使用できる場合は、インデックスとして使用します +- ドメイン列の値を持つ関数の呼び出し + +### ドメインの追加機能 {#extra-features-of-domains} + +- 明示的な列タイプ名 `SHOW CREATE TABLE` または `DESCRIBE TABLE` +- 人間に優しいフォーマットからの入力 `INSERT INTO domain_table(domain_column) VALUES(...)` +- 人間に優しいフォーマットへの出力 `SELECT domain_column FROM domain_table` +- 人間に優しい形式で外部ソースからデータを読み込む: `INSERT INTO domain_table FORMAT CSV ...` + +### 制限 {#limitations} + +- 基本型のインデックス列をドメイン型に変換できません `ALTER TABLE`. +- 別の列または表からデータを挿入するときに、文字列値を暗黙的にドメイン値に変換できません。 +- ドメインは、格納された値に制約を追加しません。 + +[元の記事](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/ja/sql-reference/data-types/domains/overview.md b/docs/ja/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 958fce5beb4..00000000000 --- a/docs/ja/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "\u6982\u8981" ---- - -# ドメイン {#domains} - -ドメインは、既存の基本型の上にいくつかの余分な機能を追加する特殊な目的の型ですが、基になるデータ型のオンワイヤおよびオンディスク形式は 現時点では、ClickHouseはユーザー定義ドメインをサポートしていません。 - -たとえば、対応する基本タイプを使用できる任意の場所でドメインを使用できます: - -- ドメイン型の列を作成する -- ドメイン列から/への読み取り/書き込み値 -- 基本型をインデックスとして使用できる場合は、インデックスとして使用します -- ドメイン列の値を持つ関数の呼び出し - -### ドメインの追加機能 {#extra-features-of-domains} - -- 明示的な列タイプ名 `SHOW CREATE TABLE` または `DESCRIBE TABLE` -- 人間に優しいフォーマットからの入力 `INSERT INTO domain_table(domain_column) VALUES(...)` -- 人間に優しいフォーマットへの出力 `SELECT domain_column FROM domain_table` -- 人間に優しい形式で外部ソースからデータを読み込む: `INSERT INTO domain_table FORMAT CSV ...` - -### 制限 {#limitations} - -- 基本型のインデックス列をドメイン型に変換できません `ALTER TABLE`. 
-- 別の列または表からデータを挿入するときに、文字列値を暗黙的にドメイン値に変換できません。 -- ドメインは、格納された値に制約を追加しません。 - -[元の記事](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/ja/sql-reference/table-functions/remote.md b/docs/ja/sql-reference/table-functions/remote.md index 5a68776d93f..368584551fc 100644 --- a/docs/ja/sql-reference/table-functions/remote.md +++ b/docs/ja/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ toc_title: "\u30EA\u30E2\u30FC\u30C8" ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`、または単に `host`. ホストは、サーバー名またはIPv4またはIPv6アドレスとして指定できます。 IPv6アドレスは角かっこで指定します。 ポートは、リモートサーバー上のTCPポートです。 ポートが省略されると、次のようになります `tcp_port` サーバーの設定ファイルから(デフォルトでは9000)。 diff --git a/docs/redirects.txt b/docs/redirects.txt index 13392e5dce5..b4c93ac4908 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -161,6 +161,7 @@ interfaces/third-party/client_libraries.md interfaces/third-party/client-librari interfaces/third-party_client_libraries.md interfaces/third-party/client-libraries.md interfaces/third-party_gui.md interfaces/third-party/gui.md interfaces/third_party/index.md interfaces/third-party/index.md +introduction/index.md introduction/distinctive_features.md introduction/distinctive-features.md introduction/features_considered_disadvantages.md introduction/distinctive-features.md introduction/possible_silly_questions.md faq/general.md @@ -289,6 +290,7 @@ query_language/table_functions/remote.md sql-reference/table-functions/remote.md query_language/table_functions/url.md sql-reference/table-functions/url.md roadmap.md whats-new/roadmap.md security_changelog.md whats-new/security-changelog.md +sql-reference/data-types/domains/overview.md sql-reference/data-types/domains/index.md sql_reference/aggregate_functions/combinators.md sql-reference/aggregate-functions/combinators.md sql_reference/aggregate_functions/index.md sql-reference/aggregate-functions/index.md sql_reference/aggregate_functions/parametric_functions.md sql-reference/aggregate-functions/parametric-functions.md diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index 811af7a75f3..a9ed231af08 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -116,7 +116,7 @@ ClickHouse - полноценная колоночная СУБД. Данные Обычный функции не изменяют число строк и работают так, как если бы обрабатывали каждую строку независимо. В действительности же, функции вызываются не к отдельным строкам, а блокам данных для реализации векторизованного выполнения запросов. -Некоторые функции, такие как [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), и [runningAccumulate](../sql-reference/functions/other-functions.md#function-runningaccumulate), эксплуатируют блочную обработку и нарушают независимость строк. 
+Некоторые функции, такие как [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), и [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate), эксплуатируют блочную обработку и нарушают независимость строк. ClickHouse имеет сильную типизацию, поэтому нет никакого неявного преобразования типов. Если функция не поддерживает определенную комбинацию типов, она создает исключение. Но функции могут работать (перегружаться) для многих различных комбинаций типов. Например, функция `plus` (для реализации `+` оператор) работает для любой комбинации числовых типов: `UInt8` + `Float32`, `UInt16` + `Int8` и так далее. Кроме того, некоторые вариадические функции, такие как `concat`, могут принимать любое количество аргументов. diff --git a/docs/ru/engines/table-engines/index.md b/docs/ru/engines/table-engines/index.md index 580ee10250f..6a954313c60 100644 --- a/docs/ru/engines/table-engines/index.md +++ b/docs/ru/engines/table-engines/index.md @@ -53,7 +53,7 @@ - [Distributed](special/distributed.md#distributed) - [MaterializedView](special/materializedview.md#materializedview) - [Dictionary](special/dictionary.md#dictionary) -- [Merge](special/merge.md#merge +- [Merge](special/merge.md#merge) - [File](special/file.md#file) - [Null](special/null.md#null) - [Set](special/set.md#set) diff --git a/docs/ru/engines/table-engines/special/distributed.md b/docs/ru/engines/table-engines/special/distributed.md index cc88bab6264..53c5a02e752 100644 --- a/docs/ru/engines/table-engines/special/distributed.md +++ b/docs/ru/engines/table-engines/special/distributed.md @@ -44,6 +44,8 @@ logs - имя кластера в конфигурационном файле с false + + 1 example01-01-1 9000 diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 822f5543f9b..9eebc3f8bac 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -945,6 +945,55 @@ message MessageType { ClickHouse пишет и читает сообщения `Protocol Buffers` в формате `length-delimited`. Это означает, что перед каждым сообщением пишется его длина в формате [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). См. также [как читать и записывать сообщения Protocol Buffers в формате length-delimited в различных языках программирования](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages). +## Avro {#data-format-avro} + +## AvroConfluent {#data-format-avro-confluent} + +Для формата `AvroConfluent` ClickHouse поддерживает декодирование сообщений `Avro` с одним объектом. Такие сообщения используются с [Kafka] (http://kafka.apache.org/) и реестром схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html). + +Каждое сообщение `Avro` содержит идентификатор схемы, который может быть разрешен для фактической схемы с помощью реестра схем. + +Схемы кэшируются после разрешения. + +URL-адрес реестра схем настраивается с помощью [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#format_avro_schema_registry_url). + +### Соответствие типов данных {#sootvetstvie-tipov-dannykh-0} + +Такое же, как в [Avro](#data-format-avro). 
+ +### Использование {#ispolzovanie} + +Чтобы быстро проверить разрешение схемы, используйте [kafkacat](https://github.com/edenhill/kafkacat) с языком запросов [clickhouse-local](../operations/utilities/clickhouse-local.md): + +``` bash +$ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse-local --input-format AvroConfluent --format_avro_schema_registry_url 'http://schema-registry' -S "field1 Int64, field2 String" -q 'select * from table' +1 a +2 b +3 c +``` + +Чтобы использовать `AvroConfluent` с [Kafka](../engines/table-engines/integrations/kafka.md): + +``` sql +CREATE TABLE topic1_stream +( + field1 String, + field2 String +) +ENGINE = Kafka() +SETTINGS +kafka_broker_list = 'kafka-broker', +kafka_topic_list = 'topic1', +kafka_group_name = 'group1', +kafka_format = 'AvroConfluent'; + +SET format_avro_schema_registry_url = 'http://schema-registry'; + +SELECT * FROM topic1_stream; +``` +!!! note "Внимание" + `format_avro_schema_registry_url` необходимо настроить в `users.xml`, чтобы сохранить значение после перезапуска. Также можно использовать настройку `format_avro_schema_registry_url` табличного движка `Kafka`. + ## Parquet {#data-format-parquet} [Apache Parquet](http://parquet.apache.org/) — формат поколоночного хранения данных, который распространён в экосистеме Hadoop. Для формата `Parquet` ClickHouse поддерживает операции чтения и записи. diff --git a/docs/ru/operations/access-rights.md b/docs/ru/operations/access-rights.md index 99da2550e70..de8265e3ba1 100644 --- a/docs/ru/operations/access-rights.md +++ b/docs/ru/operations/access-rights.md @@ -31,7 +31,7 @@ ClickHouse поддерживает управление доступом на Если вы начали пользоваться ClickHouse недавно, попробуйте следующий сценарий: 1. [Включите](#enabling-access-control) SQL-ориентированное управление доступом для пользователя `default`. -2. Войдите под пользователем `default` и создайте всех необходимых пользователей. Не забудьте создать аккаунт администратора (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`). +2. Войдите под пользователем `default` и создайте всех необходимых пользователей. Не забудьте создать аккаунт администратора (`GRANT ALL ON *.* TO admin_user_account WITH GRANT OPTION`). 3. [Ограничьте разрешения](settings/permissions-for-queries.md#permissions_for_queries) для пользователя `default` и отключите для него SQL-ориентированное управление доступом. ### Особенности реализации {#access-control-properties} diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 5bfedf4c520..2c933d87157 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -372,6 +372,25 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat 100 ``` +## max_server_memory_usage {#max_server_memory_usage} + +Ограничивает объём оперативной памяти, используемой сервером ClickHouse. Настройка может быть задана только для профиля `default`. + +Возможные значения: + +- Положительное целое число. +- 0 — объём используемой памяти не ограничен. + +Значение по умолчанию: `0`. + +**Дополнительная информация** + +На серверах с небольшим объёмом RAM и файла подкачки может потребоваться настройка `max_server_memory_usage_to_ram_ratio > 1`. + +**См. также** + +- [max_memory_usage](../settings/query-complexity.md#settings_max_memory_usage) + ## max\_connections {#max-connections} Максимальное количество входящих соединений. 
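A quick way to relate the `max_server_memory_usage` limit described above to the server's current consumption is to query the memory-tracking metric. A sketch, assuming the `MemoryTracking` entry is exposed in `system.metrics`, as in most releases:

``` sql
-- shows how much memory the server is tracking against the max_server_memory_usage limit
SELECT metric, formatReadableSize(value) AS memory_in_use
FROM system.metrics
WHERE metric = 'MemoryTracking';
```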
diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index 651f597c4d2..74c99968bc0 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -1,4 +1,4 @@ -# Ограничения на сложность запроса {#ogranicheniia-na-slozhnost-zaprosa} +# Ограничения на сложность запроса {#restrictions-on-query-complexity} Ограничения на сложность запроса - часть настроек. Используются, чтобы обеспечить более безопасное исполнение запросов из пользовательского интерфейса. @@ -32,7 +32,7 @@ Потребление памяти не полностью учитывается для состояний агрегатных функций `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` от аргументов `String` и `Array`. -Потребление памяти ограничивается также параметрами `max_memory_usage_for_user` и `max_memory_usage_for_all_queries`. +Потребление памяти ограничивается также параметрами `max_memory_usage_for_user` и [max_server_memory_usage](../server-configuration-parameters/settings.md#max_server_memory_usage). ## max\_memory\_usage\_for\_user {#max-memory-usage-for-user} @@ -42,18 +42,9 @@ Смотрите также описание настройки [max\_memory\_usage](#settings_max_memory_usage). -## max\_memory\_usage\_for\_all\_queries {#max-memory-usage-for-all-queries} - -Максимальный возможный объём оперативной памяти для всех запросов на одном сервере. - -Значения по умолчанию определены в файле [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Settings.h#L289). По умолчанию размер не ограничен (`max_memory_usage_for_all_queries = 0`). - -Смотрите также описание настройки [max\_memory\_usage](#settings_max_memory_usage). - ## max\_rows\_to\_read {#max-rows-to-read} Следующие ограничения могут проверяться на каждый блок (а не на каждую строку). То есть, ограничения могут быть немного нарушены. -При выполнении запроса в несколько потоков, следующие ограничения действуют в каждом потоке по отдельности. Максимальное количество строчек, которое можно прочитать из таблицы при выполнении запроса. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 4cccaa4e2d7..fd6bbf4121d 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -644,6 +644,17 @@ log_query_threads=1 Значение по умолчанию: 256 Кб. +## max\_parser\_depth {#max_parser_depth} + +Ограничивает максимальную глубину рекурсии в парсере рекурсивного спуска. Позволяет контролировать размер стека. + +Возможные значения: + +- Положительное целое число. +- 0 — Глубина рекурсии не ограничена. + +Значение по умолчанию: 1000. + ## interactive\_delay {#interactive-delay} Интервал в микросекундах для проверки, не запрошена ли остановка выполнения запроса, и отправки прогресса. @@ -1205,6 +1216,23 @@ Default value: 0. Значение по умолчанию: 16. +## always_fetch_merged_part {#always_fetch_merged_part} + +Запрещает слияние данных для таблиц семейства [Replicated*MergeTree](../../engines/table-engines/mergetree-family/replication.md). + +Если слияние запрещено, реплика никогда не выполняет слияние отдельных кусков данных, а всегда загружает объединённые данные из других реплик. Если объединённых данных пока нет, реплика ждет их появления. Нагрузка на процессор и диски на реплике уменьшается, но нагрузка на сеть в кластере возрастает. Настройка может быть полезна на репликах с относительно слабыми процессорами или медленными дисками, например, на репликах для хранения архивных данных. 
+ +Возможные значения: + +- 0 — таблицы семейства `Replicated*MergeTree` выполняют слияние данных на реплике. +- 1 — таблицы семейства `Replicated*MergeTree` не выполняют слияние данных на реплике, а загружают объединённые данные из других реплик. + +Значение по умолчанию: 0. + +**См. также:** + +- [Репликация данных](../../engines/table-engines/mergetree-family/replication.md) + ## transform_null_in {#transform_null_in} Разрешает сравнивать значения [NULL](../../sql-reference/syntax.md#null-literal) в операторе [IN](../../sql-reference/operators/in.md). @@ -1313,6 +1341,12 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; Значение по умолчанию: 16. +## format\_avro\_schema\_registry\_url {#format_avro_schema_registry_url} + +Задает URL реестра схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html) для использования с форматом [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent). + +Значение по умолчанию: `Пустая строка`. + ## min_insert_block_size_rows_for_materialized_views {#min-insert-block-size-rows-for-materialized-views} Устанавливает минимальное количество строк в блоке, который может быть вставлен в таблицу запросом `INSERT`. Блоки меньшего размера склеиваются в блоки большего размера. Настройка применяется только для блоков, вставляемых в [материализованное представление](../../sql-reference/statements/create.md#create-view). Настройка позволяет избежать избыточного потребления памяти. diff --git a/docs/ru/sql-reference/aggregate-functions/combinators.md b/docs/ru/sql-reference/aggregate-functions/combinators.md index 95264976857..ec325d62b02 100644 --- a/docs/ru/sql-reference/aggregate-functions/combinators.md +++ b/docs/ru/sql-reference/aggregate-functions/combinators.md @@ -29,7 +29,7 @@ - Движок таблиц [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md). - Функция [finalizeAggregation](../../sql-reference/aggregate-functions/combinators.md#function-finalizeaggregation). -- Функция [runningAccumulate](../../sql-reference/aggregate-functions/combinators.md#function-runningaccumulate). +- Функция [runningAccumulate](../../sql-reference/aggregate-functions/combinators.md#runningaccumulate). - Комбинатор [-Merge](#aggregate_functions_combinators-merge). - Комбинатор [-MergeState](#aggregate_functions_combinators-mergestate). diff --git a/docs/ru/sql-reference/data-types/domains/index.md b/docs/ru/sql-reference/data-types/domains/index.md index ddcb5b21d82..fe5c7ab7349 100644 --- a/docs/ru/sql-reference/data-types/domains/index.md +++ b/docs/ru/sql-reference/data-types/domains/index.md @@ -1,6 +1,33 @@ --- -toc_folder_title: Domains +toc_folder_title: Домены +toc_title_title: Обзор toc_priority: 56 --- +# Домены {#domeny} + +Домены — это типы данных специального назначения, которые добавляют некоторые дополнительные функции поверх существующего базового типа. На данный момент ClickHouse не поддерживает пользовательские домены. + +Вы можете использовать домены везде, где можно использовать соответствующий базовый тип: + +- Создание столбца с доменным типом данных. +- Чтение/запись значений из/в столбец с доменным типом данных. +- Используйте его как индекс, если базовый тип можно использовать в качестве индекса. +- Вызов функций со значениями столбца, имеющего доменный тип данных. +- и так далее. 
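A small illustration of the points above, using the built-in `IPv4` domain; the table and values are made up for the example:

``` sql
CREATE TABLE visits (url String, ip IPv4) ENGINE = MergeTree() ORDER BY url;

-- the domain accepts a human-readable literal although its base type is UInt32
INSERT INTO visits (url, ip) VALUES ('https://clickhouse.tech', '116.106.34.242');

-- the column is reported with its domain type name
SELECT ip, toTypeName(ip) FROM visits;
```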
+ +### Дополнительные возможности доменов {#dopolnitelnye-vozmozhnosti-domenov} + +- Явное название типа данных столбца в запросах `SHOW CREATE TABLE` и `DESCRIBE TABLE` +- Ввод данных в удобном человеку формате `INSERT INTO domain_table(domain_column) VALUES(...)` +- Вывод данных в удобном человеку формате `SELECT domain_column FROM domain_table` +- Загрузка данных из внешнего источника в удобном для человека формате: `INSERT INTO domain_table FORMAT CSV ...` + +### Ограничения {#ogranicheniia} + +- Невозможно преобразовать базовый тип данных в доменный для индексного столбца с помощью `ALTER TABLE`. +- Невозможно неявно преобразовывать строковые значение в значения с доменным типом данных при вставке данных из другого столбца или таблицы. +- Домен не добавляет ограничения на хранимые значения. + +[Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/domains/overview) diff --git a/docs/ru/sql-reference/data-types/domains/overview.md b/docs/ru/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 6feac834e5e..00000000000 --- a/docs/ru/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,26 +0,0 @@ -# Домены {#domeny} - -Домены — это типы данных специального назначения, которые добавляют некоторые дополнительные функции поверх существующего базового типа. На данный момент ClickHouse не поддерживает пользовательские домены. - -Вы можете использовать домены везде, где можно использовать соответствующий базовый тип: - -- Создание столбца с доменным типом данных. -- Чтение/запись значений из/в столбец с доменным типом данных. -- Используйте его как индекс, если базовый тип можно использовать в качестве индекса. -- Вызов функций со значениями столбца, имеющего доменный тип данных. -- и так далее. - -### Дополнительные возможности доменов {#dopolnitelnye-vozmozhnosti-domenov} - -- Явное название типа данных столбца в запросах `SHOW CREATE TABLE` и `DESCRIBE TABLE` -- Ввод данных в удобном человеку формате `INSERT INTO domain_table(domain_column) VALUES(...)` -- Вывод данных в удобном человеку формате `SELECT domain_column FROM domain_table` -- Загрузка данных из внешнего источника в удобном для человека формате: `INSERT INTO domain_table FORMAT CSV ...` - -### Ограничения {#ogranicheniia} - -- Невозможно преобразовать базовый тип данных в доменный для индексного столбца с помощью `ALTER TABLE`. -- Невозможно неявно преобразовывать строковые значение в значения с доменным типом данных при вставке данных из другого столбца или таблицы. -- Домен не добавляет ограничения на хранимые значения. - -[Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/domains/overview) diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 24b81d743f3..99eda1bf45e 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -948,7 +948,7 @@ flatten(array_of_arrays) **Параметры** -- `array_of_arrays` — [Массивов](../../sql-reference/functions/array-functions.md) массивов. Например, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Массив](../../sql-reference/functions/array-functions.md) массивов. Например, `[[1,2,3], [4,5]]`. 
**Примеры** diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index ecd9c760fbc..c4bc42c6ece 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -121,6 +121,62 @@ Result: Округляет дату-с-временем вниз до начала минуты. +## toStartOfSecond {#tostartofsecond} + +Отсекает доли секунды. + +**Синтаксис** + +``` sql +toStartOfSecond(value[, timezone]) +``` + +**Параметры** + +- `value` — Дата и время. [DateTime64](../data-types/datetime64.md). +- `timezone` — [Часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) для возвращаемого значения (необязательно). Если параметр не задан, используется часовой пояс параметра `value`. [String](../data-types/string.md). + +**Возвращаемое значение** + +- Входное значение с отсеченными долями секунды. + +Тип: [DateTime64](../data-types/datetime64.md). + +**Примеры** + +Пример без часового пояса: + +``` sql +WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(dt64); +``` + +Результат: + +``` text +┌───toStartOfSecond(dt64)─┐ +│ 2020-01-01 10:20:30.000 │ +└─────────────────────────┘ +``` + +Пример с часовым поясом: + +``` sql +WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(dt64, 'Europe/Moscow'); +``` + +Результат: + +``` text +┌─toStartOfSecond(dt64, 'Europe/Moscow')─┐ +│ 2020-01-01 13:20:30.000 │ +└────────────────────────────────────────┘ +``` + +**См. также** + +- Часовая зона сервера, конфигурационный параметр [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). + + ## toStartOfFiveMinute {#tostartoffiveminute} Округляет дату-с-временем вниз до начала пятиминутного интервала. diff --git a/docs/ru/sql-reference/functions/geo.md b/docs/ru/sql-reference/functions/geo.md index 45c30b3c2cd..bf3f90ed47e 100644 --- a/docs/ru/sql-reference/functions/geo.md +++ b/docs/ru/sql-reference/functions/geo.md @@ -382,4 +382,295 @@ SELECT arrayJoin(h3kRing(644325529233966508, 1)) AS h3index └────────────────────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/geo/) +## h3GetBaseCell {#h3getbasecell} + +Определяет номер базовой (верхнеуровневой) шестиугольной H3-ячейки для указанной ячейки. + +**Синтаксис** + +``` sql +h3GetBaseCell(index) +``` + +**Параметр** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Индекс базовой шестиугольной ячейки. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3GetBaseCell(612916788725809151) as basecell; +``` + +Результат: + +``` text +┌─basecell─┐ +│ 12 │ +└──────────┘ +``` + +## h3HexAreaM2 {#h3hexaream2} + +Определяет среднюю площадь шестиугольной H3-ячейки заданного разрешения в квадратных метрах. + +**Синтаксис** + +``` sql +h3HexAreaM2(resolution) +``` + +**Параметр** + +- `resolution` — разрешение. Диапазон: `[0, 15]`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Площадь в квадратных метрах. Тип: [Float64](../../sql-reference/data-types/float.md). 
+ +**Пример** + +Запрос: + +``` sql +SELECT h3HexAreaM2(13) as area; +``` + +Результат: + +``` text +┌─area─┐ +│ 43.9 │ +└──────┘ +``` + +## h3IndexesAreNeighbors {#h3indexesareneighbors} + +Определяет, являются ли H3-ячейки соседями. + +**Синтаксис** + +``` sql +h3IndexesAreNeighbors(index1, index2) +``` + +**Параметры** + +- `index1` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). +- `index2` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- `1` — ячейки являются соседями. +- `0` — ячейки не являются соседями. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n; +``` + +Результат: + +``` text +┌─n─┐ +│ 1 │ +└───┘ +``` + +## h3ToChildren {#h3tochildren} + +Формирует массив дочерних (вложенных) H3-ячеек для указанной ячейки. + +**Синтаксис** + +``` sql +h3ToChildren(index, resolution) +``` + +**Параметры** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). +- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Массив дочерних H3-ячеек. + +Тип: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). + +**Пример** + +Запрос: + +``` sql +SELECT h3ToChildren(599405990164561919, 6) AS children; +``` + +Результат: + +``` text +┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## h3ToParent {#h3toparent} + +Определяет родительскую (более крупную) H3-ячейку, содержащую указанную ячейку. + +**Синтаксис** + +``` sql +h3ToParent(index, resolution) +``` + +**Параметры** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). +- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Индекс родительской H3-ячейки. + +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3ToParent(599405990164561919, 3) as parent; +``` + +Результат: + +``` text +┌─────────────parent─┐ +│ 590398848891879423 │ +└────────────────────┘ +``` + +## h3ToString {#h3tostring} + +Преобразует H3-индекс из числового представления `H3Index` в строковое. + +``` sql +h3ToString(index) +``` + +**Параметр** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Строковое представление H3-индекса. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3ToString(617420388352917503) as h3_string; +``` + +Результат: + +``` text +┌─h3_string───────┐ +│ 89184926cdbffff │ +└─────────────────┘ +``` + +## stringToH3 {#stringtoh3} + +Преобразует H3-индекс из строкового представления в числовое представление `H3Index`. + +**Синтаксис** + +``` sql +stringToH3(index_str) +``` + +**Параметр** + +- `index_str` — строковое представление H3-индекса. 
Тип: [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Числовое представление индекса шестиугольной ячейки. +- `0`, если при преобразовании возникла ошибка. + +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT stringToH3('89184926cc3ffff') as index; +``` + +Результат: + +``` text +┌──────────────index─┐ +│ 617420388351344639 │ +└────────────────────┘ +``` + +## h3GetResolution {#h3getresolution} + +Определяет разрешение H3-ячейки. + +**Синтаксис** + +``` sql +h3GetResolution(index) +``` + +**Параметр** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Разрешение ячейки. Диапазон: `[0, 15]`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3GetResolution(617420388352917503) as res; +``` + +Результат: + +``` text +┌─res─┐ +│ 9 │ +└─────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/) diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 0a78de66a2d..c6648963dad 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1,4 +1,4 @@ -# Прочие функции {#prochie-funktsii} +# Прочие функции {#other-functions} ## hostName() {#hostname} @@ -1036,9 +1036,110 @@ SELECT formatReadableSize(filesystemCapacity()) AS "Capacity", toTypeName(filesy Принимает состояние агрегатной функции. Возвращает результат агрегирования. -## runningAccumulate {#function-runningaccumulate} +## runningAccumulate {#runningaccumulate} -Принимает на вход состояния агрегатной функции и возвращает столбец со значениями, которые представляют собой результат мёржа этих состояний для выборки строк из блока от первой до текущей строки. Например, принимает состояние агрегатной функции (например, `runningAccumulate(uniqState(UserID))`), и для каждой строки блока возвращает результат агрегатной функции после мёржа состояний функции для всех предыдущих строк и текущей. Таким образом, результат зависит от разбиения данных по блокам и от порядка данных в блоке. +Накапливает состояния агрегатной функции для каждой строки блока данных. + +!!! warning "Warning" + Функция обнуляет состояние для каждого нового блока. + +**Синтаксис** + +```sql +runningAccumulate(agg_state[, grouping]); +``` + +**Параметры** + +- `agg_state` — Состояние агрегатной функции. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Ключ группировки. Опциональный параметр. Состояние функции обнуляется, если значение `grouping` меняется. Параметр может быть любого [поддерживаемого типа данных](../../sql-reference/data-types/index.md), для которого определен оператор равенства. + +**Возвращаемое значение** + +- Каждая результирующая строка содержит результат агрегатной функции, накопленный для всех входных строк от 0 до текущей позиции. `runningAccumulate` обнуляет состояния для каждого нового блока данных или при изменении значения `grouping`. + +Тип зависит от используемой агрегатной функции. + +**Примеры** + +Рассмотрим примеры использования `runningAccumulate` для нахождения кумулятивной суммы чисел без и с группировкой. 
+ +Запрос: + +```sql +SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k) AS sum_k FROM numbers(10) GROUP BY k ORDER BY k); +``` + +Результат: + +```text +┌─k─┬─res─┐ +│ 0 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 3 │ +│ 3 │ 6 │ +│ 4 │ 10 │ +│ 5 │ 15 │ +│ 6 │ 21 │ +│ 7 │ 28 │ +│ 8 │ 36 │ +│ 9 │ 45 │ +└───┴─────┘ +``` + +Подзапрос формирует `sumState` для каждого числа от `0` до `9`. `sumState` возвращает состояние функции [sum](../../sql-reference/aggregate-functions/reference.md#agg_function-sum), содержащее сумму одного числа. + +Весь запрос делает следующее: + +1. Для первой строки `runningAccumulate` берет `sumState(0)` и возвращает `0`. +2. Для второй строки функция объединяет `sumState (0)` и `sumState (1)`, что приводит к `sumState (0 + 1)`, и возвращает в результате `1`. +3. Для третьей строки функция объединяет `sumState (0 + 1)` и `sumState (2)`, что приводит к `sumState (0 + 1 + 2)`, и в результате возвращает `3`. +4. Действия повторяются до тех пор, пока не закончится блок. + +В следующем примере показано использование параметра `grouping`: + +Запрос: + +```sql +SELECT + grouping, + item, + runningAccumulate(state, grouping) AS res +FROM +( + SELECT + toInt8(number / 4) AS grouping, + number AS item, + sumState(number) AS state + FROM numbers(15) + GROUP BY item + ORDER BY item ASC +); +``` + +Результат: + +```text +┌─grouping─┬─item─┬─res─┐ +│ 0 │ 0 │ 0 │ +│ 0 │ 1 │ 1 │ +│ 0 │ 2 │ 3 │ +│ 0 │ 3 │ 6 │ +│ 1 │ 4 │ 4 │ +│ 1 │ 5 │ 9 │ +│ 1 │ 6 │ 15 │ +│ 1 │ 7 │ 22 │ +│ 2 │ 8 │ 8 │ +│ 2 │ 9 │ 17 │ +│ 2 │ 10 │ 27 │ +│ 2 │ 11 │ 38 │ +│ 3 │ 12 │ 12 │ +│ 3 │ 13 │ 25 │ +│ 3 │ 14 │ 39 │ +└──────────┴──────┴─────┘ +``` + +Как вы можете видеть, `runningAccumulate` объединяет состояния для каждой группы строк отдельно. ## joinGet {#joinget} diff --git a/docs/ru/sql-reference/table-functions/cluster.md b/docs/ru/sql-reference/table-functions/cluster.md new file mode 100644 index 00000000000..7663e89b64d --- /dev/null +++ b/docs/ru/sql-reference/table-functions/cluster.md @@ -0,0 +1,36 @@ +--- +toc_priority: 50 +toc_title: cluster +--- + +# cluster, clusterAllReplicas {#cluster-clusterallreplicas} + +Позволяет обратиться ко всем серверам существующего кластера, который присутствует в таблице `system.clusters` и сконфигурирован в секцци `remote_servers` без создания таблицы типа `Distributed`. +`clusterAllReplicas` - работает также как `cluster` но каждая реплика в кластере будет использована как отдельный шард/отдельное соединение. + + +Сигнатуры: + +``` sql +cluster('cluster_name', db.table) +cluster('cluster_name', db, table) +clusterAllReplicas('cluster_name', db.table) +clusterAllReplicas('cluster_name', db, table) +``` + +`cluster_name` – имя кластера, который обязан присутствовать в таблице `system.clusters` и обозначает подмножество адресов и параметров подключения к удаленным и локальным серверам, входящим в кластер. + +Использование табличных функций `cluster` и `clusterAllReplicas` менее оптимальное чем создание таблицы типа `Distributed`, поскольку в этом случае соединение с сервером переустанавливается на каждый запрос. При обработке большого количества запросов, всегда создавайте `Distributed` таблицу заранее и не используйте табличные функции `cluster` и `clusterAllReplicas`. + +Табличные функции `cluster` and `clusterAllReplicas` могут быть полезны в следующих случаях: + +- Чтение данных из конкретного кластера для сравнения данных, отладки и тестирования. +- Запросы к разным ClickHouse кластерам и репликам в целях исследования. 
+- Нечастых распределенных запросов которые делаются вручную. + +Настройки соединения `user`, `password`, `host`, `post`, `compression`, `secure` берутся из секции `` файлов конфигурации. См. подробности в разделе [Distributed](../../engines/table-engines/special/distributed.md) + +**See Also** + +- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [load\_balancing](../../operations/settings/settings.md#settings-load_balancing) \ No newline at end of file diff --git a/docs/ru/sql-reference/table-functions/remote.md b/docs/ru/sql-reference/table-functions/remote.md index 55c602d4bbf..ab216d5de36 100644 --- a/docs/ru/sql-reference/table-functions/remote.md +++ b/docs/ru/sql-reference/table-functions/remote.md @@ -7,6 +7,8 @@ ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` - выражение, генерирующее адреса удалённых серверов. Это может быть просто один адрес сервера. Адрес сервера - это `хост:порт`, или только `хост`. Хост может быть указан в виде имени сервера, или в виде IPv4 или IPv6 адреса. IPv6 адрес указывается в квадратных скобках. Порт - TCP-порт удалённого сервера. Если порт не указан, используется `tcp_port` из конфигурационного файла сервера (по умолчанию - 9000). diff --git a/docs/tools/blog.py b/docs/tools/blog.py index f5415bec608..c3261f61d4d 100644 --- a/docs/tools/blog.py +++ b/docs/tools/blog.py @@ -80,7 +80,8 @@ def build_for_lang(lang, args): includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), is_amp=False, is_blog=True, - post_meta=post_meta + post_meta=post_meta, + today=datetime.date.today().isoformat() ) ) @@ -89,6 +90,13 @@ def build_for_lang(lang, args): redirects.build_blog_redirects(args) + env = util.init_jinja2_env(args) + with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f: + rss_template_string = f.read().decode('utf-8').strip() + rss_template = env.from_string(rss_template_string) + with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f: + f.write(rss_template.render({'config': raw_config})) + # TODO: AMP for blog # if not args.skip_amp: # amp.build_amp(lang, args, cfg) diff --git a/docs/tools/build.py b/docs/tools/build.py index 1c8165fb36f..ac675897fca 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -169,7 +169,8 @@ def build_docs(args): if lang: tasks.append((lang, args,)) util.run_function_in_parallel(build_for_lang, tasks, threads=False) - redirects.build_docs_redirects(args) + if not args.version_prefix: + redirects.build_docs_redirects(args) def build(args): diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index 0c431fec106..80ecf829341 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -14,9 +14,6 @@ import macros.plugin import slugify as slugify_impl -import amp -import website - def slugify(value, separator): return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) @@ -119,6 +116,7 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): ]) def on_env(self, env, config, files): + import util env.add_extension('jinja2.ext.i18n') dirname = os.path.join(config.data['theme'].dirs[0], 'locale') lang = config.data['theme']['language'] @@ -126,10 +124,7 @@ class 
PatchedMacrosPlugin(macros.plugin.MacrosPlugin): get_translations(dirname, lang), newstyle=True ) - chunk_size = 10240 - env.filters['chunks'] = lambda line: [line[i:i+chunk_size] for i in range(0, len(line), chunk_size)] - env.filters['html_to_amp'] = amp.html_to_amp - env.filters['adjust_markdown_html'] = website.adjust_markdown_html + util.init_jinja2_filters(env) return env def render(self, markdown): diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index c7186ac1e6a..1b172199839 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -25,7 +25,7 @@ protobuf==3.12.2 numpy==1.18.5 Pygments==2.5.2 pymdown-extensions==7.1 -python-slugify==1.2.6 +python-slugify==4.0.1 PyYAML==5.3.1 repackage==0.7.3 requests==2.24.0 diff --git a/docs/tools/translate/requirements.txt b/docs/tools/translate/requirements.txt index 289cf749b36..24a4f343c08 100644 --- a/docs/tools/translate/requirements.txt +++ b/docs/tools/translate/requirements.txt @@ -5,7 +5,7 @@ googletrans==3.0.0 idna==2.10 Jinja2==2.11.2 pandocfilters==1.4.2 -python-slugify==4.0.0 +python-slugify==4.0.1 PyYAML==5.3.1 requests==2.24.0 text-unidecode==1.3 diff --git a/docs/tools/util.py b/docs/tools/util.py index a5a751020f0..b840dc1168a 100644 --- a/docs/tools/util.py +++ b/docs/tools/util.py @@ -1,5 +1,6 @@ import collections import contextlib +import datetime import multiprocessing import os import shutil @@ -8,6 +9,7 @@ import socket import tempfile import threading +import jinja2 import yaml @@ -111,3 +113,35 @@ def represent_ordereddict(dumper, data): yaml.add_representer(collections.OrderedDict, represent_ordereddict) + + +def init_jinja2_filters(env): + import amp + import website + chunk_size = 10240 + env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)] + env.filters['html_to_amp'] = amp.html_to_amp + env.filters['adjust_markdown_html'] = website.adjust_markdown_html + env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT') + + +def init_jinja2_env(args): + import mdx_clickhouse + env = jinja2.Environment( + loader=jinja2.FileSystemLoader([ + args.website_dir, + os.path.join(args.docs_dir, '_includes') + ]), + extensions=[ + 'jinja2.ext.i18n', + 'jinja2_highlight.HighlightExtension' + ] + ) + env.extend(jinja2_highlight_cssclass='syntax p-3 my-3') + translations_dir = os.path.join(args.website_dir, 'locale') + env.install_gettext_translations( + mdx_clickhouse.get_translations(translations_dir, 'en'), + newstyle=True + ) + init_jinja2_filters(env) + return env diff --git a/docs/tools/website.py b/docs/tools/website.py index d69371665ce..688fb887ff7 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -11,15 +11,21 @@ import bs4 import closure import cssmin import htmlmin -import jinja2 import jsmin -import mdx_clickhouse +import util def handle_iframe(iframe, soup): - if not iframe.attrs['src'].startswith('https://www.youtube.com/'): - raise RuntimeError('iframes are allowed only for YouTube') + allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/'] + illegal_domain = True + iframe_src = iframe.attrs['src'] + for domain in allowed_domains: + if iframe_src.startswith(domain): + illegal_domain = False + break + if illegal_domain: + raise RuntimeError(f'iframe from illegal domain: {iframe_src}') wrapper = soup.new_tag('div') wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9'] iframe.insert_before(wrapper) @@ -43,8 +49,11 @@ def 
adjust_markdown_html(content): for a in soup.find_all('a'): a_class = a.attrs.get('class') + a_href = a.attrs.get('href') if a_class and 'headerlink' in a_class: a.string = '\xa0' + if a_href and a_href.startswith('http'): + a.attrs['target'] = '_blank' for iframe in soup.find_all('iframe'): handle_iframe(iframe, soup) @@ -121,22 +130,7 @@ def minify_html(content): def build_website(args): logging.info('Building website') - env = jinja2.Environment( - loader=jinja2.FileSystemLoader([ - args.website_dir, - os.path.join(args.docs_dir, '_includes') - ]), - extensions=[ - 'jinja2.ext.i18n', - 'jinja2_highlight.HighlightExtension' - ] - ) - env.extend(jinja2_highlight_cssclass='syntax p-3 my-3') - translations_dir = os.path.join(args.website_dir, 'locale') - env.install_gettext_translations( - mdx_clickhouse.get_translations(translations_dir, 'en'), - newstyle=True - ) + env = util.init_jinja2_env(args) shutil.copytree( args.website_dir, diff --git a/docs/tr/sql-reference/data-types/domains/index.md b/docs/tr/sql-reference/data-types/domains/index.md index e439d110325..7ef688b3578 100644 --- a/docs/tr/sql-reference/data-types/domains/index.md +++ b/docs/tr/sql-reference/data-types/domains/index.md @@ -1,8 +1,34 @@ --- machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd +toc_priority: 58 +toc_title: "Genel bak\u0131\u015F" toc_folder_title: Etkiler toc_priority: 56 --- +# Etkiler {#domains} +Etki alanları, varolan temel türün üstüne bazı ek özellikler ekleyen, ancak temel veri türünün kablolu ve disk üstü biçimini sağlam bırakan özel amaçlı türlerdir. Şu anda, ClickHouse kullanıcı tanımlı etki alanlarını desteklemiyor. + +Örneğin, ilgili taban türünün kullanılabileceği her yerde etki alanlarını kullanabilirsiniz: + +- Etki alanı türünde bir sütun oluşturma +- Alan sütunundan/alanına değerleri okuma / yazma +- Bir temel türü bir dizin olarak kullanılabilir, bir dizin olarak kullanın +- Etki alanı sütun değerleri ile çağrı fonksiyonları + +### Alanların ekstra özellikleri {#extra-features-of-domains} + +- Açık sütun türü adı `SHOW CREATE TABLE` veya `DESCRIBE TABLE` +- İle insan dostu format inputtan giriş `INSERT INTO domain_table(domain_column) VALUES(...)` +- İçin insan dostu forma outputta çıktı `SELECT domain_column FROM domain_table` +- Harici bir kaynaktan insan dostu biçimde veri yükleme: `INSERT INTO domain_table FORMAT CSV ...` + +### Sınırlamalar {#limitations} + +- Temel türün dizin sütununu etki alanı türüne dönüştürülemiyor `ALTER TABLE`. +- Başka bir sütun veya tablodan veri eklerken dize değerlerini dolaylı olarak etki alanı değerlerine dönüştüremez. +- Etki alanı, depolanan değerler üzerinde hiçbir kısıtlama ekler. + +[Orijinal makale](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/tr/sql-reference/data-types/domains/overview.md b/docs/tr/sql-reference/data-types/domains/overview.md deleted file mode 100644 index cfab9f3701e..00000000000 --- a/docs/tr/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "Genel bak\u0131\u015F" ---- - -# Etkiler {#domains} - -Etki alanları, varolan temel türün üstüne bazı ek özellikler ekleyen, ancak temel veri türünün kablolu ve disk üstü biçimini sağlam bırakan özel amaçlı türlerdir. Şu anda, ClickHouse kullanıcı tanımlı etki alanlarını desteklemiyor. 
- -Örneğin, ilgili taban türünün kullanılabileceği her yerde etki alanlarını kullanabilirsiniz: - -- Etki alanı türünde bir sütun oluşturma -- Alan sütunundan/alanına değerleri okuma / yazma -- Bir temel türü bir dizin olarak kullanılabilir, bir dizin olarak kullanın -- Etki alanı sütun değerleri ile çağrı fonksiyonları - -### Alanların ekstra özellikleri {#extra-features-of-domains} - -- Açık sütun türü adı `SHOW CREATE TABLE` veya `DESCRIBE TABLE` -- İle insan dostu format inputtan giriş `INSERT INTO domain_table(domain_column) VALUES(...)` -- İçin insan dostu forma outputta çıktı `SELECT domain_column FROM domain_table` -- Harici bir kaynaktan insan dostu biçimde veri yükleme: `INSERT INTO domain_table FORMAT CSV ...` - -### Sınırlamalar {#limitations} - -- Temel türün dizin sütununu etki alanı türüne dönüştürülemiyor `ALTER TABLE`. -- Başka bir sütun veya tablodan veri eklerken dize değerlerini dolaylı olarak etki alanı değerlerine dönüştüremez. -- Etki alanı, depolanan değerler üzerinde hiçbir kısıtlama ekler. - -[Orijinal makale](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/tr/sql-reference/table-functions/remote.md b/docs/tr/sql-reference/table-functions/remote.md index e324564b97b..059e8aba72b 100644 --- a/docs/tr/sql-reference/table-functions/remote.md +++ b/docs/tr/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ Oluşturmadan uzak sunuculara erişmenizi sağlar. `Distributed` Tablo. ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port` ya da sadece `host`. Ana bilgisayar sunucu adı veya IPv4 veya IPv6 adresi olarak belirtilebilir. Köşeli parantez içinde bir IPv6 adresi belirtilir. Bağlantı noktası, uzak sunucudaki TCP bağlantı noktasıdır. Bağlantı noktası atlanırsa, kullanır `tcp_port` sunucunun yapılandırma dosyasından (varsayılan olarak, 9000). diff --git a/docs/zh/development/browse-code.md b/docs/zh/development/browse-code.md index 222c773c774..49da72a63aa 100644 --- a/docs/zh/development/browse-code.md +++ b/docs/zh/development/browse-code.md @@ -1,14 +1,12 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 63 toc_title: "\u6D4F\u89C8\u6E90\u4EE3\u7801" --- # 浏览ClickHouse源代码 {#browse-clickhouse-source-code} -您可以使用 **Woboq** 在线代码浏览器可用 [这里](https://clickhouse.tech/codebrowser/html_report/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示,搜索和索引。 代码快照每天更新。 +您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.tech/codebrowser/html_report/ClickHouse/src/index.html). 
它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。 -此外,您还可以浏览源 [GitHub](https://github.com/ClickHouse/ClickHouse) 像往常一样 +此外,您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse) -如果你有兴趣使用什么样的IDE,我们建议CLion,QT Creator,VS Code和KDevelop(有注意事项)。 您可以使用任何喜欢的IDE。 Vim和Emacs也算数。 +如果你希望了解哪种IDE较好,我们推荐使用CLion,QT Creator,VS Code和KDevelop(有注意事项)。 您可以使用任何您喜欢的IDE。 Vim和Emacs也可以。 diff --git a/docs/zh/development/build-osx.md b/docs/zh/development/build-osx.md index 2fc68f6d2fb..24923f75207 100644 --- a/docs/zh/development/build-osx.md +++ b/docs/zh/development/build-osx.md @@ -6,13 +6,13 @@ ClickHouse 支持在 Mac OS X 10.12 版本中编译。若您在用更早的操 ## 安装 Homebrew {#an-zhuang-homebrew} ``` bash -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" +$ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" ``` ## 安装编译器,工具库 {#an-zhuang-bian-yi-qi-gong-ju-ku} ``` bash -brew install cmake ninja gcc icu4c mariadb-connector-c openssl libtool gettext +$ brew install cmake ninja libtool gettext ``` ## 拉取 ClickHouse 源码 {#la-qu-clickhouse-yuan-ma} @@ -27,11 +27,11 @@ cd ClickHouse ## 编译 ClickHouse {#bian-yi-clickhouse} ``` bash -mkdir build -cd build -cmake .. -DCMAKE_CXX_COMPILER=`which g++-8` -DCMAKE_C_COMPILER=`which gcc-8` -ninja -cd .. +$ mkdir build +$ cd build +$ cmake .. -DCMAKE_CXX_COMPILER=`which clang++` -DCMAKE_C_COMPILER=`which clang` +$ ninja +$ cd .. ``` ## 注意事项 {#zhu-yi-shi-xiang} diff --git a/docs/zh/engines/table-engines/index.md b/docs/zh/engines/table-engines/index.md index b025eb1f6c9..f31fa257135 100644 --- a/docs/zh/engines/table-engines/index.md +++ b/docs/zh/engines/table-engines/index.md @@ -52,7 +52,7 @@ - [Distributed](special/distributed.md#distributed) - [MaterializedView](special/materializedview.md#materializedview) - [Dictionary](special/dictionary.md#dictionary) -- [Merge](special/merge.md#merge +- [Merge](special/merge.md#merge) - [File](special/file.md#file) - [Null](special/null.md#null) - [Set](special/set.md#set) diff --git a/docs/zh/engines/table-engines/integrations/kafka.md b/docs/zh/engines/table-engines/integrations/kafka.md index c96b3bc22ed..557ab6a7845 100644 --- a/docs/zh/engines/table-engines/integrations/kafka.md +++ b/docs/zh/engines/table-engines/integrations/kafka.md @@ -1,4 +1,4 @@ -# 卡夫卡 {#kafka} +# Kafka {#kafka} 此引擎与 [Apache Kafka](http://kafka.apache.org/) 结合使用。 diff --git a/docs/zh/operations/settings/query-complexity.md b/docs/zh/operations/settings/query-complexity.md index 090862903f4..1e87bdf8879 100644 --- a/docs/zh/operations/settings/query-complexity.md +++ b/docs/zh/operations/settings/query-complexity.md @@ -60,7 +60,6 @@ Restrictions on the «maximum amount of something» can take the value 0, which ## max\_rows\_to\_read {#max-rows-to-read} 可以在每个块(而不是每行)上检查以下限制。 也就是说,限制可以打破一点。 -在多个线程中运行查询时,以下限制单独应用于每个线程。 运行查询时可从表中读取的最大行数。 diff --git a/docs/zh/sql-reference/data-types/domains/index.md b/docs/zh/sql-reference/data-types/domains/index.md index e05c61e0dbb..6ef788b0650 100644 --- a/docs/zh/sql-reference/data-types/domains/index.md +++ b/docs/zh/sql-reference/data-types/domains/index.md @@ -1,8 +1,34 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_folder_title: "\u57DF" +toc_title: "域" toc_priority: 56 --- +# 域 {#domains} + +Domain类型是特定实现的类型,它总是与某个现存的基础类型保持二进制兼容的同时添加一些额外的特性,以能够在维持磁盘数据不变的情况下使用这些额外的特性。目前ClickHouse暂不支持自定义domain类型。 + +如果你可以在一个地方使用与Domain类型二进制兼容的基础类型,那么在相同的地方您也可以使用Domain类型,例如: + +- 使用Domain类型作为表中列的类型 +- 对Domain类型的列进行读/写数据 
+- 如果与Domain二进制兼容的基础类型可以作为索引,那么Domain类型也可以作为索引 +- 将Domain类型作为参数传递给函数使用 +- 其他 + +### Domains的额外特性 {#domainsde-e-wai-te-xing} + +- 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 +- 在INSERT INTO domain\_table(domain\_column) VALUES(…)中输入数据总是以更人性化的格式进行输入 +- 在SELECT domain\_column FROM domain\_table中数据总是以更人性化的格式输出 +- 在INSERT INTO domain\_table FORMAT CSV …中,实现外部源数据以更人性化的格式载入 + +### Domains类型的限制 {#domainslei-xing-de-xian-zhi} + +- 无法通过`ALTER TABLE`将基础类型的索引转换为Domain类型的索引。 +- 当从其他列或表插入数据时,无法将string类型的值隐式地转换为Domain类型的值。 +- 无法对存储为Domain类型的值添加约束。 + +[来源文章](https://clickhouse.tech/docs/en/data_types/domains/overview) + diff --git a/docs/zh/sql-reference/data-types/domains/overview.md b/docs/zh/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 689acb9cb60..00000000000 --- a/docs/zh/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,26 +0,0 @@ -# 域 {#domains} - -Domain类型是特定实现的类型,它总是与某个现存的基础类型保持二进制兼容的同时添加一些额外的特性,以能够在维持磁盘数据不变的情况下使用这些额外的特性。目前ClickHouse暂不支持自定义domain类型。 - -如果你可以在一个地方使用与Domain类型二进制兼容的基础类型,那么在相同的地方您也可以使用Domain类型,例如: - -- 使用Domain类型作为表中列的类型 -- 对Domain类型的列进行读/写数据 -- 如果与Domain二进制兼容的基础类型可以作为索引,那么Domain类型也可以作为索引 -- 将Domain类型作为参数传递给函数使用 -- 其他 - -### Domains的额外特性 {#domainsde-e-wai-te-xing} - -- 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 -- 在INSERT INTO domain\_table(domain\_column) VALUES(…)中输入数据总是以更人性化的格式进行输入 -- 在SELECT domain\_column FROM domain\_table中数据总是以更人性化的格式输出 -- 在INSERT INTO domain\_table FORMAT CSV …中,实现外部源数据以更人性化的格式载入 - -### Domains类型的限制 {#domainslei-xing-de-xian-zhi} - -- 无法通过`ALTER TABLE`将基础类型的索引转换为Domain类型的索引。 -- 当从其他列或表插入数据时,无法将string类型的值隐式地转换为Domain类型的值。 -- 无法对存储为Domain类型的值添加约束。 - -[来源文章](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index 546b916b942..858a97940c6 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -1,6 +1,6 @@ # 数组函数 {#shu-zu-han-shu} -## 空 {#empty} +## empty {#empty} 对于空数组返回1,对于非空数组返回0。 结果类型是UInt8。 @@ -12,7 +12,7 @@ 结果类型是UInt8。 该函数也适用于字符串。 -## 长度 {#array_functions-length} +## length {#array_functions-length} 返回数组中的元素个数。 结果类型是UInt64。 @@ -24,9 +24,9 @@ ## emptyArrayFloat32,emptyArrayFloat64 {#emptyarrayfloat32-emptyarrayfloat64} -## 空空漫步,空空漫步时间 {#emptyarraydate-emptyarraydatetime} +## emptyArrayDate,emptyArrayDateTime {#emptyarraydate-emptyarraydatetime} -## 空字符串 {#emptyarraystring} +## emptyArrayString {#emptyarraystring} 不接受任何参数并返回适当类型的空数组。 @@ -34,7 +34,7 @@ 接受一个空数组并返回一个仅包含一个默认值元素的数组。 -## 范围(N) {#rangen} +## range(N) {#rangen} 返回从0到N-1的数字数组。 以防万一,如果在数据块中创建总长度超过100,000,000个元素的数组,则抛出异常。 @@ -74,7 +74,7 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res 如果索引超出数组的边界,则返回默认值(数字为0,字符串为空字符串等)。 -## 有(arr,elem) {#hasarr-elem} +## has(arr,elem) {#hasarr-elem} 检查’arr’数组是否具有’elem’元素。 如果元素不在数组中,则返回0;如果在,则返回1。 @@ -186,7 +186,7 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res │ 2 │ └──────────────────────────────────────┘ -## ツ暗ェツ氾环催ツ団ツ法ツ人) {#array_functions-arrayenumerate} +## arrayEnumerate(arr) {#array_functions-arrayenumerate} 返回 Array \[1, 2, 3, …, length (arr) \] @@ -658,7 +658,7 @@ SELECT arrayReduce(‘agg\_func’,arr1,…) - 将聚合函数`agg_func`应用于数组`arr1 ...`。如果传递了多个数组,则相应位置上的元素将作为多个参数传递给聚合函数。例如:SELECT arrayReduce(‘max’,\[1,2,3\])= 3 -## ツ暗ェツ氾环催ツ団ツ法ツ人) {#arrayreversearr} +## arrayReverse(arr) {#arrayreversearr} 返回与源数组大小相同的数组,包含反转源数组的所有元素的结果。 diff --git a/docs/zh/sql-reference/table-functions/remote.md 
b/docs/zh/sql-reference/table-functions/remote.md index 1f3bc58111b..1125353e2fa 100644 --- a/docs/zh/sql-reference/table-functions/remote.md +++ b/docs/zh/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ toc_title: "\u8FDC\u7A0B" ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`,或者只是 `host`. 主机可以指定为服务器名称,也可以指定为IPv4或IPv6地址。 IPv6地址在方括号中指定。 端口是远程服务器上的TCP端口。 如果省略端口,它使用 `tcp_port` 从服务器的配置文件(默认情况下,9000)。 diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index 4ac5e735fd5..713aa82bb3e 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -79,7 +79,7 @@ Suggest::Suggest() "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "FOR", "RANDOMIZED", - "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP"}; + "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE"}; } void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index ce4bf94589e..e702d2f6353 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -1,4 +1,6 @@ #include "ClusterCopierApp.h" +#include + namespace DB { @@ -91,7 +93,7 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options) void ClusterCopierApp::mainImpl() { - StatusFile status_file(process_path + "/status"); + StatusFile status_file(process_path + "/status", StatusFile::write_full_info); ThreadStatus thread_status; auto * log = &logger(); diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index 8e4f7afeb6e..b1a94e1a5ca 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -66,7 +66,6 @@ #include #include #include -#include #include "Aliases.h" diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 8641287c3ec..c19495a0bb0 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -248,7 +248,7 @@ try if (!context->getPath().empty()) { /// Lock path directory before read - status.emplace(context->getPath() + "status"); + status.emplace(context->getPath() + "status", StatusFile::write_full_info); LOG_DEBUG(log, "Loading metadata from {}", context->getPath()); loadMetadataSystem(*context); diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 4b63ed2596d..8cfa110adad 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -8,6 +8,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES ODBCBlockOutputStream.cpp ODBCBridge.cpp PingHandler.cpp + SchemaAllowedHandler.cpp validateODBCConnectionString.cpp ) set (CLICKHOUSE_ODBC_BRIDGE_LINK diff --git a/programs/odbc-bridge/HandlerFactory.cpp b/programs/odbc-bridge/HandlerFactory.cpp index a02fcadea52..0cc40480b87 100644 --- a/programs/odbc-bridge/HandlerFactory.cpp +++ b/programs/odbc-bridge/HandlerFactory.cpp @@ -29,6 +29,12 @@ 
Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco: return new IdentifierQuoteHandler(keep_alive_timeout, context); #else return nullptr; +#endif + else if (uri.getPath() == "/schema_allowed") +#if USE_ODBC + return new SchemaAllowedHandler(keep_alive_timeout, context); +#else + return nullptr; #endif else if (uri.getPath() == "/write") return new ODBCHandler(pool_map, keep_alive_timeout, context, "write"); diff --git a/programs/odbc-bridge/HandlerFactory.h b/programs/odbc-bridge/HandlerFactory.h index 35835de5dad..1d4edfc9dd1 100644 --- a/programs/odbc-bridge/HandlerFactory.h +++ b/programs/odbc-bridge/HandlerFactory.h @@ -6,6 +6,7 @@ #include "MainHandler.h" #include "ColumnInfoHandler.h" #include "IdentifierQuoteHandler.h" +#include "SchemaAllowedHandler.h" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" @@ -15,7 +16,7 @@ namespace DB { -/** Factory for '/ping', '/', '/columns_info', '/identifier_quote' handlers. +/** Factory for '/ping', '/', '/columns_info', '/identifier_quote', '/schema_allowed' handlers. * Also stores Session pools for ODBC connections */ class HandlerFactory : public Poco::Net::HTTPRequestHandlerFactory diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp new file mode 100644 index 00000000000..5aaba57399e --- /dev/null +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -0,0 +1,76 @@ +#include "SchemaAllowedHandler.h" + +#if USE_ODBC + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include "validateODBCConnectionString.h" + +# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC + +namespace DB +{ +namespace +{ + bool isSchemaAllowed(SQLHDBC hdbc) + { + std::string identifier; + + SQLSMALLINT t; + SQLRETURN r = POCO_SQL_ODBC_CLASS::SQLGetInfo(hdbc, SQL_SCHEMA_USAGE, nullptr, 0, &t); + + if (POCO_SQL_ODBC_CLASS::Utility::isError(r)) + throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc); + + return t != 0; + } +} + + +void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +{ + Poco::Net::HTMLForm params(request, request.stream()); + LOG_TRACE(log, "Request URI: {}", request.getURI()); + + auto process_error = [&response, this](const std::string & message) + { + response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); + if (!response.sent()) + response.send() << message << std::endl; + LOG_WARNING(log, message); + }; + + if (!params.has("connection_string")) + { + process_error("No 'connection_string' in request URL"); + return; + } + + try + { + std::string connection_string = params.get("connection_string"); + POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC); + SQLHDBC hdbc = session.dbc().handle(); + + bool result = isSchemaAllowed(hdbc); + + WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); + writeBoolText(result, out); + } + catch (...) 
+ { + process_error("Error getting schema usage from ODBC '" + getCurrentExceptionMessage(false) + "'"); + tryLogCurrentException(log); + } +} + +} + +#endif diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h new file mode 100644 index 00000000000..76aa23b903c --- /dev/null +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +#if USE_ODBC + +namespace DB +{ +class Context; + + +/// This handler establishes connection to database, and retrieve whether schema is allowed. +class SchemaAllowedHandler : public Poco::Net::HTTPRequestHandler +{ +public: + SchemaAllowedHandler(size_t keep_alive_timeout_, Context &) + : log(&Poco::Logger::get("SchemaAllowedHandler")), keep_alive_timeout(keep_alive_timeout_) + { + } + + void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + +private: + Poco::Logger * log; + size_t keep_alive_timeout; +}; + +} + +#endif diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8789a3b7416..037996ae43a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -378,7 +378,7 @@ int Server::main(const std::vector & /*args*/) global_context->setPath(path); - StatusFile status{path + "status"}; + StatusFile status{path + "status", StatusFile::write_full_info}; SCOPE_EXIT({ /** Ask to cancel background jobs all table engines, diff --git a/programs/server/config.xml b/programs/server/config.xml index ca2e6072cd2..85f6023f2a9 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -66,7 +66,6 @@ 8443 9440 --> - @@ -262,9 +261,13 @@ + + localhost 9000 + + diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 1c1215a0e28..94a45e3e1c1 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -40,27 +40,8 @@ class AccessControlManager::ContextAccessCache public: explicit ContextAccessCache(const AccessControlManager & manager_) : manager(manager_) {} - std::shared_ptr getContextAccess( - const UUID & user_id, - const boost::container::flat_set & current_roles, - bool use_default_roles, - const Settings & settings, - const String & current_database, - const ClientInfo & client_info) + std::shared_ptr getContextAccess(const ContextAccessParams & params) { - ContextAccess::Params params; - params.user_id = user_id; - params.current_roles = current_roles; - params.use_default_roles = use_default_roles; - params.current_database = current_database; - params.readonly = settings.readonly; - params.allow_ddl = settings.allow_ddl; - params.allow_introspection = settings.allow_introspection_functions; - params.interface = client_info.interface; - params.http_method = client_info.http_method; - params.address = client_info.current_address.host(); - params.quota_key = client_info.quota_key; - std::lock_guard lock{mutex}; auto x = cache.get(params); if (x) @@ -119,7 +100,25 @@ std::shared_ptr AccessControlManager::getContextAccess( const String & current_database, const ClientInfo & client_info) const { - return context_access_cache->getContextAccess(user_id, current_roles, use_default_roles, settings, current_database, client_info); + ContextAccessParams params; + params.user_id = user_id; + params.current_roles = current_roles; + params.use_default_roles = use_default_roles; + params.current_database = current_database; + params.readonly = settings.readonly; + params.allow_ddl = settings.allow_ddl; + 
params.allow_introspection = settings.allow_introspection_functions; + params.interface = client_info.interface; + params.http_method = client_info.http_method; + params.address = client_info.current_address.host(); + params.quota_key = client_info.quota_key; + return getContextAccess(params); +} + + +std::shared_ptr AccessControlManager::getContextAccess(const ContextAccessParams & params) const +{ + return context_access_cache->getContextAccess(params); } diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index 6bcf8d7c504..d244ecd07d2 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -21,6 +21,7 @@ namespace Poco namespace DB { class ContextAccess; +struct ContextAccessParams; struct User; using UserPtr = std::shared_ptr; class EnabledRoles; @@ -58,6 +59,8 @@ public: const String & current_database, const ClientInfo & client_info) const; + std::shared_ptr getContextAccess(const ContextAccessParams & params) const; + std::shared_ptr getEnabledRoles( const boost::container::flat_set & current_roles, const boost::container::flat_set & current_roles_with_admin_option) const; diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index a4e446750a7..988e8305605 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -1,7 +1,9 @@ #include #include #include +#include #include +#include #include namespace DB @@ -9,7 +11,6 @@ namespace DB namespace ErrorCodes { extern const int INVALID_GRANT; - extern const int LOGICAL_ERROR; } @@ -58,12 +59,194 @@ namespace const AccessFlags system_reload_embedded_dictionaries = AccessType::SYSTEM_RELOAD_EMBEDDED_DICTIONARIES; }; - std::string_view checkCurrentDatabase(const std::string_view & current_database) + using Kind = AccessRightsElementWithOptions::Kind; + + struct ProtoElement { - if (current_database.empty()) - throw Exception("No current database", ErrorCodes::LOGICAL_ERROR); - return current_database; - } + AccessFlags access_flags; + boost::container::small_vector full_name; + bool grant_option = false; + Kind kind = Kind::GRANT; + + friend bool operator<(const ProtoElement & left, const ProtoElement & right) + { + static constexpr auto compare_name = [](const boost::container::small_vector & left_name, + const boost::container::small_vector & right_name, + size_t i) + { + if (i < left_name.size()) + { + if (i < right_name.size()) + return left_name[i].compare(right_name[i]); + else + return 1; /// left_name is longer => left_name > right_name + } + else if (i < right_name.size()) + return 1; /// right_name is longer => left < right + else + return 0; /// left_name == right_name + }; + + if (int cmp = compare_name(left.full_name, right.full_name, 0)) + return cmp < 0; + + if (int cmp = compare_name(left.full_name, right.full_name, 1)) + return cmp < 0; + + if (left.kind != right.kind) + return (left.kind == Kind::GRANT); + + if (left.grant_option != right.grant_option) + return right.grant_option; + + if (int cmp = compare_name(left.full_name, right.full_name, 2)) + return cmp < 0; + + return (left.access_flags < right.access_flags); + } + + AccessRightsElementWithOptions getResult() const + { + AccessRightsElementWithOptions res; + res.access_flags = access_flags; + res.grant_option = grant_option; + res.kind = kind; + switch (full_name.size()) + { + case 0: + { + res.any_database = true; + res.any_table = true; + res.any_column = true; + break; + } + case 1: + { + res.any_database = false; + res.database = full_name[0]; + res.any_table = 
true; + res.any_column = true; + break; + } + case 2: + { + res.any_database = false; + res.database = full_name[0]; + res.any_table = false; + res.table = full_name[1]; + res.any_column = true; + break; + } + case 3: + { + res.any_database = false; + res.database = full_name[0]; + res.any_table = false; + res.table = full_name[1]; + res.any_column = false; + res.columns.emplace_back(full_name[2]); + break; + } + } + return res; + } + }; + + class ProtoElements : public std::vector + { + public: + AccessRightsElementsWithOptions getResult() const + { + ProtoElements sorted = *this; + boost::range::sort(sorted); + AccessRightsElementsWithOptions res; + res.reserve(sorted.size()); + + for (size_t i = 0; i != sorted.size();) + { + size_t count_elements_with_diff_columns = sorted.countElementsWithDifferenceInColumnOnly(i); + if (count_elements_with_diff_columns == 1) + { + /// Easy case: one Element is converted to one AccessRightsElement. + const auto & element = sorted[i]; + if (element.access_flags) + res.emplace_back(element.getResult()); + ++i; + } + else + { + /// Difficult case: multiple Elements are converted to one or multiple AccessRightsElements. + sorted.appendResultWithElementsWithDifferenceInColumnOnly(i, count_elements_with_diff_columns, res); + i += count_elements_with_diff_columns; + } + } + return res; + } + + private: + size_t countElementsWithDifferenceInColumnOnly(size_t start) const + { + const auto & start_element = (*this)[start]; + if ((start_element.full_name.size() != 3) || (start == size() - 1)) + return 1; + + auto it = std::find_if(begin() + start + 1, end(), [&](const ProtoElement & element) + { + return (element.full_name.size() != 3) || (element.full_name[0] != start_element.full_name[0]) + || (element.full_name[1] != start_element.full_name[1]) || (element.grant_option != start_element.grant_option) + || (element.kind != start_element.kind); + }); + + return it - (begin() + start); + } + + /// Collects columns together to write multiple columns into one AccessRightsElement. + /// That procedure allows to output access rights in more compact way, + /// e.g. "SELECT(x, y)" instead of "SELECT(x), SELECT(y)". 
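To make the compaction described in that comment concrete, here is a minimal standalone sketch of the same idea on a simplified element type; it is not part of the patch, and the real `appendResultWithElementsWithDifferenceInColumnOnly` below additionally tracks `handled_flags` so that elements whose access flags only partially overlap still get merged correctly.

``` cpp
// Minimal sketch of the column-collapsing pass, using a toy element type instead of
// ProtoElement/AccessFlags: adjacent elements that differ only in the column name are
// merged into one element listing all columns, producing "SELECT(x, y) ON db.table"
// instead of "SELECT(x) ON db.table, SELECT(y) ON db.table".
#include <iostream>
#include <string>
#include <vector>

struct ToyElement
{
    std::string database;
    std::string table;
    std::string column;
    std::string access;   /// e.g. "SELECT"; the real code uses AccessFlags bitmasks.
};

int main()
{
    /// Already sorted so that elements differing only in the column are adjacent,
    /// which is what boost::range::sort(sorted) guarantees in getResult().
    std::vector<ToyElement> elements = {
        {"db", "table", "x", "SELECT"},
        {"db", "table", "y", "SELECT"},
        {"db", "other", "z", "SELECT"},
    };

    for (size_t i = 0; i != elements.size();)
    {
        /// Find the run of elements that differ from elements[i] only in the column.
        size_t j = i + 1;
        while (j != elements.size()
               && elements[j].database == elements[i].database
               && elements[j].table == elements[i].table
               && elements[j].access == elements[i].access)
            ++j;

        /// Emit one combined element for the whole run.
        std::string columns = elements[i].column;
        for (size_t k = i + 1; k != j; ++k)
            columns += ", " + elements[k].column;
        std::cout << elements[i].access << "(" << columns << ") ON "
                  << elements[i].database << "." << elements[i].table << "\n";
        i = j;
    }
}
```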
+ void appendResultWithElementsWithDifferenceInColumnOnly(size_t start, size_t count, AccessRightsElementsWithOptions & res) const + { + const auto * pbegin = data() + start; + const auto * pend = pbegin + count; + AccessFlags handled_flags; + + while (pbegin < pend) + { + while (pbegin < pend && !(pbegin->access_flags - handled_flags)) + ++pbegin; + + while (pbegin < pend && !((pend - 1)->access_flags - handled_flags)) + --pend; + + if (pbegin >= pend) + break; + + AccessFlags common_flags = (pbegin->access_flags - handled_flags); + for (const auto * element = pbegin + 1; element != pend; ++element) + { + if (auto new_common_flags = (element->access_flags - handled_flags) & common_flags) + common_flags = new_common_flags; + } + + res.emplace_back(); + auto & back = res.back(); + back.grant_option = pbegin->grant_option; + back.kind = pbegin->kind; + back.any_database = false; + back.database = pbegin->full_name[0]; + back.any_table = false; + back.table = pbegin->full_name[1]; + back.any_column = false; + back.access_flags = common_flags; + for (const auto * element = pbegin; element != pend; ++element) + { + if (((element->access_flags - handled_flags) & common_flags) == common_flags) + back.columns.emplace_back(element->full_name[2]); + } + + handled_flags |= common_flags; + } + } + }; } @@ -243,23 +426,43 @@ public: friend bool operator!=(const Node & left, const Node & right) { return !(left == right); } - void merge(const Node & other, const Helper & helper) + void makeUnion(const Node & other, const Helper & helper) { - mergeAccessRec(other); + makeUnionRec(other); calculateFinalAccessRec(helper); } - void logTree(Poco::Logger * log) const + void makeIntersection(const Node & other, const Helper & helper) { - LOG_TRACE(log, "Tree({}): name={}, access={}, final_access={}, min_access={}, max_access={}, num_children={}", - level, node_name ? *node_name : "NULL", access.toString(), + makeIntersectionRec(other); + calculateFinalAccessRec(helper); + } + + ProtoElements getElements() const + { + ProtoElements res; + getElementsRec(res, {}, *this, {}); + return res; + } + + static ProtoElements getElements(const Node * node, const Node * node_with_grant_option) + { + ProtoElements res; + getElementsRec(res, {}, node, {}, node_with_grant_option, {}); + return res; + } + + void logTree(Poco::Logger * log, const String & title) const + { + LOG_TRACE(log, "Tree({}): level={}, name={}, access={}, final_access={}, min_access={}, max_access={}, num_children={}", + title, level, node_name ? *node_name : "NULL", access.toString(), final_access.toString(), min_access.toString(), max_access.toString(), (children ? 
children->size() : 0)); if (children) { for (auto & child : *children | boost::adaptors::map_values) - child.logTree(log); + child.logTree(log, title); } } @@ -342,6 +545,93 @@ private: } } + static void getElementsRec( + ProtoElements & res, + const boost::container::small_vector & full_name, + const Node & node, + const AccessFlags & parent_access) + { + auto access = node.access; + auto revokes = parent_access - access; + auto grants = access - parent_access; + + if (revokes) + res.push_back(ProtoElement{revokes, full_name, false, Kind::REVOKE}); + + if (grants) + res.push_back(ProtoElement{grants, full_name, false, Kind::GRANT}); + + if (node.children) + { + for (const auto & [child_name, child] : *node.children) + { + boost::container::small_vector child_full_name = full_name; + child_full_name.push_back(child_name); + getElementsRec(res, child_full_name, child, access); + } + } + } + + static void getElementsRec( + ProtoElements & res, + const boost::container::small_vector & full_name, + const Node * node, + const AccessFlags & parent_access, + const Node * node_go, + const AccessFlags & parent_access_go) + { + auto access = node ? node->access : parent_access; + auto access_go = node_go ? node_go->access : parent_access_go; + auto revokes = parent_access - access; + auto revokes_go = parent_access_go - access_go - revokes; + auto grants_go = access_go - parent_access_go; + auto grants = access - parent_access - grants_go; + + if (revokes) + res.push_back(ProtoElement{revokes, full_name, false, Kind::REVOKE}); + + if (revokes_go) + res.push_back(ProtoElement{revokes_go, full_name, true, Kind::REVOKE}); + + if (grants) + res.push_back(ProtoElement{grants, full_name, false, Kind::GRANT}); + + if (grants_go) + res.push_back(ProtoElement{grants_go, full_name, true, Kind::GRANT}); + + if (node && node->children) + { + for (const auto & [child_name, child] : *node->children) + { + boost::container::small_vector child_full_name = full_name; + child_full_name.push_back(child_name); + const Node * child_node = &child; + const Node * child_node_go = nullptr; + if (node_go && node_go->children) + { + auto it = node_go->children->find(child_name); + if (it != node_go->children->end()) + child_node_go = &it->second; + } + getElementsRec(res, child_full_name, child_node, access, child_node_go, access_go); + } + + } + if (node_go && node_go->children) + { + for (const auto & [child_name, child] : *node_go->children) + { + if (node && node->children && node->children->count(child_name)) + continue; /// already processed + boost::container::small_vector child_full_name = full_name; + child_full_name.push_back(child_name); + const Node * child_node = nullptr; + const Node * child_node_go = &child; + getElementsRec(res, child_full_name, child_node, access, child_node_go, access_go); + } + } + } + void calculateFinalAccessRec(const Helper & helper) { /// Traverse tree. 
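The `getElementsRec` recursion above serializes the tree by diffing every node against its parent: whatever the parent had but the node lost becomes a partial `REVOKE`, and whatever the node gained becomes a `GRANT`. A minimal sketch of that set-difference step with toy bitmask flags (not part of the patch):

``` cpp
// Toy illustration of the parent/child diff performed by getElementsRec():
// bit 0 = SELECT, bit 1 = INSERT, bit 2 = ALTER (stand-ins for AccessFlags).
#include <bitset>
#include <iostream>

int main()
{
    std::bitset<3> parent_access("011");   /// SELECT + INSERT granted at the database level
    std::bitset<3> child_access("101");    /// SELECT + ALTER effective on one table

    std::bitset<3> revokes = parent_access & ~child_access;  /// parent - child
    std::bitset<3> grants = child_access & ~parent_access;   /// child - parent

    std::cout << "REVOKE bits for the table: " << revokes << '\n';  /// 010 -> INSERT is revoked
    std::cout << "GRANT bits for the table:  " << grants << '\n';   /// 100 -> ALTER is granted
}
```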
@@ -438,12 +728,12 @@ private: max_access = final_access | max_access_among_children; } - void mergeAccessRec(const Node & rhs) + void makeUnionRec(const Node & rhs) { if (rhs.children) { for (const auto & [rhs_childname, rhs_child] : *rhs.children) - getChild(rhs_childname).mergeAccessRec(rhs_child); + getChild(rhs_childname).makeUnionRec(rhs_child); } access |= rhs.access; if (children) @@ -455,6 +745,24 @@ private: } } } + + void makeIntersectionRec(const Node & rhs) + { + if (rhs.children) + { + for (const auto & [rhs_childname, rhs_child] : *rhs.children) + getChild(rhs_childname).makeIntersectionRec(rhs_child); + } + access &= rhs.access; + if (children) + { + for (auto & [lhs_childname, lhs_child] : *children) + { + if (!rhs.tryGetChild(lhs_childname)) + lhs_child.access &= rhs.access; + } + } + } }; @@ -476,6 +784,10 @@ AccessRights & AccessRights::operator =(const AccessRights & src) root = std::make_unique(*src.root); else root = nullptr; + if (src.root_with_grant_option) + root_with_grant_option = std::make_unique(*src.root_with_grant_option); + else + root_with_grant_option = nullptr; return *this; } @@ -488,302 +800,267 @@ AccessRights::AccessRights(const AccessFlags & access) bool AccessRights::isEmpty() const { - return !root; + return !root && !root_with_grant_option; } void AccessRights::clear() { root = nullptr; + root_with_grant_option = nullptr; } -template +template void AccessRights::grantImpl(const AccessFlags & flags, const Args &... args) { - if (!root) - root = std::make_unique(); - root->grant(flags, Helper::instance(), args...); - if (!root->access && !root->children) - root = nullptr; + auto helper = [&](std::unique_ptr & root_node) + { + if (!root_node) + root_node = std::make_unique(); + root_node->grant(flags, Helper::instance(), args...); + if (!root_node->access && !root_node->children) + root_node = nullptr; + }; + helper(root); + + if constexpr (with_grant_option) + helper(root_with_grant_option); } -void AccessRights::grant(const AccessFlags & flags) { grantImpl(flags); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database) { grantImpl(flags, database); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { grantImpl(flags, database, table); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { grantImpl(flags, database, table, column); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { grantImpl(flags, database, table, columns); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { grantImpl(flags, database, table, columns); } - -void AccessRights::grant(const AccessRightsElement & element, std::string_view current_database) +template +void AccessRights::grantImpl(const AccessRightsElement & element) { if (element.any_database) - { - grant(element.access_flags); - } + grantImpl(element.access_flags); else if (element.any_table) - { - if (element.database.empty()) - grant(element.access_flags, checkCurrentDatabase(current_database)); - else - grant(element.access_flags, element.database); - } + grantImpl(element.access_flags, element.database); else if (element.any_column) - { - if (element.database.empty()) - grant(element.access_flags, 
checkCurrentDatabase(current_database), element.table); - else - grant(element.access_flags, element.database, element.table); - } + grantImpl(element.access_flags, element.database, element.table); else - { - if (element.database.empty()) - grant(element.access_flags, checkCurrentDatabase(current_database), element.table, element.columns); - else - grant(element.access_flags, element.database, element.table, element.columns); - } + grantImpl(element.access_flags, element.database, element.table, element.columns); } -void AccessRights::grant(const AccessRightsElements & elements, std::string_view current_database) +template +void AccessRights::grantImpl(const AccessRightsElements & elements) { for (const auto & element : elements) - grant(element, current_database); + grantImpl(element); } +void AccessRights::grant(const AccessFlags & flags) { grantImpl(flags); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database) { grantImpl(flags, database); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { grantImpl(flags, database, table); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { grantImpl(flags, database, table, column); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { grantImpl(flags, database, table, columns); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { grantImpl(flags, database, table, columns); } +void AccessRights::grant(const AccessRightsElement & element) { grantImpl(element); } +void AccessRights::grant(const AccessRightsElements & elements) { grantImpl(elements); } -template +void AccessRights::grantWithGrantOption(const AccessFlags & flags) { grantImpl(flags); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database) { grantImpl(flags, database); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { grantImpl(flags, database, table); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { grantImpl(flags, database, table, column); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { grantImpl(flags, database, table, columns); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { grantImpl(flags, database, table, columns); } +void AccessRights::grantWithGrantOption(const AccessRightsElement & element) { grantImpl(element); } +void AccessRights::grantWithGrantOption(const AccessRightsElements & elements) { grantImpl(elements); } + + +template void AccessRights::revokeImpl(const AccessFlags & flags, const Args &... 
args) { - if (!root) - return; - root->revoke(flags, Helper::instance(), args...); - if (!root->access && !root->children) - root = nullptr; + auto helper = [&](std::unique_ptr & root_node) + { + if (!root_node) + return; + root_node->revoke(flags, Helper::instance(), args...); + if (!root_node->access && !root_node->children) + root_node = nullptr; + }; + helper(root_with_grant_option); + + if constexpr (!grant_option) + helper(root); } -void AccessRights::revoke(const AccessFlags & flags) { revokeImpl(flags); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } - - -void AccessRights::revoke(const AccessRightsElement & element, std::string_view current_database) +template +void AccessRights::revokeImpl(const AccessRightsElement & element) { if (element.any_database) - { - revoke(element.access_flags); - } + revokeImpl(element.access_flags); else if (element.any_table) - { - if (element.database.empty()) - revoke(element.access_flags, checkCurrentDatabase(current_database)); - else - revoke(element.access_flags, element.database); - } + revokeImpl(element.access_flags, element.database); else if (element.any_column) - { - if (element.database.empty()) - revoke(element.access_flags, checkCurrentDatabase(current_database), element.table); - else - revoke(element.access_flags, element.database, element.table); - } + revokeImpl(element.access_flags, element.database, element.table); else - { - if (element.database.empty()) - revoke(element.access_flags, checkCurrentDatabase(current_database), element.table, element.columns); - else - revoke(element.access_flags, element.database, element.table, element.columns); - } + revokeImpl(element.access_flags, element.database, element.table, element.columns); } -void AccessRights::revoke(const AccessRightsElements & elements, std::string_view current_database) +template +void AccessRights::revokeImpl(const AccessRightsElements & elements) { for (const auto & element : elements) - revoke(element, current_database); + revokeImpl(element); } +void AccessRights::revoke(const AccessFlags & flags) { revokeImpl(flags); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { 
revokeImpl(flags, database, table, columns); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } +void AccessRights::revoke(const AccessRightsElement & element) { revokeImpl(element); } +void AccessRights::revoke(const AccessRightsElements & elements) { revokeImpl(elements); } -AccessRightsElements AccessRights::getGrants() const -{ - AccessRightsElements grants; - getGrantsAndPartialRevokesImpl(&grants, nullptr); - return grants; -} - -AccessRightsElements AccessRights::getPartialRevokes() const -{ - AccessRightsElements partial_revokes; - getGrantsAndPartialRevokesImpl(nullptr, &partial_revokes); - return partial_revokes; -} - -AccessRights::GrantsAndPartialRevokes AccessRights::getGrantsAndPartialRevokes() const -{ - GrantsAndPartialRevokes res; - getGrantsAndPartialRevokesImpl(&res.grants, &res.revokes); - return res; -} - - -void AccessRights::getGrantsAndPartialRevokesImpl(AccessRightsElements * out_grants, AccessRightsElements * out_partial_revokes) const +void AccessRights::revokeGrantOption(const AccessFlags & flags) { revokeImpl(flags); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } +void AccessRights::revokeGrantOption(const AccessRightsElement & element) { revokeImpl(element); } +void AccessRights::revokeGrantOption(const AccessRightsElements & elements) { revokeImpl(elements); } + + +AccessRightsElementsWithOptions AccessRights::getElements() const { +#if 0 + logTree(); +#endif if (!root) - return; - auto global_access = root->access; - if (out_grants && global_access) - out_grants->push_back({global_access}); - if (root->children) - { - for (const auto & [db_name, db_node] : *root->children) - { - if (out_grants) - { - if (auto db_grants = db_node.access - global_access) - out_grants->push_back({db_grants, db_name}); - } - if (out_partial_revokes) - { - if (auto db_partial_revokes = global_access - db_node.access) - out_partial_revokes->push_back({db_partial_revokes, db_name}); - } - if (db_node.children) - { - for (const auto & [table_name, table_node] : *db_node.children) - { - if (out_grants) - { - if (auto table_grants = table_node.access - db_node.access) - out_grants->push_back({table_grants, db_name, table_name}); - } - if (out_partial_revokes) - { - if (auto table_partial_revokes = db_node.access - table_node.access) - out_partial_revokes->push_back({table_partial_revokes, db_name, table_name}); - } - if (table_node.children) - { - for (const auto & [column_name, column_node] : *table_node.children) - { - if (out_grants) - { - if (auto column_grants = 
column_node.access - table_node.access) - out_grants->push_back({column_grants, db_name, table_name, column_name}); - } - if (out_partial_revokes) - { - if (auto column_partial_revokes = table_node.access - column_node.access) - out_partial_revokes->push_back({column_partial_revokes, db_name, table_name, column_name}); - } - } - - } - } - } - } - } + return {}; + if (!root_with_grant_option) + return root->getElements().getResult(); + return Node::getElements(root.get(), root_with_grant_option.get()).getResult(); } String AccessRights::toString() const { - String res; - auto gr = getGrantsAndPartialRevokes(); - if (!gr.grants.empty()) - { - res += "GRANT "; - res += gr.grants.toString(); - } - if (!gr.revokes.empty()) - { - if (!res.empty()) - res += ", "; - res += "REVOKE "; - res += gr.revokes.toString(); - } - if (res.empty()) - res = "GRANT USAGE ON *.*"; - return res; + return getElements().toString(); } -template +template bool AccessRights::isGrantedImpl(const AccessFlags & flags, const Args &... args) const { - if (!root) - return flags.isEmpty(); - return root->isGranted(flags, args...); + auto helper = [&](const std::unique_ptr & root_node) -> bool + { + if (!root_node) + return flags.isEmpty(); + return root_node->isGranted(flags, args...); + }; + if constexpr (grant_option) + return helper(root_with_grant_option); + else + return helper(root); } -bool AccessRights::isGranted(const AccessFlags & flags) const { return isGrantedImpl(flags); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } - -bool AccessRights::isGranted(const AccessRightsElement & element, std::string_view current_database) const +template +bool AccessRights::isGrantedImpl(const AccessRightsElement & element) const { if (element.any_database) - { - return isGranted(element.access_flags); - } + return isGrantedImpl(element.access_flags); else if (element.any_table) - { - if (element.database.empty()) - return isGranted(element.access_flags, checkCurrentDatabase(current_database)); - else - return isGranted(element.access_flags, element.database); - } + return isGrantedImpl(element.access_flags, element.database); else if (element.any_column) - { - if (element.database.empty()) - return isGranted(element.access_flags, checkCurrentDatabase(current_database), element.table); - else - return isGranted(element.access_flags, element.database, element.table); - } + return isGrantedImpl(element.access_flags, element.database, element.table); else - { - if (element.database.empty()) - return isGranted(element.access_flags, checkCurrentDatabase(current_database), element.table, element.columns); - else - return 
isGranted(element.access_flags, element.database, element.table, element.columns); - } + return isGrantedImpl(element.access_flags, element.database, element.table, element.columns); } -bool AccessRights::isGranted(const AccessRightsElements & elements, std::string_view current_database) const +template +bool AccessRights::isGrantedImpl(const AccessRightsElements & elements) const { for (const auto & element : elements) - if (!isGranted(element, current_database)) + if (!isGrantedImpl(element)) return false; return true; } +bool AccessRights::isGranted(const AccessFlags & flags) const { return isGrantedImpl(flags); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool AccessRights::isGranted(const AccessRightsElement & element) const { return isGrantedImpl(element); } +bool AccessRights::isGranted(const AccessRightsElements & elements) const { return isGrantedImpl(elements); } + +bool AccessRights::hasGrantOption(const AccessFlags & flags) const { return isGrantedImpl(flags); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool AccessRights::hasGrantOption(const AccessRightsElement & element) const { return isGrantedImpl(element); } +bool AccessRights::hasGrantOption(const AccessRightsElements & elements) const { return isGrantedImpl(elements); } + bool operator ==(const AccessRights & left, const AccessRights & right) { - if (!left.root) - return !right.root; - if (!right.root) - return false; - return *left.root == *right.root; + auto helper = [](const std::unique_ptr & left_node, const std::unique_ptr & right_node) + { + if (!left_node) + return !right_node; + if (!right_node) + return false; + return *left_node == *right_node; + }; + return 
helper(left.root, right.root) && helper(left.root_with_grant_option, right.root_with_grant_option); } -void AccessRights::merge(const AccessRights & other) +void AccessRights::makeUnion(const AccessRights & other) { - if (!root) + auto helper = [](std::unique_ptr & root_node, const std::unique_ptr & other_root_node) { - *this = other; - return; - } - if (other.root) + if (!root_node) + { + if (other_root_node) + root_node = std::make_unique(*other_root_node); + return; + } + if (other_root_node) + { + root_node->makeUnion(*other_root_node, Helper::instance()); + if (!root_node->access && !root_node->children) + root_node = nullptr; + } + }; + helper(root, other.root); + helper(root_with_grant_option, other.root_with_grant_option); +} + + +void AccessRights::makeIntersection(const AccessRights & other) +{ + auto helper = [](std::unique_ptr & root_node, const std::unique_ptr & other_root_node) { - root->merge(*other.root, Helper::instance()); - if (!root->access && !root->children) - root = nullptr; - } + if (!root_node) + { + if (other_root_node) + root_node = std::make_unique(*other_root_node); + return; + } + if (other_root_node) + { + root_node->makeIntersection(*other_root_node, Helper::instance()); + if (!root_node->access && !root_node->children) + root_node = nullptr; + } + }; + helper(root, other.root); + helper(root_with_grant_option, other.root_with_grant_option); +} + + +AccessRights AccessRights::getFullAccess() +{ + AccessRights res; + res.grantWithGrantOption(AccessType::ALL); + return res; } @@ -791,7 +1068,11 @@ void AccessRights::logTree() const { auto * log = &Poco::Logger::get("AccessRights"); if (root) - root->logTree(log); + { + root->logTree(log, ""); + if (root_with_grant_option) + root->logTree(log, "go"); + } else LOG_TRACE(log, "Tree: NULL"); } diff --git a/src/Access/AccessRights.h b/src/Access/AccessRights.h index c32514e8feb..d1cd35f09c5 100644 --- a/src/Access/AccessRights.h +++ b/src/Access/AccessRights.h @@ -26,6 +26,12 @@ public: /// Revokes everything. It's the same as revoke(AccessType::ALL). void clear(); + /// Returns the information about all the access granted as a string. + String toString() const; + + /// Returns the information about all the access granted. + AccessRightsElementsWithOptions getElements() const; + /// Grants access on a specified database/table/column. /// Does nothing if the specified access has been already granted. 
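A hedged usage sketch of the `grant`/`revoke`/`isGranted` overloads declared just below; it is not part of the patch and assumes the ClickHouse source tree on the include path and that `AccessType::SELECT` converts to `AccessFlags` the same way `AccessType::ALL` does in `getFullAccess()` above. Granting on a database and then revoking on one of its tables leaves a grant plus a partial revoke.

``` cpp
// Hypothetical usage sketch: a database-level grant with a partial revoke.
// Assumes ClickHouse's Access headers/library; AccessType values convert to AccessFlags.
#include <Access/AccessRights.h>

using namespace DB;

int main()
{
    AccessRights rights;
    rights.grant(AccessType::SELECT, "db");            /// SELECT on every table in `db`
    rights.revoke(AccessType::SELECT, "db", "secret"); /// partial revoke on one table

    bool on_events = rights.isGranted(AccessType::SELECT, "db", "events"); /// true
    bool on_secret = rights.isGranted(AccessType::SELECT, "db", "secret"); /// false

    /// getElements()/toString() serialize both sides of this state:
    /// roughly a GRANT on db.* together with a REVOKE on db.secret.
    (void)on_events;
    (void)on_secret;
}
```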
void grant(const AccessFlags & flags); @@ -34,8 +40,17 @@ public: void grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column); void grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns); void grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns); - void grant(const AccessRightsElement & element, std::string_view current_database = {}); - void grant(const AccessRightsElements & elements, std::string_view current_database = {}); + void grant(const AccessRightsElement & element); + void grant(const AccessRightsElements & elements); + + void grantWithGrantOption(const AccessFlags & flags); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns); + void grantWithGrantOption(const AccessRightsElement & element); + void grantWithGrantOption(const AccessRightsElements & elements); /// Revokes a specified access granted earlier on a specified database/table/column. /// For example, revoke(AccessType::ALL) revokes all grants at all, just like clear(); @@ -45,21 +60,17 @@ public: void revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column); void revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns); void revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns); - void revoke(const AccessRightsElement & element, std::string_view current_database = {}); - void revoke(const AccessRightsElements & elements, std::string_view current_database = {}); + void revoke(const AccessRightsElement & element); + void revoke(const AccessRightsElements & elements); - /// Returns the information about all the access granted. - struct GrantsAndPartialRevokes - { - AccessRightsElements grants; - AccessRightsElements revokes; - }; - AccessRightsElements getGrants() const; - AccessRightsElements getPartialRevokes() const; - GrantsAndPartialRevokes getGrantsAndPartialRevokes() const; - - /// Returns the information about all the access granted as a string. 
- String toString() const; + void revokeGrantOption(const AccessFlags & flags); + void revokeGrantOption(const AccessFlags & flags, const std::string_view & database); + void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table); + void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column); + void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns); + void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns); + void revokeGrantOption(const AccessRightsElement & element); + void revokeGrantOption(const AccessRightsElements & elements); /// Whether a specified access granted. bool isGranted(const AccessFlags & flags) const; @@ -68,38 +79,62 @@ public: bool isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; bool isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; bool isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; - bool isGranted(const AccessRightsElement & element, std::string_view current_database = {}) const; - bool isGranted(const AccessRightsElements & elements, std::string_view current_database = {}) const; + bool isGranted(const AccessRightsElement & element) const; + bool isGranted(const AccessRightsElements & elements) const; + + bool hasGrantOption(const AccessFlags & flags) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; + bool hasGrantOption(const AccessRightsElement & element) const; + bool hasGrantOption(const AccessRightsElements & elements) const; + + /// Merges two sets of access rights together. + /// It's used to combine access rights from multiple roles. + void makeUnion(const AccessRights & other); + + void makeIntersection(const AccessRights & other); friend bool operator ==(const AccessRights & left, const AccessRights & right); friend bool operator !=(const AccessRights & left, const AccessRights & right) { return !(left == right); } - /// Merges two sets of access rights together. - /// It's used to combine access rights from multiple roles. - void merge(const AccessRights & other); + static AccessRights getFullAccess(); private: - template + template void grantImpl(const AccessFlags & flags, const Args &... args); - template + template + void grantImpl(const AccessRightsElement & element); + + template + void grantImpl(const AccessRightsElements & elements); + + template void revokeImpl(const AccessFlags & flags, const Args &... 
args); - template + template + void revokeImpl(const AccessRightsElement & element); + + template + void revokeImpl(const AccessRightsElements & elements); + + template bool isGrantedImpl(const AccessFlags & flags, const Args &... args) const; - bool isGrantedImpl(const AccessRightsElement & element, std::string_view current_database) const; - bool isGrantedImpl(const AccessRightsElements & elements, std::string_view current_database) const; + template + bool isGrantedImpl(const AccessRightsElement & element) const; - template - AccessFlags getAccessImpl(const Args &... args) const; - - void getGrantsAndPartialRevokesImpl(AccessRightsElements * grants, AccessRightsElements * partial_revokes) const; + template + bool isGrantedImpl(const AccessRightsElements & elements) const; void logTree() const; struct Node; std::unique_ptr root; + std::unique_ptr root_with_grant_option; }; } diff --git a/src/Access/AccessRightsElement.cpp b/src/Access/AccessRightsElement.cpp index db1ea5d3d5c..e69fb6d3b74 100644 --- a/src/Access/AccessRightsElement.cpp +++ b/src/Access/AccessRightsElement.cpp @@ -12,222 +12,158 @@ namespace DB { namespace { - size_t groupElements(AccessRightsElements & elements, size_t start) + using Kind = AccessRightsElementWithOptions::Kind; + + String formatOptions(bool grant_option, Kind kind, const String & inner_part) { - auto & start_element = elements[start]; - auto it = std::find_if(elements.begin() + start + 1, elements.end(), - [&](const AccessRightsElement & element) + if (kind == Kind::REVOKE) { - return (element.database != start_element.database) || - (element.any_database != start_element.any_database) || - (element.table != start_element.table) || - (element.any_table != start_element.any_table) || - (element.any_column != start_element.any_column); - }); - size_t end = it - elements.begin(); - - /// All the elements at indices from start to end here specify - /// the same database and table. - - if (start_element.any_column) - { - /// Easy case: the elements don't specify columns. - /// All we need is to combine the access flags. - for (size_t i = start + 1; i != end; ++i) - { - start_element.access_flags |= elements[i].access_flags; - elements[i].access_flags = {}; - } - return end; + if (grant_option) + return "REVOKE GRANT OPTION " + inner_part; + else + return "REVOKE " + inner_part; } - - /// Difficult case: the elements specify columns. - /// We have to find groups of columns with common access flags. 
- for (size_t i = start; i != end; ++i) + else { - if (!elements[i].access_flags) - continue; - - AccessFlags common_flags = elements[i].access_flags; - size_t num_elements_with_common_flags = 1; - for (size_t j = i + 1; j != end; ++j) - { - auto new_common_flags = common_flags & elements[j].access_flags; - if (new_common_flags) - { - common_flags = new_common_flags; - ++num_elements_with_common_flags; - } - } - - if (num_elements_with_common_flags == 1) - continue; - - if (elements[i].access_flags != common_flags) - { - elements.insert(elements.begin() + i + 1, elements[i]); - elements[i].access_flags = common_flags; - elements[i].columns.clear(); - ++end; - } - - for (size_t j = i + 1; j != end; ++j) - { - if ((elements[j].access_flags & common_flags) == common_flags) - { - boost::range::push_back(elements[i].columns, elements[j].columns); - elements[j].access_flags -= common_flags; - } - } + if (grant_option) + return "GRANT " + inner_part + " WITH GRANT OPTION"; + else + return "GRANT " + inner_part; } - - return end; } - /// Tries to combine elements to decrease their number. - void groupElements(AccessRightsElements & elements) + + String formatONClause(const String & database, bool any_database, const String & table, bool any_table) { - if (!boost::range::is_sorted(elements)) - boost::range::sort(elements); /// Algorithm in groupElement() requires elements to be sorted. - for (size_t start = 0; start != elements.size();) - start = groupElements(elements, start); + String msg = "ON "; + + if (any_database) + msg += "*."; + else if (!database.empty()) + msg += backQuoteIfNeed(database) + "."; + + if (any_table) + msg += "*"; + else + msg += backQuoteIfNeed(table); + return msg; } - /// Removes unnecessary elements, sorts elements and makes them unique. - void sortElementsAndMakeUnique(AccessRightsElements & elements) + + String formatAccessFlagsWithColumns(const AccessFlags & access_flags, const Strings & columns, bool any_column) { - /// Remove empty elements. - boost::range::remove_erase_if(elements, [](const AccessRightsElement & element) + String columns_in_parentheses; + if (!any_column) { - return !element.access_flags || (!element.any_column && element.columns.empty()); - }); - - /// Sort columns and make them unique. - for (auto & element : elements) - { - if (element.any_column) - continue; - - if (!boost::range::is_sorted(element.columns)) - boost::range::sort(element.columns); - element.columns.erase(std::unique(element.columns.begin(), element.columns.end()), element.columns.end()); + if (columns.empty()) + return "USAGE"; + for (const auto & column : columns) + { + columns_in_parentheses += columns_in_parentheses.empty() ? "(" : ", "; + columns_in_parentheses += backQuoteIfNeed(column); + } + columns_in_parentheses += ")"; } - /// Sort elements themselves. 
- boost::range::sort(elements); - elements.erase(std::unique(elements.begin(), elements.end()), elements.end()); + auto keywords = access_flags.toKeywords(); + if (keywords.empty()) + return "USAGE"; + + String msg; + for (const std::string_view & keyword : keywords) + { + if (!msg.empty()) + msg += ", "; + msg += String{keyword} + columns_in_parentheses; + } + return msg; } } -void AccessRightsElement::setDatabase(const String & new_database) -{ - database = new_database; - any_database = false; -} - - -void AccessRightsElement::replaceEmptyDatabase(const String & new_database) -{ - if (isEmptyDatabase()) - setDatabase(new_database); -} - - -bool AccessRightsElement::isEmptyDatabase() const -{ - return !any_database && database.empty(); -} - String AccessRightsElement::toString() const { - String msg = toStringWithoutON(); - msg += " ON "; - - if (any_database) - msg += "*."; - else if (!database.empty()) - msg += backQuoteIfNeed(database) + "."; - - if (any_table) - msg += "*"; - else - msg += backQuoteIfNeed(table); - return msg; + return formatAccessFlagsWithColumns(access_flags, columns, any_column) + " " + formatONClause(database, any_database, table, any_table); } -String AccessRightsElement::toStringWithoutON() const +String AccessRightsElementWithOptions::toString() const { - String columns_in_parentheses; - if (!any_column) - { - if (columns.empty()) - return "USAGE"; - for (const auto & column : columns) - { - columns_in_parentheses += columns_in_parentheses.empty() ? "(" : ", "; - columns_in_parentheses += backQuoteIfNeed(column); - } - columns_in_parentheses += ")"; - } - - auto keywords = access_flags.toKeywords(); - if (keywords.empty()) - return "USAGE"; - - String msg; - for (const std::string_view & keyword : keywords) - { - if (!msg.empty()) - msg += ", "; - msg += String{keyword} + columns_in_parentheses; - } - return msg; + return formatOptions(grant_option, kind, AccessRightsElement::toString()); } - -void AccessRightsElements::replaceEmptyDatabase(const String & new_database) +String AccessRightsElements::toString() const { - for (auto & element : *this) - element.replaceEmptyDatabase(new_database); -} - - -String AccessRightsElements::toString() -{ - normalize(); - if (empty()) return "USAGE ON *.*"; - String msg; - bool need_comma = false; + String res; + String inner_part; + for (size_t i = 0; i != size(); ++i) { const auto & element = (*this)[i]; - if (std::exchange(need_comma, true)) - msg += ", "; - bool next_element_on_same_db_and_table = false; + + if (!inner_part.empty()) + inner_part += ", "; + inner_part += formatAccessFlagsWithColumns(element.access_flags, element.columns, element.any_column); + + bool next_element_uses_same_table = false; if (i != size() - 1) { const auto & next_element = (*this)[i + 1]; - if ((element.database == next_element.database) && (element.any_database == next_element.any_database) - && (element.table == next_element.table) && (element.any_table == next_element.any_table)) - next_element_on_same_db_and_table = true; + if (element.sameDatabaseAndTable(next_element)) + next_element_uses_same_table = true; + } + + if (!next_element_uses_same_table) + { + if (!res.empty()) + res += ", "; + res += inner_part + " " + formatONClause(element.database, element.any_database, element.table, element.any_table); + inner_part.clear(); } - if (next_element_on_same_db_and_table) - msg += element.toStringWithoutON(); - else - msg += element.toString(); } - return msg; + + return res; } - -void AccessRightsElements::normalize() +String 
AccessRightsElementsWithOptions::toString() const { - groupElements(*this); - sortElementsAndMakeUnique(*this); + if (empty()) + return "GRANT USAGE ON *.*"; + + String res; + String inner_part; + + for (size_t i = 0; i != size(); ++i) + { + const auto & element = (*this)[i]; + + if (!inner_part.empty()) + inner_part += ", "; + inner_part += formatAccessFlagsWithColumns(element.access_flags, element.columns, element.any_column); + + bool next_element_uses_same_mode_and_table = false; + if (i != size() - 1) + { + const auto & next_element = (*this)[i + 1]; + if (element.sameDatabaseAndTable(next_element) && element.sameOptions(next_element)) + next_element_uses_same_mode_and_table = true; + } + + if (!next_element_uses_same_mode_and_table) + { + if (!res.empty()) + res += ", "; + res += formatOptions( + element.grant_option, + element.kind, + inner_part + " " + formatONClause(element.database, element.any_database, element.table, element.any_table)); + inner_part.clear(); + } + } + + return res; } } diff --git a/src/Access/AccessRightsElement.h b/src/Access/AccessRightsElement.h index 70eb95c2d17..f9f7c433308 100644 --- a/src/Access/AccessRightsElement.h +++ b/src/Access/AccessRightsElement.h @@ -71,26 +71,48 @@ struct AccessRightsElement { } - auto toTuple() const { return std::tie(access_flags, database, any_database, table, any_table, columns, any_column); } + auto toTuple() const { return std::tie(access_flags, any_database, database, any_table, table, any_column, columns); } friend bool operator==(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() == right.toTuple(); } - friend bool operator!=(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() != right.toTuple(); } - friend bool operator<(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() < right.toTuple(); } - friend bool operator>(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() > right.toTuple(); } - friend bool operator<=(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() <= right.toTuple(); } - friend bool operator>=(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() >= right.toTuple(); } + friend bool operator!=(const AccessRightsElement & left, const AccessRightsElement & right) { return !(left == right); } - /// Sets the database. - void setDatabase(const String & new_database); + bool sameDatabaseAndTable(const AccessRightsElement & other) const + { + return (database == other.database) && (any_database == other.any_database) && (table == other.table) + && (any_table == other.any_table); + } + + bool isEmptyDatabase() const { return !any_database && database.empty(); } /// If the database is empty, replaces it with `new_database`. Otherwise does nothing. void replaceEmptyDatabase(const String & new_database); - bool isEmptyDatabase() const; - /// Returns a human-readable representation like "SELECT, UPDATE(x, y) ON db.table". - /// The returned string isn't prefixed with the "GRANT" keyword. 
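`formatOptions`, `formatONClause` and `formatAccessFlagsWithColumns` above replace the old `toString`/`toStringWithoutON` split: each element is rendered as `<keywords>(columns) ON <db>.<table>`, wrapped in either `GRANT ... [WITH GRANT OPTION]` or `REVOKE [GRANT OPTION] ...`, and consecutive elements over the same table (and, for the `WithOptions` list, the same grant/revoke mode) share one `ON` clause. A simplified, self-contained sketch of that composition, assuming plain `std::string` keyword lists instead of `AccessFlags` and omitting `backQuoteIfNeed` and the `any_database`/`any_column` handling:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-ins for the helpers in AccessRightsElement.cpp.
static std::string formatONClause(const std::string & database, const std::string & table)
{
    return "ON " + (database.empty() ? std::string{"*"} : database) + "."
                 + (table.empty() ? std::string{"*"} : table);
}

static std::string formatAccessFlagsWithColumns(const std::vector<std::string> & keywords,
                                                const std::vector<std::string> & columns)
{
    std::string in_parentheses;
    for (const auto & column : columns)
    {
        in_parentheses += in_parentheses.empty() ? "(" : ", ";
        in_parentheses += column;
    }
    if (!in_parentheses.empty())
        in_parentheses += ")";

    if (keywords.empty())
        return "USAGE";

    std::string msg;
    for (const auto & keyword : keywords)
    {
        if (!msg.empty())
            msg += ", ";
        msg += keyword + in_parentheses;
    }
    return msg;
}

static std::string formatOptions(bool grant_option, bool is_revoke, const std::string & inner)
{
    if (is_revoke)
        return (grant_option ? "REVOKE GRANT OPTION " : "REVOKE ") + inner;
    return "GRANT " + inner + (grant_option ? " WITH GRANT OPTION" : "");
}

int main()
{
    // Prints: GRANT SELECT(x, y) ON db.table WITH GRANT OPTION
    std::cout << formatOptions(true, false,
        formatAccessFlagsWithColumns({"SELECT"}, {"x", "y"}) + " " + formatONClause("db", "table")) << '\n';
    // Prints: REVOKE INSERT ON *.*
    std::cout << formatOptions(false, true,
        formatAccessFlagsWithColumns({"INSERT"}, {}) + " " + formatONClause("", "")) << '\n';
}
```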
String toString() const; - String toStringWithoutON() const; +}; + + +struct AccessRightsElementWithOptions : public AccessRightsElement +{ + bool grant_option = false; + + enum class Kind + { + GRANT, + REVOKE, + }; + Kind kind = Kind::GRANT; + + bool sameOptions(const AccessRightsElementWithOptions & other) const + { + return (grant_option == other.grant_option) && (kind == other.kind); + } + + auto toTuple() const { return std::tie(access_flags, any_database, database, any_table, table, any_column, columns, grant_option, kind); } + friend bool operator==(const AccessRightsElementWithOptions & left, const AccessRightsElementWithOptions & right) { return left.toTuple() == right.toTuple(); } + friend bool operator!=(const AccessRightsElementWithOptions & left, const AccessRightsElementWithOptions & right) { return !(left == right); } + + /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". + String toString() const; }; @@ -101,13 +123,38 @@ public: /// Replaces the empty database with `new_database`. void replaceEmptyDatabase(const String & new_database); - /// Returns a human-readable representation like "SELECT, UPDATE(x, y) ON db.table". - /// The returned string isn't prefixed with the "GRANT" keyword. - String toString() const { return AccessRightsElements(*this).toString(); } - String toString(); - - /// Reorder and group elements to show them in more readable form. - void normalize(); + /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". + String toString() const; }; + +class AccessRightsElementsWithOptions : public std::vector +{ +public: + /// Replaces the empty database with `new_database`. + void replaceEmptyDatabase(const String & new_database); + + /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". + String toString() const; +}; + + +inline void AccessRightsElement::replaceEmptyDatabase(const String & new_database) +{ + if (isEmptyDatabase()) + database = new_database; +} + +inline void AccessRightsElements::replaceEmptyDatabase(const String & new_database) +{ + for (auto & element : *this) + element.replaceEmptyDatabase(new_database); +} + +inline void AccessRightsElementsWithOptions::replaceEmptyDatabase(const String & new_database) +{ + for (auto & element : *this) + element.replaceEmptyDatabase(new_database); +} + } diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index e7bd0f8287d..c53d073e43d 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include @@ -35,61 +33,66 @@ namespace ErrorCodes namespace { - enum CheckAccessRightsMode + std::shared_ptr mixAccessRightsFromUserAndRoles(const User & user, const EnabledRolesInfo & roles_info) { - RETURN_FALSE_IF_ACCESS_DENIED, - LOG_WARNING_IF_ACCESS_DENIED, - THROW_IF_ACCESS_DENIED, - }; - - - String formatSkippedMessage() - { - return ""; + auto res = std::make_shared(user.access); + res->makeUnion(roles_info.access); + return res; } - String formatSkippedMessage(const std::string_view & database) + std::shared_ptr applyParamsToAccessRights(const AccessRights & access, const ContextAccessParams & params) { - return ". Skipped database " + backQuoteIfNeed(database); - } + auto res = std::make_shared(access); - String formatSkippedMessage(const std::string_view & database, const std::string_view & table) - { - String str = ". 
Skipped table "; - if (!database.empty()) - str += backQuoteIfNeed(database) + "."; - str += backQuoteIfNeed(table); - return str; - } + static const AccessFlags table_ddl = AccessType::CREATE_DATABASE | AccessType::CREATE_TABLE | AccessType::CREATE_VIEW + | AccessType::ALTER_TABLE | AccessType::ALTER_VIEW | AccessType::DROP_DATABASE | AccessType::DROP_TABLE | AccessType::DROP_VIEW + | AccessType::TRUNCATE; - String formatSkippedMessage(const std::string_view & database, const std::string_view & table, const std::string_view & column) - { - String str = ". Skipped column " + backQuoteIfNeed(column) + " ON "; - if (!database.empty()) - str += backQuoteIfNeed(database) + "."; - str += backQuoteIfNeed(table); - return str; - } + static const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; + static const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; + static const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; + static const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; - template - String formatSkippedMessage(const std::string_view & database, const std::string_view & table, const std::vector & columns) - { - if (columns.size() == 1) - return formatSkippedMessage(database, table, columns[0]); + if (params.readonly) + res->revoke(write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY); - String str = ". Skipped columns "; - bool need_comma = false; - for (const auto & column : columns) + if (params.readonly == 1) { - if (std::exchange(need_comma, true)) - str += ", "; - str += backQuoteIfNeed(column); + /// Table functions are forbidden in readonly mode. + /// For example, for readonly = 2 - allowed. + res->revoke(AccessType::CREATE_TEMPORARY_TABLE); } - str += " ON "; - if (!database.empty()) - str += backQuoteIfNeed(database) + "."; - str += backQuoteIfNeed(table); - return str; + + if (!params.allow_ddl) + res->revoke(table_and_dictionary_ddl); + + if (!params.allow_introspection) + res->revoke(AccessType::INTROSPECTION); + + /// Anyone has access to the "system" database. + res->grant(AccessType::SELECT, DatabaseCatalog::SYSTEM_DATABASE); + + if (params.readonly != 1) + { + /// User has access to temporary or external table if such table was resolved in session or query context + res->grant(AccessFlags::allTableFlags() | AccessFlags::allColumnFlags(), DatabaseCatalog::TEMPORARY_DATABASE); + } + + if (params.readonly) + { + /// No grant option in readonly mode. + res->revokeGrantOption(AccessType::ALL); + } + + return res; + } + + + std::array to_array(const UUID & id) + { + std::array ids; + ids[0] = id; + return ids; } } @@ -116,8 +119,12 @@ void ContextAccess::setUser(const UserPtr & user_) const if (!user) { /// User has been dropped. 
- auto nothing_granted = boost::make_shared(); - boost::range::fill(result_access, nothing_granted); + auto nothing_granted = std::make_shared(); + access = nothing_granted; + access_without_readonly = nothing_granted; + access_with_allow_ddl = nothing_granted; + access_with_allow_introspection = nothing_granted; + access_from_user_and_roles = nothing_granted; subscription_for_user_change = {}; subscription_for_roles_changes = {}; enabled_roles = nullptr; @@ -169,10 +176,33 @@ void ContextAccess::setRolesInfo(const std::shared_ptr & { assert(roles_info_); roles_info = roles_info_; - boost::range::fill(result_access, nullptr /* need recalculate */); enabled_row_policies = manager->getEnabledRowPolicies(*params.user_id, roles_info->enabled_roles); enabled_quota = manager->getEnabledQuota(*params.user_id, user_name, roles_info->enabled_roles, params.address, params.quota_key); enabled_settings = manager->getEnabledSettings(*params.user_id, user->settings, roles_info->enabled_roles, roles_info->settings_from_enabled_roles); + calculateAccessRights(); +} + + +void ContextAccess::calculateAccessRights() const +{ + access_from_user_and_roles = mixAccessRightsFromUserAndRoles(*user, *roles_info); + access = applyParamsToAccessRights(*access_from_user_and_roles, params); + + access_without_readonly = nullptr; + access_with_allow_ddl = nullptr; + access_with_allow_introspection = nullptr; + + if (trace_log) + { + if (roles_info && !roles_info->getCurrentRolesNames().empty()) + { + LOG_TRACE(trace_log, "Current_roles: {}, enabled_roles: {}", + boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "), + boost::algorithm::join(roles_info->getEnabledRolesNames(), ", ")); + } + LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", params.readonly, params.allow_ddl, params.allow_introspection); + LOG_TRACE(trace_log, "List of all grants: {}", access->toString()); + } } @@ -193,284 +223,6 @@ bool ContextAccess::isClientHostAllowed() const } -template -bool ContextAccess::calculateResultAccessAndCheck(Poco::Logger * log_, const AccessFlags & flags, const Args &... args) const -{ - auto access = calculateResultAccess(grant_option); - bool is_granted = access->isGranted(flags, args...); - - if (trace_log) - LOG_TRACE(trace_log, "Access {}: {}", (is_granted ? "granted" : "denied"), (AccessRightsElement{flags, args...}.toString())); - - if (is_granted) - return true; - - if constexpr (mode == RETURN_FALSE_IF_ACCESS_DENIED) - return false; - - if constexpr (mode == LOG_WARNING_IF_ACCESS_DENIED) - { - if (!log_) - return false; - } - - auto show_error = [&](const String & msg, [[maybe_unused]] int error_code) - { - if constexpr (mode == THROW_IF_ACCESS_DENIED) - throw Exception(user_name + ": " + msg, error_code); - else if constexpr (mode == LOG_WARNING_IF_ACCESS_DENIED) - LOG_WARNING(log_, "{}: {}{}", user_name, msg, formatSkippedMessage(args...)); - }; - - if (!user) - { - show_error("User has been dropped", ErrorCodes::UNKNOWN_USER); - } - else if (grant_option && calculateResultAccess(false, params.readonly, params.allow_ddl, params.allow_introspection)->isGranted(flags, args...)) - { - show_error( - "Not enough privileges. " - "The required privileges have been granted, but without grant option. 
" - "To execute this query it's necessary to have the grant " - + AccessRightsElement{flags, args...}.toString() + " WITH GRANT OPTION", - ErrorCodes::ACCESS_DENIED); - } - else if (params.readonly && calculateResultAccess(false, false, params.allow_ddl, params.allow_introspection)->isGranted(flags, args...)) - { - if (params.interface == ClientInfo::Interface::HTTP && params.http_method == ClientInfo::HTTPMethod::GET) - show_error( - "Cannot execute query in readonly mode. " - "For queries over HTTP, method GET implies readonly. You should use method POST for modifying queries", - ErrorCodes::READONLY); - else - show_error("Cannot execute query in readonly mode", ErrorCodes::READONLY); - } - else if (!params.allow_ddl && calculateResultAccess(false, params.readonly, true, params.allow_introspection)->isGranted(flags, args...)) - { - show_error("Cannot execute query. DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); - } - else if (!params.allow_introspection && calculateResultAccess(false, params.readonly, params.allow_ddl, true)->isGranted(flags, args...)) - { - show_error("Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED); - } - else - { - show_error( - "Not enough privileges. To execute this query it's necessary to have the grant " - + AccessRightsElement{flags, args...}.toString() + (grant_option ? " WITH GRANT OPTION" : ""), - ErrorCodes::ACCESS_DENIED); - } - - return false; -} - - -template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags) const -{ - return calculateResultAccessAndCheck(log_, flags); -} - -template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const Args &... 
args) const -{ - if (database.empty()) - return calculateResultAccessAndCheck(log_, flags, params.current_database, args...); - else - return calculateResultAccessAndCheck(log_, flags, database, args...); -} - - -template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessRightsElement & element) const -{ - if (element.any_database) - { - return checkAccessImpl(log_, element.access_flags); - } - else if (element.any_table) - { - return checkAccessImpl(log_, element.access_flags, element.database); - } - else if (element.any_column) - { - return checkAccessImpl(log_, element.access_flags, element.database, element.table); - } - else - { - return checkAccessImpl(log_, element.access_flags, element.database, element.table, element.columns); - } -} - - -template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessRightsElements & elements) const -{ - for (const auto & element : elements) - if (!checkAccessImpl(log_, element)) - return false; - return true; -} - - -void ContextAccess::checkAccess(const AccessFlags & flags) const { checkAccessImpl(nullptr, flags); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database) const { checkAccessImpl(nullptr, flags, database); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { checkAccessImpl(nullptr, flags, database, table); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { checkAccessImpl(nullptr, flags, database, table, column); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { checkAccessImpl(nullptr, flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { checkAccessImpl(nullptr, flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(nullptr, element); } -void ContextAccess::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(nullptr, elements); } - -bool ContextAccess::isGranted(const AccessFlags & flags) const { return checkAccessImpl(nullptr, flags); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database) const { return checkAccessImpl(nullptr, flags, database); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return checkAccessImpl(nullptr, flags, database, table); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return checkAccessImpl(nullptr, flags, database, table, column); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return checkAccessImpl(nullptr, flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return checkAccessImpl(nullptr, flags, database, table, columns); } -bool ContextAccess::isGranted(const 
AccessRightsElement & element) const { return checkAccessImpl(nullptr, element); } -bool ContextAccess::isGranted(const AccessRightsElements & elements) const { return checkAccessImpl(nullptr, elements); } - -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags) const { return checkAccessImpl(log_, flags); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database) const { return checkAccessImpl(log_, flags, database); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return checkAccessImpl(log_, flags, database, table); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return checkAccessImpl(log_, flags, database, table, column); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return checkAccessImpl(log_, flags, database, table, columns); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return checkAccessImpl(log_, flags, database, table, columns); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessRightsElement & element) const { return checkAccessImpl(log_, element); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessRightsElements & elements) const { return checkAccessImpl(log_, elements); } - -void ContextAccess::checkGrantOption(const AccessFlags & flags) const { checkAccessImpl(nullptr, flags); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database) const { checkAccessImpl(nullptr, flags, database); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { checkAccessImpl(nullptr, flags, database, table); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { checkAccessImpl(nullptr, flags, database, table, column); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { checkAccessImpl(nullptr, flags, database, table, columns); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { checkAccessImpl(nullptr, flags, database, table, columns); } -void ContextAccess::checkGrantOption(const AccessRightsElement & element) const { checkAccessImpl(nullptr, element); } -void ContextAccess::checkGrantOption(const AccessRightsElements & elements) const { checkAccessImpl(nullptr, elements); } - - -void ContextAccess::checkAdminOption(const UUID & role_id) const -{ - if (isGranted(AccessType::ROLE_ADMIN)) - return; - - auto info = getRolesInfo(); - if (info && info->enabled_roles_with_admin_option.count(role_id)) - return; - - if (!user) - throw Exception(user_name + ": User has been dropped", ErrorCodes::UNKNOWN_USER); - - std::optional role_name = manager->readName(role_id); - if (!role_name) - role_name = "ID {" 
+ toString(role_id) + "}"; - throw Exception( - getUserName() + ": Not enough privileges. To execute this query it's necessary to have the grant " + backQuoteIfNeed(*role_name) - + " WITH ADMIN OPTION ", - ErrorCodes::ACCESS_DENIED); -} - - -boost::shared_ptr ContextAccess::calculateResultAccess(bool grant_option) const -{ - return calculateResultAccess(grant_option, params.readonly, params.allow_ddl, params.allow_introspection); -} - - -boost::shared_ptr ContextAccess::calculateResultAccess(bool grant_option, UInt64 readonly_, bool allow_ddl_, bool allow_introspection_) const -{ - size_t index = static_cast(readonly_ != params.readonly) - + static_cast(allow_ddl_ != params.allow_ddl) * 2 + - + static_cast(allow_introspection_ != params.allow_introspection) * 3 - + static_cast(grant_option) * 4; - assert(index < std::size(result_access)); - auto res = result_access[index].load(); - if (res) - return res; - - std::lock_guard lock{mutex}; - res = result_access[index].load(); - if (res) - return res; - - auto merged_access = boost::make_shared(); - - if (grant_option) - { - *merged_access = user->access.access_with_grant_option; - if (roles_info) - merged_access->merge(roles_info->access_with_grant_option); - } - else - { - *merged_access = user->access.access; - if (roles_info) - merged_access->merge(roles_info->access); - } - - static const AccessFlags table_ddl = AccessType::CREATE_DATABASE | AccessType::CREATE_TABLE | AccessType::CREATE_VIEW - | AccessType::ALTER_TABLE | AccessType::ALTER_VIEW | AccessType::DROP_DATABASE | AccessType::DROP_TABLE | AccessType::DROP_VIEW - | AccessType::TRUNCATE; - - static const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; - static const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; - static const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; - static const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; - - if (readonly_) - merged_access->revoke(write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY); - - if (readonly_ == 1) - { - /// Table functions are forbidden in readonly mode. - /// For example, for readonly = 2 - allowed. - merged_access->revoke(AccessType::CREATE_TEMPORARY_TABLE); - } - - if (!allow_ddl_) - merged_access->revoke(table_and_dictionary_ddl); - - if (!allow_introspection_ && !grant_option) - merged_access->revoke(AccessType::INTROSPECTION); - - /// Anyone has access to the "system" database. - merged_access->grant(AccessType::SELECT, DatabaseCatalog::SYSTEM_DATABASE); - - if (readonly_ != 1) - { - /// User has access to temporary or external table if such table was resolved in session or query context - merged_access->grant(AccessFlags::allTableFlags() | AccessFlags::allColumnFlags(), DatabaseCatalog::TEMPORARY_DATABASE); - } - - if (readonly_ && grant_option) - { - /// No grant option in readonly mode. 
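The removed `calculateResultAccess` above kept a cache of precomputed `AccessRights` variants indexed by which of `readonly`, `allow_ddl`, `allow_introspection` and `grant_option` differed from the session parameters. The replacement (see `checkAccessImpl2` further below) keeps only `access_without_readonly`, `access_with_allow_ddl` and `access_with_allow_introspection`, builds each lazily, and consults them solely on the error path to pick a specific error message (READONLY, QUERY_IS_PROHIBITED, FUNCTION_NOT_ALLOWED) instead of a generic ACCESS_DENIED. A minimal sketch of that lazy what-if probing, assuming simplified `AccessSet`/`Ctx` stand-ins for `AccessRights`/`ContextAccess`:

```cpp
#include <memory>
#include <stdexcept>

// Illustrative only: AccessSet stands in for AccessRights, Ctx for ContextAccess.
struct AccessSet { bool granted = false; bool isGranted() const { return granted; } };

struct Ctx
{
    bool readonly = true;
    AccessSet access;                                                  // effective rights under current params
    mutable std::shared_ptr<const AccessSet> access_without_readonly;  // built lazily, error path only

    std::shared_ptr<const AccessSet> recalcWithoutReadonly() const
    {
        // In the real code this re-runs applyParamsToAccessRights with readonly = 0.
        return std::make_shared<const AccessSet>(AccessSet{true});
    }

    void checkAccess() const
    {
        if (access.isGranted())
            return;                                                    // fast path: nothing else is computed

        if (readonly)
        {
            if (!access_without_readonly)
                access_without_readonly = recalcWithoutReadonly();
            if (access_without_readonly->isGranted())
                throw std::runtime_error("Cannot execute query in readonly mode");
        }
        throw std::runtime_error("Not enough privileges");
    }
};

int main()
{
    Ctx ctx;                          // readonly session, nothing granted
    try { ctx.checkAccess(); }
    catch (const std::exception &) {} // "Cannot execute query in readonly mode"
}
```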
- merged_access->revoke(AccessType::ALL); - } - - if (trace_log && (params.readonly == readonly_) && (params.allow_ddl == allow_ddl_) && (params.allow_introspection == allow_introspection_)) - { - if (grant_option) - LOG_TRACE(trace_log, "List of all grants: {} WITH GRANT OPTION", merged_access->toString()); - else - LOG_TRACE(trace_log, "List of all grants: {}", merged_access->toString()); - - if (roles_info && !roles_info->getCurrentRolesNames().empty()) - { - LOG_TRACE(trace_log, "Current_roles: {}, enabled_roles: {}", - boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "), - boost::algorithm::join(roles_info->getEnabledRolesNames(), ", ")); - } - LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", readonly_, allow_ddl_, allow_introspection_); - } - - res = std::move(merged_access); - result_access[index].store(res); - return res; -} - - UserPtr ContextAccess::getUser() const { std::lock_guard lock{mutex}; @@ -520,9 +272,7 @@ std::shared_ptr ContextAccess::getFullAccess() static const std::shared_ptr res = [] { auto full_access = std::shared_ptr(new ContextAccess); - auto everything_granted = boost::make_shared(); - everything_granted->grant(AccessType::ALL); - boost::range::fill(full_access->result_access, everything_granted); + full_access->access = std::make_shared(AccessRights::getFullAccess()); full_access->enabled_quota = EnabledQuota::getUnlimitedQuota(); return full_access; }(); @@ -543,4 +293,284 @@ std::shared_ptr ContextAccess::getSettingsConstraints return enabled_settings ? enabled_settings->getConstraints() : nullptr; } + +std::shared_ptr ContextAccess::getAccess() const +{ + std::lock_guard lock{mutex}; + return access; +} + + +template +bool ContextAccess::isGrantedImpl2(const AccessFlags & flags, const Args &... args) const +{ + bool access_granted; + if constexpr (grant_option) + access_granted = getAccess()->hasGrantOption(flags, args...); + else + access_granted = getAccess()->isGranted(flags, args...); + + if (trace_log) + LOG_TRACE(trace_log, "Access {}: {}{}", (access_granted ? "granted" : "denied"), (AccessRightsElement{flags, args...}.toString()), + (grant_option ? " WITH GRANT OPTION" : "")); + + return access_granted; +} + +template +bool ContextAccess::isGrantedImpl(const AccessFlags & flags) const +{ + return isGrantedImpl2(flags); +} + +template +bool ContextAccess::isGrantedImpl(const AccessFlags & flags, const std::string_view & database, const Args &... args) const +{ + return isGrantedImpl2(flags, database.empty() ? 
params.current_database : database, args...); +} + +template +bool ContextAccess::isGrantedImpl(const AccessRightsElement & element) const +{ + if (element.any_database) + return isGrantedImpl(element.access_flags); + else if (element.any_table) + return isGrantedImpl(element.access_flags, element.database); + else if (element.any_column) + return isGrantedImpl(element.access_flags, element.database, element.table); + else + return isGrantedImpl(element.access_flags, element.database, element.table, element.columns); +} + +template +bool ContextAccess::isGrantedImpl(const AccessRightsElements & elements) const +{ + for (const auto & element : elements) + if (!isGrantedImpl(element)) + return false; + return true; +} + +bool ContextAccess::isGranted(const AccessFlags & flags) const { return isGrantedImpl(flags); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool ContextAccess::isGranted(const AccessRightsElement & element) const { return isGrantedImpl(element); } +bool ContextAccess::isGranted(const AccessRightsElements & elements) const { return isGrantedImpl(elements); } + +bool ContextAccess::hasGrantOption(const AccessFlags & flags) const { return isGrantedImpl(flags); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const AccessRightsElement & element) const { return isGrantedImpl(element); } +bool ContextAccess::hasGrantOption(const AccessRightsElements & elements) const { return isGrantedImpl(elements); } + + +template +void ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &... 
args) const +{ + if constexpr (grant_option) + { + if (hasGrantOption(flags, args...)) + return; + } + else + { + if (isGranted(flags, args...)) + return; + } + + auto show_error = [&](const String & msg, int error_code) + { + throw Exception(user_name + ": " + msg, error_code); + }; + + std::lock_guard lock{mutex}; + + if (!user) + show_error("User has been dropped", ErrorCodes::UNKNOWN_USER); + + if (grant_option && access->isGranted(flags, args...)) + { + show_error( + "Not enough privileges. " + "The required privileges have been granted, but without grant option. " + "To execute this query it's necessary to have the grant " + + AccessRightsElement{flags, args...}.toString() + " WITH GRANT OPTION", + ErrorCodes::ACCESS_DENIED); + } + + if (params.readonly) + { + if (!access_without_readonly) + { + Params changed_params = params; + changed_params.readonly = 0; + access_without_readonly = applyParamsToAccessRights(*access_from_user_and_roles, changed_params); + } + + if (access_without_readonly->isGranted(flags, args...)) + { + if (params.interface == ClientInfo::Interface::HTTP && params.http_method == ClientInfo::HTTPMethod::GET) + show_error( + "Cannot execute query in readonly mode. " + "For queries over HTTP, method GET implies readonly. You should use method POST for modifying queries", + ErrorCodes::READONLY); + else + show_error("Cannot execute query in readonly mode", ErrorCodes::READONLY); + } + } + + if (!params.allow_ddl) + { + if (!access_with_allow_ddl) + { + Params changed_params = params; + changed_params.allow_ddl = true; + access_with_allow_ddl = applyParamsToAccessRights(*access_from_user_and_roles, changed_params); + } + + if (access_with_allow_ddl->isGranted(flags, args...)) + { + show_error("Cannot execute query. DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); + } + } + + if (!params.allow_introspection) + { + if (!access_with_allow_introspection) + { + Params changed_params = params; + changed_params.allow_introspection = true; + access_with_allow_introspection = applyParamsToAccessRights(*access_from_user_and_roles, changed_params); + } + + if (access_with_allow_introspection->isGranted(flags, args...)) + { + show_error("Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED); + } + } + + show_error( + "Not enough privileges. To execute this query it's necessary to have the grant " + + AccessRightsElement{flags, args...}.toString() + (grant_option ? " WITH GRANT OPTION" : ""), + ErrorCodes::ACCESS_DENIED); +} + +template +void ContextAccess::checkAccessImpl(const AccessFlags & flags) const +{ + checkAccessImpl2(flags); +} + +template +void ContextAccess::checkAccessImpl(const AccessFlags & flags, const std::string_view & database, const Args &... args) const +{ + checkAccessImpl2(flags, database.empty() ? 
params.current_database : database, args...); +} + +template +void ContextAccess::checkAccessImpl(const AccessRightsElement & element) const +{ + if (element.any_database) + checkAccessImpl(element.access_flags); + else if (element.any_table) + checkAccessImpl(element.access_flags, element.database); + else if (element.any_column) + checkAccessImpl(element.access_flags, element.database, element.table); + else + checkAccessImpl(element.access_flags, element.database, element.table, element.columns); +} + +template +void ContextAccess::checkAccessImpl(const AccessRightsElements & elements) const +{ + for (const auto & element : elements) + checkAccessImpl(element); +} + +void ContextAccess::checkAccess(const AccessFlags & flags) const { checkAccessImpl(flags); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database) const { checkAccessImpl(flags, database); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { checkAccessImpl(flags, database, table); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { checkAccessImpl(flags, database, table, column); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } +void ContextAccess::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } +void ContextAccess::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } + +void ContextAccess::checkGrantOption(const AccessFlags & flags) const { checkAccessImpl(flags); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database) const { checkAccessImpl(flags, database); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { checkAccessImpl(flags, database, table); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { checkAccessImpl(flags, database, table, column); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } +void ContextAccess::checkGrantOption(const AccessRightsElement & element) const { checkAccessImpl(element); } +void ContextAccess::checkGrantOption(const AccessRightsElements & elements) const { checkAccessImpl(elements); } + + +template +void ContextAccess::checkAdminOptionImpl(const Container & role_ids, const GetNameFunction & get_name_function) const +{ + if (isGranted(AccessType::ROLE_ADMIN)) + return; + + auto info = getRolesInfo(); + if (!info) + { + if (!user) + throw Exception(user_name + ": User 
has been dropped", ErrorCodes::UNKNOWN_USER); + return; + } + + size_t i = 0; + for (auto it = std::begin(role_ids); it != std::end(role_ids); ++it, ++i) + { + const UUID & role_id = *it; + if (info->enabled_roles_with_admin_option.count(role_id)) + continue; + + auto role_name = get_name_function(role_id, i); + if (!role_name) + role_name = "ID {" + toString(role_id) + "}"; + String msg = "To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option"; + if (info->enabled_roles.count(role_id)) + msg = "Role " + backQuote(*role_name) + " is granted, but without ADMIN option. " + msg; + throw Exception(getUserName() + ": Not enough privileges. " + msg, ErrorCodes::ACCESS_DENIED); + } +} + +void ContextAccess::checkAdminOption(const UUID & role_id) const +{ + checkAdminOptionImpl(to_array(role_id), [this](const UUID & id, size_t) { return manager->tryReadName(id); }); +} + +void ContextAccess::checkAdminOption(const UUID & role_id, const String & role_name) const +{ + checkAdminOptionImpl(to_array(role_id), [&role_name](const UUID &, size_t) { return std::optional{role_name}; }); +} + +void ContextAccess::checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const +{ + checkAdminOptionImpl(to_array(role_id), [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional{}; }); +} + +void ContextAccess::checkAdminOption(const std::vector & role_ids) const +{ + checkAdminOptionImpl(role_ids, [this](const UUID & id, size_t) { return manager->tryReadName(id); }); +} + +void ContextAccess::checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const +{ + checkAdminOptionImpl(role_ids, [&names_of_roles](const UUID &, size_t i) { return std::optional{names_of_roles[i]}; }); +} + +void ContextAccess::checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const +{ + checkAdminOptionImpl(role_ids, [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? 
it->second : std::optional{}; }); +} + } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 27bb29a878c..9a5758b79a6 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -30,32 +29,34 @@ class IAST; using ASTPtr = std::shared_ptr; +struct ContextAccessParams +{ + std::optional user_id; + boost::container::flat_set current_roles; + bool use_default_roles = false; + UInt64 readonly = 0; + bool allow_ddl = false; + bool allow_introspection = false; + String current_database; + ClientInfo::Interface interface = ClientInfo::Interface::TCP; + ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN; + Poco::Net::IPAddress address; + String quota_key; + + auto toTuple() const { return std::tie(user_id, current_roles, use_default_roles, readonly, allow_ddl, allow_introspection, current_database, interface, http_method, address, quota_key); } + friend bool operator ==(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return lhs.toTuple() == rhs.toTuple(); } + friend bool operator !=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(lhs == rhs); } + friend bool operator <(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return lhs.toTuple() < rhs.toTuple(); } + friend bool operator >(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return rhs < lhs; } + friend bool operator <=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(rhs < lhs); } + friend bool operator >=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(lhs < rhs); } +}; + + class ContextAccess { public: - struct Params - { - std::optional user_id; - boost::container::flat_set current_roles; - bool use_default_roles = false; - UInt64 readonly = 0; - bool allow_ddl = false; - bool allow_introspection = false; - String current_database; - ClientInfo::Interface interface = ClientInfo::Interface::TCP; - ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN; - Poco::Net::IPAddress address; - String quota_key; - - auto toTuple() const { return std::tie(user_id, current_roles, use_default_roles, readonly, allow_ddl, allow_introspection, current_database, interface, http_method, address, quota_key); } - friend bool operator ==(const Params & lhs, const Params & rhs) { return lhs.toTuple() == rhs.toTuple(); } - friend bool operator !=(const Params & lhs, const Params & rhs) { return !(lhs == rhs); } - friend bool operator <(const Params & lhs, const Params & rhs) { return lhs.toTuple() < rhs.toTuple(); } - friend bool operator >(const Params & lhs, const Params & rhs) { return rhs < lhs; } - friend bool operator <=(const Params & lhs, const Params & rhs) { return !(rhs < lhs); } - friend bool operator >=(const Params & lhs, const Params & rhs) { return !(lhs < rhs); } - }; - + using Params = ContextAccessParams; const Params & getParams() const { return params; } /// Returns the current user. The function can return nullptr. @@ -90,16 +91,8 @@ public: /// The function returns nullptr if there are no constraints. std::shared_ptr getSettingsConstraints() const; - /// Checks if a specified access is granted, and throws an exception if not. - /// Empty database means the current database. 
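For callers, the reorganised `ContextAccess` interface in this header keeps the same families of entry points but drops the `Poco::Logger *` overloads: `isGranted`/`hasGrantOption` return `bool`, while `checkAccess`/`checkGrantOption`/`checkAdminOption` throw on failure. A hedged call-site sketch follows; `executeSelectExample` and the particular access types used are assumptions about a typical caller, not code from this patch:

```cpp
// Hypothetical call site, not part of this patch: shows the non-throwing vs. throwing
// entry points of the reworked ContextAccess interface.
#include <string_view>
#include <Access/ContextAccess.h>

void executeSelectExample(const DB::ContextAccess & access, std::string_view database, std::string_view table)
{
    using DB::AccessType;

    // Throwing check: on failure raises ACCESS_DENIED, or READONLY / QUERY_IS_PROHIBITED /
    // FUNCTION_NOT_ALLOWED when one of the lazily built relaxed variants would have allowed it.
    access.checkAccess(AccessType::SELECT, database, table);

    // Non-throwing probe, e.g. to decide whether an optional feature is available.
    if (access.isGranted(AccessType::SHOW_COLUMNS, database, table))
    {
        // ...
    }

    // GRANT/REVOKE statements additionally need the grant option on the affected rights;
    // checkGrantOption() is the throwing counterpart of this probe.
    if (!access.hasGrantOption(AccessType::SELECT, database, table))
    {
        // report that "... WITH GRANT OPTION" is missing
    }
}
```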
- void checkAccess(const AccessFlags & flags) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; - void checkAccess(const AccessRightsElement & element) const; - void checkAccess(const AccessRightsElements & elements) const; + /// Returns the current access rights. + std::shared_ptr getAccess() const; /// Checks if a specified access is granted. bool isGranted(const AccessFlags & flags) const; @@ -111,17 +104,26 @@ public: bool isGranted(const AccessRightsElement & element) const; bool isGranted(const AccessRightsElements & elements) const; - /// Checks if a specified access is granted, and logs a warning if not. - bool isGranted(Poco::Logger * log_, const AccessFlags & flags) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; - bool isGranted(Poco::Logger * log_, const AccessRightsElement & element) const; - bool isGranted(Poco::Logger * log_, const AccessRightsElements & elements) const; + bool hasGrantOption(const AccessFlags & flags) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; + bool hasGrantOption(const AccessRightsElement & element) const; + bool hasGrantOption(const AccessRightsElements & elements) const; + + /// Checks if a specified access is granted, and throws an exception if not. + /// Empty database means the current database. 
+ void checkAccess(const AccessFlags & flags) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; + void checkAccess(const AccessRightsElement & element) const; + void checkAccess(const AccessRightsElements & elements) const; - /// Checks if a specified access is granted with grant option, and throws an exception if not. void checkGrantOption(const AccessFlags & flags) const; void checkGrantOption(const AccessFlags & flags, const std::string_view & database) const; void checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; @@ -133,6 +135,11 @@ public: /// Checks if a specified role is granted with admin option, and throws an exception if not. void checkAdminOption(const UUID & role_id) const; + void checkAdminOption(const UUID & role_id, const String & role_name) const; + void checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const; + void checkAdminOption(const std::vector & role_ids) const; + void checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const; + void checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const; /// Makes an instance of ContextAccess which provides full access to everything /// without any limitations. This is used for the global context. @@ -146,24 +153,40 @@ private: void setUser(const UserPtr & user_) const; void setRolesInfo(const std::shared_ptr & roles_info_) const; void setSettingsAndConstraints() const; + void calculateAccessRights() const; - template - bool checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags) const; + template + bool isGrantedImpl(const AccessFlags & flags) const; - template - bool checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const Args &... args) const; + template + bool isGrantedImpl(const AccessFlags & flags, const std::string_view & database, const Args &... args) const; - template - bool checkAccessImpl(Poco::Logger * log_, const AccessRightsElement & element) const; + template + bool isGrantedImpl(const AccessRightsElement & element) const; - template - bool checkAccessImpl(Poco::Logger * log_, const AccessRightsElements & elements) const; + template + bool isGrantedImpl(const AccessRightsElements & elements) const; - template - bool calculateResultAccessAndCheck(Poco::Logger * log_, const AccessFlags & flags, const Args &... args) const; + template + bool isGrantedImpl2(const AccessFlags & flags, const Args &... args) const; - boost::shared_ptr calculateResultAccess(bool grant_option) const; - boost::shared_ptr calculateResultAccess(bool grant_option, UInt64 readonly_, bool allow_ddl_, bool allow_introspection_) const; + template + void checkAccessImpl(const AccessFlags & flags) const; + + template + void checkAccessImpl(const AccessFlags & flags, const std::string_view & database, const Args &... 
args) const; + + template + void checkAccessImpl(const AccessRightsElement & element) const; + + template + void checkAccessImpl(const AccessRightsElements & elements) const; + + template + void checkAccessImpl2(const AccessFlags & flags, const Args &... args) const; + + template + void checkAdminOptionImpl(const Container & role_ids, const GetNameFunction & get_name_function) const; const AccessControlManager * manager = nullptr; const Params params; @@ -174,10 +197,14 @@ private: mutable std::shared_ptr enabled_roles; mutable ext::scope_guard subscription_for_roles_changes; mutable std::shared_ptr roles_info; - mutable boost::atomic_shared_ptr result_access[7]; + mutable std::shared_ptr access; mutable std::shared_ptr enabled_row_policies; mutable std::shared_ptr enabled_quota; mutable std::shared_ptr enabled_settings; + mutable std::shared_ptr access_without_readonly; + mutable std::shared_ptr access_with_allow_ddl; + mutable std::shared_ptr access_with_allow_introspection; + mutable std::shared_ptr access_from_user_and_roles; mutable std::mutex mutex; }; diff --git a/src/Access/EnabledRolesInfo.cpp b/src/Access/EnabledRolesInfo.cpp index 01b90d6fa1e..8069da467ad 100644 --- a/src/Access/EnabledRolesInfo.cpp +++ b/src/Access/EnabledRolesInfo.cpp @@ -28,8 +28,7 @@ bool operator==(const EnabledRolesInfo & lhs, const EnabledRolesInfo & rhs) { return (lhs.current_roles == rhs.current_roles) && (lhs.enabled_roles == rhs.enabled_roles) && (lhs.enabled_roles_with_admin_option == rhs.enabled_roles_with_admin_option) && (lhs.names_of_roles == rhs.names_of_roles) - && (lhs.access == rhs.access) && (lhs.access_with_grant_option == rhs.access_with_grant_option) - && (lhs.settings_from_enabled_roles == rhs.settings_from_enabled_roles); + && (lhs.access == rhs.access) && (lhs.settings_from_enabled_roles == rhs.settings_from_enabled_roles); } } diff --git a/src/Access/EnabledRolesInfo.h b/src/Access/EnabledRolesInfo.h index 45e1bfd9057..f06b7478daf 100644 --- a/src/Access/EnabledRolesInfo.h +++ b/src/Access/EnabledRolesInfo.h @@ -18,7 +18,6 @@ struct EnabledRolesInfo boost::container::flat_set enabled_roles_with_admin_option; std::unordered_map names_of_roles; AccessRights access; - AccessRights access_with_grant_option; SettingsProfileElements settings_from_enabled_roles; Strings getCurrentRolesNames() const; diff --git a/src/Access/GrantedAccess.cpp b/src/Access/GrantedAccess.cpp deleted file mode 100644 index 2af1e0b44ec..00000000000 --- a/src/Access/GrantedAccess.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include - - -namespace DB -{ - -GrantedAccess::GrantsAndPartialRevokes GrantedAccess::getGrantsAndPartialRevokes() const -{ - GrantsAndPartialRevokes res; - res.grants_with_grant_option = access_with_grant_option.getGrants(); - AccessRights access_without_gg = access; - access_without_gg.revoke(res.grants_with_grant_option); - auto gr = access_without_gg.getGrantsAndPartialRevokes(); - res.grants = std::move(gr.grants); - res.revokes = std::move(gr.revokes); - AccessRights access_with_grant_options_without_r = access_with_grant_option; - access_with_grant_options_without_r.grant(res.revokes); - res.revokes_grant_option = access_with_grant_options_without_r.getPartialRevokes(); - return res; -} - -} diff --git a/src/Access/GrantedAccess.h b/src/Access/GrantedAccess.h deleted file mode 100644 index b8f6bdfe8fb..00000000000 --- a/src/Access/GrantedAccess.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ -/// Access rights as they are granted to a role or user. 
-/// Stores both the access rights themselves and the access rights with grant option. -struct GrantedAccess -{ - AccessRights access; - AccessRights access_with_grant_option; - - template - void grant(const Args &... args) - { - access.grant(args...); - } - - template - void grantWithGrantOption(const Args &... args) - { - access.grant(args...); - access_with_grant_option.grant(args...); - } - - template - void revoke(const Args &... args) - { - access.revoke(args...); - access_with_grant_option.revoke(args...); - } - - template - void revokeGrantOption(const Args &... args) - { - access_with_grant_option.revoke(args...); - } - - struct GrantsAndPartialRevokes - { - AccessRightsElements grants; - AccessRightsElements revokes; - AccessRightsElements grants_with_grant_option; - AccessRightsElements revokes_grant_option; - }; - - /// Retrieves the information about grants and partial revokes. - GrantsAndPartialRevokes getGrantsAndPartialRevokes() const; - - friend bool operator ==(const GrantedAccess & left, const GrantedAccess & right) { return (left.access == right.access) && (left.access_with_grant_option == right.access_with_grant_option); } - friend bool operator !=(const GrantedAccess & left, const GrantedAccess & right) { return !(left == right); } -}; -} diff --git a/src/Access/Role.h b/src/Access/Role.h index 9acb97bdfbd..131bbd69195 100644 --- a/src/Access/Role.h +++ b/src/Access/Role.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -11,7 +11,7 @@ namespace DB struct Role : public IAccessEntity { - GrantedAccess access; + AccessRights access; GrantedRoles granted_roles; SettingsProfileElements settings; diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index ca8065145f3..3dca4b7719e 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -43,8 +43,7 @@ namespace roles_info.enabled_roles_with_admin_option.emplace(role_id); roles_info.names_of_roles[role_id] = role->getName(); - roles_info.access.merge(role->access.access); - roles_info.access_with_grant_option.merge(role->access.access_with_grant_option); + roles_info.access.makeUnion(role->access); roles_info.settings_from_enabled_roles.merge(role->settings); for (const auto & granted_role : role->granted_roles.roles) diff --git a/src/Access/User.h b/src/Access/User.h index 4852fce375d..13f1e532015 100644 --- a/src/Access/User.h +++ b/src/Access/User.h @@ -1,9 +1,9 @@ #pragma once #include +#include #include #include -#include #include #include #include @@ -17,7 +17,7 @@ struct User : public IAccessEntity { Authentication authentication; AllowedClientHosts allowed_client_hosts = AllowedClientHosts::AnyHostTag{}; - GrantedAccess access; + AccessRights access; GrantedRoles granted_roles; RolesOrUsersSet default_roles = RolesOrUsersSet::AllTag{}; SettingsProfileElements settings; diff --git a/src/Access/ya.make b/src/Access/ya.make index bdd62ae2b7b..77c94b87dfa 100644 --- a/src/Access/ya.make +++ b/src/Access/ya.make @@ -17,7 +17,6 @@ SRCS( EnabledRolesInfo.cpp EnabledRowPolicies.cpp EnabledSettings.cpp - GrantedAccess.cpp GrantedRoles.cpp IAccessEntity.cpp IAccessStorage.cpp diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index 1f3426160cb..95b4836c336 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -19,6 +19,8 @@ namespace ErrorCodes template struct AggregateFunctionAvgData { + using NumeratorType = T; + T numerator = 0; Denominator denominator = 
0; diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h index a3d3b9958db..8eb619585c7 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h @@ -16,7 +16,7 @@ public: const auto & values = static_cast(*columns[0]); const auto & weights = static_cast(*columns[1]); - this->data(place).numerator += values.getData()[row_num] * weights.getData()[row_num]; + this->data(place).numerator += static_cast(values.getData()[row_num]) * weights.getData()[row_num]; this->data(place).denominator += weights.getData()[row_num]; } diff --git a/src/AggregateFunctions/AggregateFunctionCount.cpp b/src/AggregateFunctions/AggregateFunctionCount.cpp index b00adaa0f1a..6ea63bedaf0 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.cpp +++ b/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -28,7 +28,8 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons void registerAggregateFunctionCount(AggregateFunctionFactory & factory) { - factory.registerFunction("count", {createAggregateFunctionCount, {true}}, AggregateFunctionFactory::CaseInsensitive); + AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false }; + factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::CaseInsensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 83221df784a..f7c6fe9da14 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -162,6 +162,52 @@ AggregateFunctionPtr AggregateFunctionFactory::tryGet( } +std::optional AggregateFunctionFactory::tryGetPropertiesImpl(const String & name_param, int recursion_level) const +{ + String name = getAliasToOrName(name_param); + Value found; + + /// Find by exact match. + if (auto it = aggregate_functions.find(name); it != aggregate_functions.end()) + { + found = it->second; + } + /// Find by case-insensitive name. + /// Combinators cannot apply for case insensitive (SQL-style) aggregate function names. Only for native names. + else if (recursion_level == 0) + { + if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) + found = jt->second; + } + + if (found.creator) + return found.properties; + + /// Combinators of aggregate functions. + /// For every aggregate function 'agg' and combiner '-Comb' there is combined aggregate function with name 'aggComb', + /// that can have different number and/or types of arguments, different result type and different behaviour. + + if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name)) + { + if (combinator->isForInternalUsageOnly()) + return {}; + + String nested_name = name.substr(0, name.size() - combinator->getName().size()); + + /// NOTE: It's reasonable to also allow to transform properties by combinator. 
+ return tryGetPropertiesImpl(nested_name, recursion_level + 1); + } + + return {}; +} + + +std::optional AggregateFunctionFactory::tryGetProperties(const String & name) const +{ + return tryGetPropertiesImpl(name, 0); +} + + bool AggregateFunctionFactory::isAggregateFunctionName(const String & name, int recursion_level) const { if (aggregate_functions.count(name) || isAlias(name)) diff --git a/src/AggregateFunctions/AggregateFunctionFactory.h b/src/AggregateFunctions/AggregateFunctionFactory.h index 90e44145f4b..143e6562a30 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.h +++ b/src/AggregateFunctions/AggregateFunctionFactory.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -72,6 +73,9 @@ public: const Array & parameters, AggregateFunctionProperties & out_properties) const; + /// Get properties if the aggregate function exists. + std::optional tryGetProperties(const String & name) const; + bool isAggregateFunctionName(const String & name, int recursion_level = 0) const; private: @@ -83,6 +87,8 @@ private: bool has_null_arguments, int recursion_level) const; + std::optional tryGetPropertiesImpl(const String & name, int recursion_level) const; + private: using AggregateFunctions = std::unordered_map; diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 0c1cb1d0d36..61dbdeef16e 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -120,8 +120,10 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(const std::string & void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory) { - factory.registerFunction("groupArray", createAggregateFunctionGroupArray); - factory.registerFunction("groupArraySample", createAggregateFunctionGroupArraySample); + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties }); + factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp index 31a921c3b2c..f8084e3716f 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp @@ -95,8 +95,10 @@ AggregateFunctionPtr createAggregateFunctionMoving(const std::string & name, con void registerAggregateFunctionMoving(AggregateFunctionFactory & factory) { - factory.registerFunction("groupArrayMovingSum", createAggregateFunctionMoving); - factory.registerFunction("groupArrayMovingAvg", createAggregateFunctionMoving); + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("groupArrayMovingSum", { createAggregateFunctionMoving, properties }); + factory.registerFunction("groupArrayMovingAvg", { createAggregateFunctionMoving, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 19562b37a12..13895dea8d4 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -71,7 +71,6 @@ struct MovingAvgData void add(T val, Arena * arena) { sum += 
val; - value.push_back(sum, arena); } @@ -96,7 +95,8 @@ class MovingImpl final public: using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; - using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; // probably for overflow function in the future + // probably for overflow function in the future + using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; explicit MovingImpl(const DataTypePtr & data_type_, UInt64 win_size_ = std::numeric_limits::max()) : IAggregateFunctionDataHelper>({data_type_}, {}) diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index 552089bb58d..dd29a64819a 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -110,7 +110,9 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & n void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory & factory) { - factory.registerFunction("groupUniqArray", createAggregateFunctionGroupUniqArray); + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("groupUniqArray", { createAggregateFunctionGroupUniqArray, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp b/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp index 9358d361616..a98eaccdabd 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp @@ -49,13 +49,18 @@ AggregateFunctionPtr createAggregateFunctionArgMax(const std::string & name, con void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory) { - factory.registerFunction("any", createAggregateFunctionAny); - factory.registerFunction("anyLast", createAggregateFunctionAnyLast); - factory.registerFunction("anyHeavy", createAggregateFunctionAnyHeavy); factory.registerFunction("min", createAggregateFunctionMin, AggregateFunctionFactory::CaseInsensitive); factory.registerFunction("max", createAggregateFunctionMax, AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("argMin", createAggregateFunctionArgMin); - factory.registerFunction("argMax", createAggregateFunctionArgMax); + + /// The functions below depend on the order of data. 
+ + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("any", { createAggregateFunctionAny, properties }); + factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties }); + factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties }); + factory.registerFunction("argMin", { createAggregateFunctionArgMin, properties }); + factory.registerFunction("argMax", { createAggregateFunctionArgMax, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 7f2da260c2d..a8cea5eb59b 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -100,7 +100,8 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const threshold = k; } - AggregateFunctionPtr res(createWithNumericType(*argument_types[0], threshold, load_factor, argument_types, params)); + AggregateFunctionPtr res(createWithNumericType( + *argument_types[0], threshold, load_factor, argument_types, params)); if (!res) res = AggregateFunctionPtr(createWithExtraTypes(argument_types[0], threshold, load_factor, params)); @@ -116,8 +117,10 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const void registerAggregateFunctionTopK(AggregateFunctionFactory & factory) { - factory.registerFunction("topK", createAggregateFunctionTopK); - factory.registerFunction("topKWeighted", createAggregateFunctionTopK); + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("topK", { createAggregateFunctionTopK, properties }); + factory.registerFunction("topKWeighted", { createAggregateFunctionTopK, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 68317d0bdf0..f77fc482685 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -47,7 +47,7 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared(std::make_shared>()); + return std::make_shared(this->argument_types[0]); } void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp index 40742ae336e..32fdb188529 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -122,14 +122,16 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory) { + AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false }; + factory.registerFunction("uniq", - {createAggregateFunctionUniq, {true}}); + {createAggregateFunctionUniq, properties}); factory.registerFunction("uniqHLL12", - {createAggregateFunctionUniq, {true}}); + {createAggregateFunctionUniq, properties}); factory.registerFunction("uniqExact", - {createAggregateFunctionUniq>, {true}}); + {createAggregateFunctionUniq>, properties}); } } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 40d589f773d..eb9c560af98 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ 
b/src/AggregateFunctions/IAggregateFunction.h @@ -289,6 +289,11 @@ struct AggregateFunctionProperties * or we should return non-Nullable type with default value (example: count, countDistinct). */ bool returns_default_when_only_null = false; + + /** Result varies depending on the data order (example: groupArray). + * Some may also name this property as "non-commutative". + */ + bool is_order_dependent = false; }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 63799a1df9f..f631732bad3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -73,6 +73,10 @@ if(USE_RDKAFKA) add_headers_and_sources(dbms Storages/Kafka) endif() +if (USE_AMQPCPP) + add_headers_and_sources(dbms Storages/RabbitMQ) +endif() + if (USE_AWS_S3) add_headers_and_sources(dbms Common/S3) add_headers_and_sources(dbms Disks/S3) @@ -262,6 +266,9 @@ if (USE_RDKAFKA) endif() endif() +if (USE_AMQPCPP) + dbms_target_link_libraries(PUBLIC amqp-cpp) +endif() if(RE2_INCLUDE_DIR) target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) @@ -365,6 +372,14 @@ target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_C target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR}) +if (USE_ORC) + target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR}) + configure_file ( + "${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" + "${ORC_INCLUDE_DIR}/orc/orc-config.hh" + ) +endif () + if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index bbdcae894e7..95cb81c8052 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -33,6 +33,8 @@ public: virtual Entry get(const ConnectionTimeouts & timeouts, const Settings * settings = nullptr, bool force_connected = true) = 0; + + virtual Int64 getPriority() const { return 1; } }; using ConnectionPoolPtr = std::shared_ptr; @@ -54,7 +56,8 @@ public: const String & password_, const String & client_name_ = "client", Protocol::Compression compression_ = Protocol::Compression::Enable, - Protocol::Secure secure_ = Protocol::Secure::Disable) + Protocol::Secure secure_ = Protocol::Secure::Disable, + Int64 priority_ = 1) : Base(max_connections_, &Poco::Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")), host(host_), @@ -64,7 +67,8 @@ public: password(password_), client_name(client_name_), compression(compression_), - secure{secure_} + secure(secure_), + priority(priority_) { } @@ -93,6 +97,11 @@ public: return host + ":" + toString(port); } + Int64 getPriority() const override + { + return priority; + } + protected: /** Creates a new object to put in the pool. */ ConnectionPtr allocObject() override @@ -111,8 +120,9 @@ private: String password; String client_name; - Protocol::Compression compression; /// Whether to compress data when interacting with the server. - Protocol::Secure secure; /// Whether to encrypt data when interacting with the server. + Protocol::Compression compression; /// Whether to compress data when interacting with the server. + Protocol::Secure secure; /// Whether to encrypt data when interacting with the server. 
+ Int64 priority; /// priority from }; diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 128a4836a42..5d01a3dd196 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -90,6 +90,14 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts return Base::get(max_ignored_errors, fallback_to_stale_replicas, try_get_entry, get_priority); } +Int64 ConnectionPoolWithFailover::getPriority() const +{ + return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto &a, const auto &b) + { + return a->getPriority() - b->getPriority(); + }))->getPriority(); +} + ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const { const Base::PoolStates states = getPoolStates(); diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 10dea98c8f7..d9baa3aff66 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -47,6 +47,8 @@ public: const Settings * settings, bool force_connected) override; /// From IConnectionPool + Int64 getPriority() const override; /// From IConnectionPool + /** Allocates up to the specified number of connections to work. * Connections provide access to different replicas of one shard. */ diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index 545c0b1b300..2fa1fbce32d 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -120,7 +120,9 @@ void ColumnConst::getPermutation(bool /*reverse*/, size_t /*limit*/, int /*nan_d res[i] = i; } -void ColumnConst::updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const {} +void ColumnConst::updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const +{ +} void ColumnConst::updateWeakHash32(WeakHash32 & hash) const { diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 05e4c9e14bf..0c305075c66 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -496,6 +496,7 @@ namespace ErrorCodes extern const int NO_SUITABLE_FUNCTION_IMPLEMENTATION = 527; extern const int CASSANDRA_INTERNAL_ERROR = 528; extern const int NOT_A_LEADER = 529; + extern const int CANNOT_CONNECT_RABBITMQ = 530; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 5b4e736a907..cee747d106f 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -64,6 +64,8 @@ public: , shared_pool_states(nested_pools.size()) , log(log_) { + for (size_t i = 0;i < nested_pools.size(); ++i) + shared_pool_states[i].config_priority = nested_pools[i]->getPriority(); } struct TryResult @@ -304,6 +306,9 @@ template struct PoolWithFailoverBase::PoolState { UInt64 error_count = 0; + /// Priority from the configuration. + Int64 config_priority = 1; + /// Priority from the GetPriorityFunc. 
Int64 priority = 0; UInt32 random = 0; @@ -314,8 +319,8 @@ struct PoolWithFailoverBase::PoolState static bool compare(const PoolState & lhs, const PoolState & rhs) { - return std::forward_as_tuple(lhs.error_count, lhs.priority, lhs.random) - < std::forward_as_tuple(rhs.error_count, rhs.priority, rhs.random); + return std::forward_as_tuple(lhs.error_count, lhs.config_priority, lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.error_count, rhs.config_priority, rhs.priority, rhs.random); } private: diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 7a7a6bc6162..5c88b2ee849 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -196,8 +196,12 @@ M(PerfCpuMigrations, "Number of times the process has migrated to a new CPU") \ M(PerfAlignmentFaults, "Number of alignment faults. These happen when unaligned memory accesses happen; the kernel can handle these but it reduces performance. This happens only on some architectures (never on x86).") \ M(PerfEmulationFaults, "Number of emulation faults. The kernel sometimes traps on unimplemented instructions and emulates them for user space. This can negatively impact performance.") \ - M(PerfPageFaultsMinor, "This counts the number of minor page faults. These did not require disk I/O to handle.") \ - M(PerfPageFaultsMajor, "This counts the number of major page faults. These required disk I/O to handle.") \ + M(PerfMinEnabledTime, "For all events, minimum time that an event was enabled. Used to track event multiplexing influence") \ + M(PerfMinEnabledRunningTime, "Running time for event with minimum enabled time. Used to track the amount of event multiplexing") \ + M(PerfDataTLBReferences, "Data TLB references") \ + M(PerfDataTLBMisses, "Data TLB misses") \ + M(PerfInstructionTLBReferences, "Instruction TLB references") \ + M(PerfInstructionTLBMisses, "Instruction TLB misses") \ \ M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \ \ diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index d228fdb42b6..3766908f9cd 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -30,8 +30,21 @@ namespace ErrorCodes } -StatusFile::StatusFile(const std::string & path_) - : path(path_) +StatusFile::FillFunction StatusFile::write_pid = [](WriteBuffer & out) +{ + out << getpid(); +}; + +StatusFile::FillFunction StatusFile::write_full_info = [](WriteBuffer & out) +{ + out << "PID: " << getpid() << "\n" + << "Started at: " << LocalDateTime(time(nullptr)) << "\n" + << "Revision: " << ClickHouseRevision::get() << "\n"; +}; + + +StatusFile::StatusFile(std::string path_, FillFunction fill_) + : path(std::move(path_)), fill(std::move(fill_)) { /// If file already exists. NOTE Minor race condition. if (Poco::File(path).exists()) @@ -72,13 +85,8 @@ StatusFile::StatusFile(const std::string & path_) throwFromErrnoWithPath("Cannot lseek " + path, path, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); /// Write information about current server instance to the file. - { - WriteBufferFromFileDescriptor out(fd, 1024); - out - << "PID: " << getpid() << "\n" - << "Started at: " << LocalDateTime(time(nullptr)) << "\n" - << "Revision: " << ClickHouseRevision::get() << "\n"; - } + WriteBufferFromFileDescriptor out(fd, 1024); + fill(out); } catch (...) 
{ diff --git a/src/Common/StatusFile.h b/src/Common/StatusFile.h index 0dde3e3d16f..5115e54428f 100644 --- a/src/Common/StatusFile.h +++ b/src/Common/StatusFile.h @@ -1,23 +1,33 @@ #pragma once #include +#include #include namespace DB { +class WriteBuffer; + /** Provides that no more than one server works with one data directory. */ class StatusFile : private boost::noncopyable { public: - explicit StatusFile(const std::string & path_); + using FillFunction = std::function; + + StatusFile(std::string path_, FillFunction fill_); ~StatusFile(); + /// You can use one of these functions to fill the file or provide your own. + static FillFunction write_pid; + static FillFunction write_full_info; + private: const std::string path; + FillFunction fill; int fd = -1; }; diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 1d65a16ba66..04c29841e23 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -147,6 +147,19 @@ thread_local PerfEventsCounters current_thread_counters; .settings_name = #LOCAL_NAME \ } +// One event for cache accesses and one for cache misses. +// Type is ACCESS or MISS +#define CACHE_EVENT(PERF_NAME, LOCAL_NAME, TYPE) \ + PerfEventInfo \ + { \ + .event_type = perf_type_id::PERF_TYPE_HW_CACHE, \ + .event_config = (PERF_NAME) \ + | (PERF_COUNT_HW_CACHE_OP_READ << 8) \ + | (PERF_COUNT_HW_CACHE_RESULT_ ## TYPE << 16), \ + .profile_event = ProfileEvents::LOCAL_NAME, \ + .settings_name = #LOCAL_NAME \ + } + // descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html static const PerfEventInfo raw_events_info[] = { HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles), @@ -167,8 +180,19 @@ static const PerfEventInfo raw_events_info[] = { SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations), SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults), SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults), - SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor), - SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor) + + // Don't add them -- they are the same as SoftPageFaults and HardPageFaults, + // match well numerically. + // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor), + // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor), + + CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS), + + // Apparently it doesn't make sense to treat these values as relative: + // https://stackoverflow.com/questions/49933319/how-to-interpret-perf-itlb-loads-itlb-load-misses + CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS), }; static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS); @@ -455,7 +479,12 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile } } - // actually process counters' values + // Actually process counters' values. Track the minimal time that a performance + // counter was enabled, and the corresponding running time, to give some idea + // about the amount of counter multiplexing. 
+ UInt64 min_enabled_time = -1; + UInt64 running_time_for_min_enabled_time = 0; + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) { int fd = thread_events_descriptors_holder.descriptors[i]; @@ -469,14 +498,30 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile // Account for counter multiplexing. time_running and time_enabled are // not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate // deltas from old values. + const auto enabled = current_value.time_enabled - previous_value.time_enabled; + const auto running = current_value.time_running - previous_value.time_running; const UInt64 delta = (current_value.value - previous_value.value) - * (current_value.time_enabled - previous_value.time_enabled) - / std::max(1.f, - float(current_value.time_running - previous_value.time_running)); + * enabled / std::max(1.f, float(running)); + + if (min_enabled_time > enabled) + { + min_enabled_time = enabled; + running_time_for_min_enabled_time = running; + } profile_events.increment(info.profile_event, delta); } + // If we had at least one enabled event, also show multiplexing-related + // statistics. + if (min_enabled_time != UInt64(-1)) + { + profile_events.increment(ProfileEvents::PerfMinEnabledTime, + min_enabled_time); + profile_events.increment(ProfileEvents::PerfMinEnabledRunningTime, + running_time_for_min_enabled_time); + } + // Store current counter values for the next profiling period. memcpy(previous_values, current_values, sizeof(current_values)); } diff --git a/src/Common/ThreadProfileEvents.h b/src/Common/ThreadProfileEvents.h index a4ee0628629..7118e927162 100644 --- a/src/Common/ThreadProfileEvents.h +++ b/src/Common/ThreadProfileEvents.h @@ -53,8 +53,12 @@ namespace ProfileEvents extern const Event PerfCpuMigrations; extern const Event PerfAlignmentFaults; extern const Event PerfEmulationFaults; - extern const Event PerfPageFaultsMinor; - extern const Event PerfPageFaultsMajor; + extern const Event PerfMinEnabledTime; + extern const Event PerfMinEnabledRunningTime; + extern const Event PerfDataTLBReferences; + extern const Event PerfDataTLBMisses; + extern const Event PerfInstructionTLBReferences; + extern const Event PerfInstructionTLBMisses; #endif } @@ -158,7 +162,7 @@ struct PerfEventValue UInt64 time_running = 0; }; -static constexpr size_t NUMBER_OF_RAW_EVENTS = 18; +static constexpr size_t NUMBER_OF_RAW_EVENTS = 20; struct PerfDescriptorsHolder : boost::noncopyable { diff --git a/src/Common/XDBCBridgeHelper.h b/src/Common/XDBCBridgeHelper.h index 9f20c75182d..a425cd36b11 100644 --- a/src/Common/XDBCBridgeHelper.h +++ b/src/Common/XDBCBridgeHelper.h @@ -41,6 +41,7 @@ public: virtual Poco::URI getMainURI() const = 0; virtual Poco::URI getColumnsInfoURI() const = 0; virtual IdentifierQuotingStyle getIdentifierQuotingStyle() = 0; + virtual bool isSchemaAllowed() = 0; virtual String getName() const = 0; virtual ~IXDBCBridgeHelper() = default; @@ -61,6 +62,7 @@ private: Poco::Logger * log = &Poco::Logger::get(BridgeHelperMixin::getName() + "BridgeHelper"); std::optional quote_style; + std::optional is_schema_allowed; protected: auto getConnectionString() const @@ -80,6 +82,7 @@ public: static constexpr inline auto MAIN_HANDLER = "/"; static constexpr inline auto COL_INFO_HANDLER = "/columns_info"; static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; + static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed"; static constexpr inline auto PING_OK_ANSWER = "Ok."; XDBCBridgeHelper(const Context & 
global_context_, const Poco::Timespan & http_timeout_, const std::string & connection_string_) @@ -128,6 +131,27 @@ public: return *quote_style; } + bool isSchemaAllowed() override + { + if (!is_schema_allowed.has_value()) + { + startBridgeSync(); + + auto uri = createBaseURI(); + uri.setPath(SCHEMA_ALLOWED_HANDLER); + uri.addQueryParameter("connection_string", getConnectionString()); + + ReadWriteBufferFromHTTP buf( + uri, Poco::Net::HTTPRequest::HTTP_POST, {}, ConnectionTimeouts::getHTTPTimeouts(context)); + + bool res; + readBoolText(res, buf); + is_schema_allowed = res; + } + + return *is_schema_allowed; + } + /** * @todo leaky abstraction - used by external API's */ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4f9e3b8426a..9e8f96aa520 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -74,6 +74,7 @@ struct Settings : public SettingsCollection M(SettingMilliseconds, connection_pool_max_wait_ms, 0, "The wait time when the connection pool is full.", 0) \ M(SettingMilliseconds, replace_running_query_max_wait_ms, 5000, "The wait time for running query with the same query_id to finish when setting 'replace_running_query' is active.", 0) \ M(SettingMilliseconds, kafka_max_wait_ms, 5000, "The wait time for reading from Kafka before retry.", 0) \ + M(SettingMilliseconds, rabbitmq_max_wait_ms, 5000, "The wait time for reading from RabbitMQ before retry.", 0) \ M(SettingUInt64, poll_interval, DBMS_DEFAULT_POLL_INTERVAL, "Block at the query wait loop on the server for the specified number of seconds.", 0) \ M(SettingUInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \ M(SettingUInt64, distributed_connections_pool_size, DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE, "Maximum number of connections with one remote server in the pool.", 0) \ @@ -335,6 +336,7 @@ struct Settings : public SettingsCollection M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.", 0) \ M(SettingBool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ M(SettingBool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ + M(SettingUInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \ M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.", 0) \ M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. 
Currently supported only by 'mysql' and 'odbc' table functions.", 0) \ diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 620c23c21cc..5991c12a1f2 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -5,6 +5,7 @@ #cmakedefine01 USE_ICU #cmakedefine01 USE_MYSQL #cmakedefine01 USE_RDKAFKA +#cmakedefine01 USE_AMQPCPP #cmakedefine01 USE_EMBEDDED_COMPILER #cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_SSL diff --git a/src/DataStreams/BlockIO.h b/src/DataStreams/BlockIO.h index d4733e6aebe..91d7efac8d1 100644 --- a/src/DataStreams/BlockIO.h +++ b/src/DataStreams/BlockIO.h @@ -31,8 +31,8 @@ struct BlockIO QueryPipeline pipeline; /// Callbacks for query logging could be set here. - std::function finish_callback; - std::function exception_callback; + std::function finish_callback; + std::function exception_callback; /// When it is true, don't bother sending any non-empty blocks to the out stream bool null_format = false; @@ -41,7 +41,13 @@ struct BlockIO void onFinish() { if (finish_callback) - finish_callback(in.get(), out.get()); + { + QueryPipeline * pipeline_ptr = nullptr; + if (pipeline.initialized()) + pipeline_ptr = &pipeline; + + finish_callback(in.get(), out.get(), pipeline_ptr); + } } void onException() diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 8e075e5bf08..7a67074dbdf 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -14,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int VIOLATED_CONSTRAINT; + extern const int LOGICAL_ERROR; } @@ -40,46 +42,72 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) for (size_t i = 0; i < expressions.size(); ++i) { auto constraint_expr = expressions[i]; - constraint_expr->execute(block_to_calculate); + + auto * constraint_ptr = constraints.constraints[i]->as(); + ColumnWithTypeAndName res_column = block_to_calculate.getByPosition(block_to_calculate.columns() - 1); - const ColumnUInt8 & res_column_uint8 = assert_cast(*res_column.column); - const UInt8 * data = res_column_uint8.getData().data(); - size_t size = res_column_uint8.size(); + if (!isUInt8(res_column.type)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Constraint {} does not return a value of type UInt8", + backQuote(constraint_ptr->name)); - /// Is violated. - if (!memoryIsByte(data, size, 1)) + if (const ColumnConst * res_const = typeid_cast(res_column.column.get())) { - size_t row_idx = 0; - for (; row_idx < size; ++row_idx) - if (data[row_idx] != 1) - break; + UInt8 value = res_const->getValue(); - Names related_columns = constraint_expr->getRequiredColumns(); - - std::stringstream exception_message; - - auto * constraint_ptr = constraints.constraints[i]->as(); - exception_message << "Constraint " << backQuote(constraint_ptr->name) - << " for table " << table_id.getNameForLogs() - << " is violated at row " << (rows_written + row_idx + 1) - << ". Expression: (" << serializeAST(*(constraint_ptr->expr), true) << ")" - << ". Column values"; - - bool first = true; - for (const auto & name : related_columns) + /// Is violated. + if (!value) { - const IColumn & column = *block.getByName(name).column; - assert(row_idx < column.size()); + std::stringstream exception_message; - exception_message << (first ? 
": " : ", ") - << backQuoteIfNeed(name) << " = " << applyVisitor(FieldVisitorToString(), column[row_idx]); + exception_message << "Constraint " << backQuote(constraint_ptr->name) + << " for table " << table_id.getNameForLogs() + << " is violated, because it is a constant expression returning 0." + << " It is most likely an error in table definition."; - first = false; + throw Exception{exception_message.str(), ErrorCodes::VIOLATED_CONSTRAINT}; } + } + else + { + const ColumnUInt8 & res_column_uint8 = assert_cast(*res_column.column); - throw Exception{exception_message.str(), ErrorCodes::VIOLATED_CONSTRAINT}; + const UInt8 * data = res_column_uint8.getData().data(); + size_t size = res_column_uint8.size(); + + /// Is violated. + if (!memoryIsByte(data, size, 1)) + { + size_t row_idx = 0; + for (; row_idx < size; ++row_idx) + if (data[row_idx] != 1) + break; + + Names related_columns = constraint_expr->getRequiredColumns(); + + std::stringstream exception_message; + + exception_message << "Constraint " << backQuote(constraint_ptr->name) + << " for table " << table_id.getNameForLogs() + << " is violated at row " << (rows_written + row_idx + 1) + << ". Expression: (" << serializeAST(*(constraint_ptr->expr), true) << ")" + << ". Column values"; + + bool first = true; + for (const auto & name : related_columns) + { + const IColumn & column = *block.getByName(name).column; + assert(row_idx < column.size()); + + exception_message << (first ? ": " : ", ") + << backQuoteIfNeed(name) << " = " << applyVisitor(FieldVisitorToString(), column[row_idx]); + + first = false; + } + + throw Exception{exception_message.str(), ErrorCodes::VIOLATED_CONSTRAINT}; + } } } } diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 1766b399c2a..22d403da6c4 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -90,7 +90,7 @@ public: bool canBeComparedWithCollation() const override { return nested_data_type->canBeComparedWithCollation(); } bool canBeUsedAsVersion() const override { return false; } bool isSummable() const override { return nested_data_type->isSummable(); } - bool canBeUsedInBooleanContext() const override { return nested_data_type->canBeUsedInBooleanContext(); } + bool canBeUsedInBooleanContext() const override { return nested_data_type->canBeUsedInBooleanContext() || onlyNull(); } bool haveMaximumSizeOfValue() const override { return nested_data_type->haveMaximumSizeOfValue(); } size_t getMaximumSizeOfValueInMemory() const override { return 1 + nested_data_type->getMaximumSizeOfValueInMemory(); } bool isNullable() const override { return true; } diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 6c5433cab38..986e36de8cf 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -28,7 +28,11 @@ namespace if (!load_result.config) return nullptr; DictionaryStructure dictionary_structure = ExternalDictionariesLoader::getDictionaryStructure(*load_result.config); - return StorageDictionary::create(StorageID(database_name, load_result.name), load_result.name, dictionary_structure); + return StorageDictionary::create( + StorageID(database_name, load_result.name), + load_result.name, + dictionary_structure, + StorageDictionary::Location::DictionaryDatabase); } catch (Exception & e) { diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 9191633e553..43846ff6d64 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ 
b/src/Databases/DatabaseOnDisk.cpp @@ -389,6 +389,9 @@ void DatabaseOnDisk::iterateMetadataFiles(const Context & context, const Iterati } }; + /// Metadata files to load: name and flag for .tmp_drop files + std::set> metadata_files; + Poco::DirectoryIterator dir_end; for (Poco::DirectoryIterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it) { @@ -404,7 +407,7 @@ void DatabaseOnDisk::iterateMetadataFiles(const Context & context, const Iterati if (endsWith(dir_it.name(), tmp_drop_ext)) { /// There are files that we tried to delete previously - process_tmp_drop_metadata_file(dir_it.name()); + metadata_files.emplace(dir_it.name(), false); } else if (endsWith(dir_it.name(), ".sql.tmp")) { @@ -415,12 +418,26 @@ void DatabaseOnDisk::iterateMetadataFiles(const Context & context, const Iterati else if (endsWith(dir_it.name(), ".sql")) { /// The required files have names like `table_name.sql` - process_metadata_file(dir_it.name()); + metadata_files.emplace(dir_it.name(), true); } else throw Exception("Incorrect file extension: " + dir_it.name() + " in metadata directory " + getMetadataPath(), ErrorCodes::INCORRECT_FILE_NAME); } + + /// Read and parse metadata in parallel + ThreadPool pool(SettingMaxThreads().getAutoValue()); + for (const auto & file : metadata_files) + { + pool.scheduleOrThrowOnError([&]() + { + if (file.second) + process_metadata_file(file.first); + else + process_tmp_drop_metadata_file(file.first); + }); + } + pool.wait(); } ASTPtr DatabaseOnDisk::parseQueryFromMetadata(Poco::Logger * logger, const Context & context, const String & metadata_file_path, bool throw_on_error /*= true*/, bool remove_empty /*= false*/) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 9194558dffb..9e7d2b52199 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -112,11 +112,12 @@ void DatabaseOrdinary::loadStoredObjects(Context & context, bool has_force_resto * which does not correspond to order tables creation and does not correspond to order of their location on disk. */ using FileNames = std::map; + std::mutex file_names_mutex; FileNames file_names; size_t total_dictionaries = 0; - auto process_metadata = [&context, &file_names, &total_dictionaries, this](const String & file_name) + auto process_metadata = [&context, &file_names, &total_dictionaries, &file_names_mutex, this](const String & file_name) { fs::path path(getMetadataPath()); fs::path file_path(file_name); @@ -128,6 +129,7 @@ void DatabaseOrdinary::loadStoredObjects(Context & context, bool has_force_resto if (ast) { auto * create_query = ast->as(); + std::lock_guard lock{file_names_mutex}; file_names[file_name] = ast; total_dictionaries += create_query->is_dictionary; } diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp index e0f2aa9286b..9be7e4d8b3e 100644 --- a/src/Databases/DatabaseWithDictionaries.cpp +++ b/src/Databases/DatabaseWithDictionaries.cpp @@ -49,7 +49,8 @@ void DatabaseWithDictionaries::attachDictionary(const String & dictionary_name, StorageDictionary::create( StorageID(getDatabaseName(), dictionary_name), full_name, - ExternalDictionariesLoader::getDictionaryStructure(*attach_info.config)), + ExternalDictionariesLoader::getDictionaryStructure(*attach_info.config), + StorageDictionary::Location::SameDatabaseAndNameAsDictionary), lock); } catch (...) 
diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index c41f528db91..5c7fd4f50fd 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -102,7 +102,7 @@ CassandraDictionarySource::CassandraDictionarySource( , dict_struct(dict_struct_) , settings(settings_) , sample_block(sample_block_) - , query_builder(dict_struct, settings.db, settings.table, settings.where, IdentifierQuotingStyle::DoubleQuotes) + , query_builder(dict_struct, settings.db, "", settings.table, settings.where, IdentifierQuotingStyle::DoubleQuotes) { cassandraCheck(cass_cluster_set_contact_points(cluster, settings.host.c_str())); if (settings.port) diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 9d3f6063a21..180750d143a 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -66,7 +66,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( , where{config.getString(config_prefix + ".where", "")} , update_field{config.getString(config_prefix + ".update_field", "")} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} - , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , sample_block{sample_block_} , context(context_) , is_local{isLocalAddress({host, port}, secure ? context.getTCPPortSecure().value_or(0) : context.getTCPPort())} @@ -97,7 +97,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionar , update_field{other.update_field} , invalidate_query{other.invalidate_query} , invalidate_query_response{other.invalidate_query_response} - , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , sample_block{other.sample_block} , context(other.context) , is_local{other.is_local} diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index e64f04d28f2..0cf7e28eb29 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -19,22 +19,12 @@ namespace ErrorCodes ExternalQueryBuilder::ExternalQueryBuilder( const DictionaryStructure & dict_struct_, const std::string & db_, + const std::string & schema_, const std::string & table_, const std::string & where_, IdentifierQuotingStyle quoting_style_) - : dict_struct(dict_struct_), db(db_), where(where_), quoting_style(quoting_style_) -{ - if (auto pos = table_.find('.'); pos != std::string::npos) - { - schema = table_.substr(0, pos); - table = table_.substr(pos + 1); - } - else - { - schema = ""; - table = table_; - } -} + : dict_struct(dict_struct_), db(db_), schema(schema_), table(table_), where(where_), quoting_style(quoting_style_) +{} void ExternalQueryBuilder::writeQuoted(const std::string & s, WriteBuffer & out) const diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index 3011efbc895..4c0e876b5db 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -18,8 +18,8 @@ struct ExternalQueryBuilder { const DictionaryStructure & dict_struct; std::string db; - std::string table; std::string schema; + std::string table; const std::string & where; IdentifierQuotingStyle 
quoting_style; @@ -28,6 +28,7 @@ struct ExternalQueryBuilder ExternalQueryBuilder( const DictionaryStructure & dict_struct_, const std::string & db_, + const std::string & schema_, const std::string & table_, const std::string & where_, IdentifierQuotingStyle quoting_style_); diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 505ce7b0c12..f016f2bf537 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -68,7 +68,7 @@ MySQLDictionarySource::MySQLDictionarySource( , dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)} , sample_block{sample_block_} , pool{mysqlxx::PoolFactory::instance().get(config, config_prefix)} - , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} , close_connection{config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)} @@ -87,7 +87,7 @@ MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other , dont_check_update_time{other.dont_check_update_time} , sample_block{other.sample_block} , pool{other.pool} - , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , load_all_query{other.load_all_query} , last_modification{other.last_modification} , invalidate_query{other.invalidate_query} diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 92af20e646b..b3393d55e5d 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -27,6 +27,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } namespace @@ -60,6 +61,39 @@ namespace std::unique_ptr read_buf; BlockInputStreamPtr reader; }; + + + ExternalQueryBuilder makeExternalQueryBuilder(const DictionaryStructure & dict_struct_, + const std::string & db_, + const std::string & schema_, + const std::string & table_, + const std::string & where_, + IXDBCBridgeHelper & bridge_) + { + std::string schema = schema_; + std::string table = table_; + + if (bridge_.isSchemaAllowed()) + { + if (schema.empty()) + { + if (auto pos = table.find('.'); pos != std::string::npos) + { + schema = table.substr(0, pos); + table = table.substr(pos + 1); + } + } + } + else + { + if (!schema.empty()) + throw Exception{"Dictionary source of type " + bridge_.getName() + " specifies a schema but schema is not supported by " + + bridge_.getName() + "-driver", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + } + + return {dict_struct_, db_, schema, table, where_, bridge_.getIdentifierQuotingStyle()}; + } } static const UInt64 max_block_size = 8192; @@ -76,11 +110,12 @@ XDBCDictionarySource::XDBCDictionarySource( , update_time{std::chrono::system_clock::from_time_t(0)} , dict_struct{dict_struct_} , db{config_.getString(config_prefix_ + ".db", "")} + , schema{config_.getString(config_prefix_ + ".schema", "")} , table{config_.getString(config_prefix_ + ".table")} , where{config_.getString(config_prefix_ + ".where", "")} , update_field{config_.getString(config_prefix_ + ".update_field", "")} , 
sample_block{sample_block_} - , query_builder{dict_struct, db, table, where, bridge_->getIdentifierQuotingStyle()} + , query_builder{makeExternalQueryBuilder(dict_struct, db, schema, table, where, *bridge_)} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config_.getString(config_prefix_ + ".invalidate_query", "")} , bridge_helper{bridge_} @@ -104,7 +139,7 @@ XDBCDictionarySource::XDBCDictionarySource(const XDBCDictionarySource & other) , where{other.where} , update_field{other.update_field} , sample_block{other.sample_block} - , query_builder{dict_struct, db, table, where, other.bridge_helper->getIdentifierQuotingStyle()} + , query_builder{other.query_builder} , load_all_query{other.load_all_query} , invalidate_query{other.invalidate_query} , invalidate_query_response{other.invalidate_query_response} diff --git a/src/Dictionaries/XDBCDictionarySource.h b/src/Dictionaries/XDBCDictionarySource.h index 253f802d8fd..87bc42c76ab 100644 --- a/src/Dictionaries/XDBCDictionarySource.h +++ b/src/Dictionaries/XDBCDictionarySource.h @@ -69,6 +69,7 @@ private: std::chrono::time_point update_time; const DictionaryStructure dict_struct; const std::string db; + const std::string schema; const std::string table; const std::string where; const std::string update_field; diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 61a3994b655..3dab4614d5d 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -99,6 +99,8 @@ public: void createHardLink(const String & src_path, const String & dst_path) override; + const String getType() const override { return "local"; } + private: bool tryReserve(UInt64 bytes); diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index b0c1d30c61d..f7948019fe8 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -90,6 +90,8 @@ public: void createHardLink(const String & src_path, const String & dst_path) override; + const String getType() const override { return "memory"; } + private: void createDirectoriesImpl(const String & path); void replaceFileImpl(const String & from_path, const String & to_path); diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 011c75402f4..77a52a7a5d6 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -171,6 +171,9 @@ public: /// Create hardlink from `src_path` to `dst_path`. virtual void createHardLink(const String & src_path, const String & dst_path) = 0; + + /// Return disk type - "local", "s3", etc. + virtual const String getType() const = 0; }; using DiskPtr = std::shared_ptr; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 71b5991f770..873b54353ad 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -20,6 +20,9 @@ #include #include +#include + + namespace DB { @@ -60,10 +63,14 @@ namespace struct Metadata { /// Metadata file version. - static constexpr UInt32 VERSION = 1; + static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; + static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; using PathAndSize = std::pair; + /// S3 root path. + const String & s3_root_path; + /// Disk path. const String & disk_path; /// Relative path to metadata file on local FS. @@ -76,8 +83,8 @@ namespace UInt32 ref_count; /// Load metadata by path or create empty if `create` flag is set. 
- explicit Metadata(const String & disk_path_, const String & metadata_file_path_, bool create = false) - : disk_path(disk_path_), metadata_file_path(metadata_file_path_), total_size(0), s3_objects(0), ref_count(0) + explicit Metadata(const String & s3_root_path_, const String & disk_path_, const String & metadata_file_path_, bool create = false) + : s3_root_path(s3_root_path_), disk_path(disk_path_), metadata_file_path(metadata_file_path_), total_size(0), s3_objects(0), ref_count(0) { if (create) return; @@ -87,10 +94,10 @@ namespace UInt32 version; readIntText(version, buf); - if (version != VERSION) + if (version != VERSION_RELATIVE_PATHS && version != VERSION_ABSOLUTE_PATHS) throw Exception( "Unknown metadata file version. Path: " + disk_path + metadata_file_path - + " Version: " + std::to_string(version) + ", Expected version: " + std::to_string(VERSION), + + " Version: " + std::to_string(version) + ", Maximum expected version: " + std::to_string(VERSION_RELATIVE_PATHS), ErrorCodes::UNKNOWN_FORMAT); assertChar('\n', buf); @@ -108,6 +115,15 @@ namespace readIntText(s3_object_size, buf); assertChar('\t', buf); readEscapedString(s3_object_path, buf); + if (version == VERSION_ABSOLUTE_PATHS) + { + if (!boost::algorithm::starts_with(s3_object_path, s3_root_path)) + throw Exception( + "Path in metadata does not correspond S3 root path. Path: " + s3_object_path + + ", root path: " + s3_root_path + ", disk path: " + disk_path_, + ErrorCodes::UNKNOWN_FORMAT); + s3_object_path = s3_object_path.substr(s3_root_path.size()); + } assertChar('\n', buf); s3_objects[i] = {s3_object_path, s3_object_size}; } @@ -127,7 +143,7 @@ namespace { WriteBufferFromFile buf(disk_path + metadata_file_path, 1024); - writeIntText(VERSION, buf); + writeIntText(VERSION_RELATIVE_PATHS, buf); writeChar('\n', buf); writeIntText(s3_objects.size(), buf); @@ -213,7 +229,7 @@ namespace const auto & [path, size] = metadata.s3_objects[i]; if (size > offset) { - auto buf = std::make_unique(client_ptr, bucket, path, buf_size); + auto buf = std::make_unique(client_ptr, bucket, metadata.s3_root_path + path, buf_size); buf->seek(offset, SEEK_SET); return buf; } @@ -242,7 +258,7 @@ namespace ++current_buf_idx; const auto & path = metadata.s3_objects[current_buf_idx].first; - current_buf = std::make_unique(client_ptr, bucket, path, buf_size); + current_buf = std::make_unique(client_ptr, bucket, metadata.s3_root_path + path, buf_size); current_buf->next(); working_buffer = current_buf->buffer(); absolute_position += working_buffer.size(); @@ -272,7 +288,7 @@ namespace size_t min_upload_part_size, size_t buf_size_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) - , impl(WriteBufferFromS3(client_ptr_, bucket_, s3_path_, min_upload_part_size, buf_size_)) + , impl(WriteBufferFromS3(client_ptr_, bucket_, metadata_.s3_root_path + s3_path_, min_upload_part_size, buf_size_)) , metadata(std::move(metadata_)) , s3_path(s3_path_) { @@ -440,7 +456,7 @@ bool DiskS3::isDirectory(const String & path) const size_t DiskS3::getFileSize(const String & path) const { - Metadata metadata(metadata_path, path); + Metadata metadata(s3_root_path, metadata_path, path); return metadata.total_size; } @@ -493,16 +509,16 @@ void DiskS3::copyFile(const String & from_path, const String & to_path) if (exists(to_path)) remove(to_path); - Metadata from(metadata_path, from_path); - Metadata to(metadata_path, to_path, true); + Metadata from(s3_root_path, metadata_path, from_path); + Metadata to(s3_root_path, metadata_path, to_path, true); for (const auto & [path, size] : 
from.s3_objects) { - auto new_path = s3_root_path + getRandomName(); + auto new_path = getRandomName(); Aws::S3::Model::CopyObjectRequest req; - req.SetCopySource(bucket + "/" + path); + req.SetCopySource(bucket + "/" + s3_root_path + path); req.SetBucket(bucket); - req.SetKey(new_path); + req.SetKey(s3_root_path + new_path); throwIfError(client->CopyObject(req)); to.addObject(new_path, size); @@ -513,7 +529,7 @@ void DiskS3::copyFile(const String & from_path, const String & to_path) std::unique_ptr DiskS3::readFile(const String & path, size_t buf_size, size_t, size_t, size_t) const { - Metadata metadata(metadata_path, path); + Metadata metadata(s3_root_path, metadata_path, path); LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Read from file by path: {}. Existing S3 objects: {}", backQuote(metadata_path + path), metadata.s3_objects.size()); @@ -525,27 +541,27 @@ std::unique_ptr DiskS3::writeFile(const String & path, { bool exist = exists(path); /// Path to store new S3 object. - auto s3_path = s3_root_path + getRandomName(); + auto s3_path = getRandomName(); if (!exist || mode == WriteMode::Rewrite) { /// If metadata file exists - remove and create new. if (exist) remove(path); - Metadata metadata(metadata_path, path, true); + Metadata metadata(s3_root_path, metadata_path, path, true); /// Save empty metadata to disk to have ability to get file size while buffer is not finalized. metadata.save(); - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Write to file by path: {} New S3 path: {}", backQuote(metadata_path + path), s3_path); + LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Write to file by path: {} New S3 path: {}", backQuote(metadata_path + path), s3_root_path + s3_path); return std::make_unique(client, bucket, metadata, s3_path, min_upload_part_size, buf_size); } else { - Metadata metadata(metadata_path, path); + Metadata metadata(s3_root_path, metadata_path, path); LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Append to file by path: {}. New S3 path: {}. Existing S3 objects: {}.", - backQuote(metadata_path + path), s3_path, metadata.s3_objects.size()); + backQuote(metadata_path + path), s3_root_path + s3_path, metadata.s3_objects.size()); return std::make_unique(client, bucket, metadata, s3_path, min_upload_part_size, buf_size); } @@ -558,7 +574,7 @@ void DiskS3::remove(const String & path) Poco::File file(metadata_path + path); if (file.isFile()) { - Metadata metadata(metadata_path, path); + Metadata metadata(s3_root_path, metadata_path, path); /// If there is no references - delete content from S3. if (metadata.ref_count == 0) @@ -569,7 +585,7 @@ void DiskS3::remove(const String & path) /// TODO: Make operation idempotent. Do not throw exception if key is already deleted. Aws::S3::Model::DeleteObjectRequest request; request.SetBucket(bucket); - request.SetKey(s3_object_path); + request.SetKey(s3_root_path + s3_object_path); throwIfError(client->DeleteObject(request)); } } @@ -644,7 +660,7 @@ Poco::Timestamp DiskS3::getLastModified(const String & path) void DiskS3::createHardLink(const String & src_path, const String & dst_path) { /// Increment number of references. - Metadata src(metadata_path, src_path); + Metadata src(s3_root_path, metadata_path, src_path); ++src.ref_count; src.save(); @@ -655,7 +671,7 @@ void DiskS3::createHardLink(const String & src_path, const String & dst_path) void DiskS3::createFile(const String & path) { /// Create empty metadata file. 
- Metadata metadata(metadata_path, path, true); + Metadata metadata(s3_root_path, metadata_path, path, true); metadata.save(); } diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 5fa8e8358a6..82168c55bb5 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -96,6 +96,8 @@ public: void setReadOnly(const String & path) override; + const String getType() const override { return "s3"; } + private: bool tryReserve(UInt64 bytes); diff --git a/src/Disks/S3/ProxyListConfiguration.h b/src/Disks/S3/ProxyListConfiguration.h index a3fe83bfc49..14e23eb04d0 100644 --- a/src/Disks/S3/ProxyListConfiguration.h +++ b/src/Disks/S3/ProxyListConfiguration.h @@ -1,5 +1,7 @@ #pragma once +#include // for std::atomic + #include "ProxyConfiguration.h" namespace DB::S3 diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 999a81bd413..119ba037c96 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -46,7 +46,8 @@ namespace throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS); auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port"); - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", endpoint.toString(), proxy_scheme, proxy_port); + LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", + endpoint.toString(), proxy_scheme, proxy_port); return std::make_shared(endpoint, proxy_scheme, proxy_port); } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index c4abb265498..f09a13d51ba 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -362,6 +362,7 @@ FormatFactory::FormatFactory() #if !defined(ARCADIA_BUILD) registerInputFormatProcessorCapnProto(*this); registerInputFormatProcessorORC(*this); + registerOutputFormatProcessorORC(*this); registerInputFormatProcessorParquet(*this); registerOutputFormatProcessorParquet(*this); registerInputFormatProcessorArrow(*this); diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 392165b4e45..d8a1ab55bd1 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -175,6 +175,9 @@ void registerInputFormatProcessorTemplate(FormatFactory & factory); void registerOutputFormatProcessorTemplate(FormatFactory & factory); void registerInputFormatProcessorMsgPack(FormatFactory & factory); void registerOutputFormatProcessorMsgPack(FormatFactory & factory); +void registerInputFormatProcessorORC(FormatFactory & factory); +void registerOutputFormatProcessorORC(FormatFactory & factory); + /// File Segmentation Engines for parallel reading @@ -207,6 +210,5 @@ void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory); void registerInputFormatProcessorCapnProto(FormatFactory & factory); void registerInputFormatProcessorRegexp(FormatFactory & factory); void registerInputFormatProcessorJSONAsString(FormatFactory & factory); -void registerInputFormatProcessorORC(FormatFactory & factory); } diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 1d4fb00cc60..577ea19fe8a 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -931,6 +932,8 @@ private: if (0 == tuple_size) throw Exception("Comparison of zero-sized tuples is not implemented.", ErrorCodes::NOT_IMPLEMENTED); + ColumnsWithTypeAndName 
convolution_types(tuple_size); + Block tmp_block; for (size_t i = 0; i < tuple_size; ++i) { @@ -938,9 +941,10 @@ private: tmp_block.insert(y[i]); auto impl = func_compare->build({x[i], y[i]}); + convolution_types[i].type = impl->getReturnType(); /// Comparison of the elements. - tmp_block.insert({ nullptr, std::make_shared(), "" }); + tmp_block.insert({ nullptr, impl->getReturnType(), "" }); impl->execute(tmp_block, {i * 3, i * 3 + 1}, i * 3 + 2, input_rows_count); } @@ -952,14 +956,13 @@ private: } /// Logical convolution. - tmp_block.insert({ nullptr, std::make_shared(), "" }); ColumnNumbers convolution_args(tuple_size); for (size_t i = 0; i < tuple_size; ++i) convolution_args[i] = i * 3 + 2; - ColumnsWithTypeAndName convolution_types(convolution_args.size(), { nullptr, std::make_shared(), "" }); auto impl = func_convolution->build(convolution_types); + tmp_block.insert({ nullptr, impl->getReturnType(), "" }); impl->execute(tmp_block, convolution_args, tuple_size * 3, input_rows_count); block.getByPosition(result).column = tmp_block.getByPosition(tuple_size * 3).column; @@ -978,49 +981,71 @@ private: size_t tuple_size, size_t input_rows_count) { - ColumnsWithTypeAndName bin_args = {{ nullptr, std::make_shared(), "" }, - { nullptr, std::make_shared(), "" }}; - - auto func_and_adaptor = func_and->build(bin_args); - auto func_or_adaptor = func_or->build(bin_args); - Block tmp_block; /// Pairwise comparison of the inequality of all elements; on the equality of all elements except the last. + /// (x[i], y[i], x[i] < y[i], x[i] == y[i]) for (size_t i = 0; i < tuple_size; ++i) { tmp_block.insert(x[i]); tmp_block.insert(y[i]); - tmp_block.insert({ nullptr, std::make_shared(), "" }); + tmp_block.insert(ColumnWithTypeAndName()); // pos == i * 4 + 2 if (i + 1 != tuple_size) { auto impl_head = func_compare_head->build({x[i], y[i]}); + tmp_block.getByPosition(i * 4 + 2).type = impl_head->getReturnType(); impl_head->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count); - tmp_block.insert({ nullptr, std::make_shared(), "" }); + tmp_block.insert(ColumnWithTypeAndName()); // i * 4 + 3 auto impl_equals = func_equals->build({x[i], y[i]}); + tmp_block.getByPosition(i * 4 + 3).type = impl_equals->getReturnType(); impl_equals->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 3, input_rows_count); } else { auto impl_tail = func_compare_tail->build({x[i], y[i]}); + tmp_block.getByPosition(i * 4 + 2).type = impl_tail->getReturnType(); impl_tail->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count); } } /// Combination. Complex code - make a drawing. It can be replaced by a recursive comparison of tuples. + /// Last column contains intermediate result. 
+ /// Code is generally equivalent to: + /// res = `x < y`[tuple_size - 1]; + /// for (int i = tuple_size - 2; i >= 0; --i) + /// res = (res && `x == y`[i]) || `x < y`[i]; size_t i = tuple_size - 1; while (i > 0) { - tmp_block.insert({ nullptr, std::make_shared(), "" }); - func_and_adaptor->execute(tmp_block, {tmp_block.columns() - 2, (i - 1) * 4 + 3}, tmp_block.columns() - 1, input_rows_count); - tmp_block.insert({ nullptr, std::make_shared(), "" }); - func_or_adaptor->execute(tmp_block, {tmp_block.columns() - 2, (i - 1) * 4 + 2}, tmp_block.columns() - 1, input_rows_count); --i; + + size_t and_lhs_pos = tmp_block.columns() - 1; // res + size_t and_rhs_pos = i * 4 + 3; // `x == y`[i] + tmp_block.insert(ColumnWithTypeAndName()); + + ColumnsWithTypeAndName and_args = {{ nullptr, tmp_block.getByPosition(and_lhs_pos).type, "" }, + { nullptr, tmp_block.getByPosition(and_rhs_pos).type, "" }}; + + auto func_and_adaptor = func_and->build(and_args); + tmp_block.getByPosition(tmp_block.columns() - 1).type = func_and_adaptor->getReturnType(); + func_and_adaptor->execute(tmp_block, {and_lhs_pos, and_rhs_pos}, tmp_block.columns() - 1, input_rows_count); + + size_t or_lhs_pos = tmp_block.columns() - 1; // (res && `x == y`[i]) + size_t or_rhs_pos = i * 4 + 2; // `x < y`[i] + tmp_block.insert(ColumnWithTypeAndName()); + + ColumnsWithTypeAndName or_args = {{ nullptr, tmp_block.getByPosition(or_lhs_pos).type, "" }, + { nullptr, tmp_block.getByPosition(or_rhs_pos).type, "" }}; + + auto func_or_adaptor = func_or->build(or_args); + tmp_block.getByPosition(tmp_block.columns() - 1).type = func_or_adaptor->getReturnType(); + func_or_adaptor->execute(tmp_block, {or_lhs_pos, or_rhs_pos}, tmp_block.columns() - 1, input_rows_count); + } block.getByPosition(result).column = tmp_block.getByPosition(tmp_block.columns() - 1).column; @@ -1109,13 +1134,20 @@ public: auto adaptor = FunctionOverloadResolverAdaptor(std::make_unique( FunctionComparison::create(context))); + bool has_nullable = false; + size_t size = left_tuple->getElements().size(); for (size_t i = 0; i < size; ++i) { ColumnsWithTypeAndName args = {{nullptr, left_tuple->getElements()[i], ""}, {nullptr, right_tuple->getElements()[i], ""}}; - adaptor.build(args); + has_nullable = has_nullable || adaptor.build(args)->getReturnType()->isNullable(); } + + /// If any element comparison is nullable, return type will also be nullable. + /// We useDefaultImplementationForNulls, but it doesn't work for tuples. + if (has_nullable) + return std::make_shared(std::make_shared()); } return std::make_shared(); @@ -1135,7 +1167,7 @@ public: /// NOTE: Nullable types are special case. /// (BTW, this function use default implementation for Nullable, so Nullable types cannot be here. Check just in case.) /// NOTE: We consider NaN comparison to be implementation specific (and in our implementation NaNs are sometimes equal sometimes not). 
- if (left_type->equals(*right_type) && !left_type->isNullable() && col_left_untyped == col_right_untyped) + if (left_type->equals(*right_type) && !left_type->isNullable() && !isTuple(left_type) && col_left_untyped == col_right_untyped) { /// Always true: =, <=, >= if constexpr (std::is_same_v, EqualsOp> diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index ef1e78baa16..da42c8a2623 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -78,6 +78,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 0dee48709b9..932a8cd5bed 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -534,7 +534,8 @@ enum class ConvertFromStringExceptionMode enum class ConvertFromStringParsingMode { Normal, - BestEffort /// Only applicable for DateTime. Will use sophisticated method, that is slower. + BestEffort, /// Only applicable for DateTime. Will use sophisticated method, that is slower. + BestEffortUS }; template , NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>; struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; }; +struct NameParseDateTimeBestEffortUS { static constexpr auto name = "parseDateTimeBestEffortUS"; }; struct NameParseDateTimeBestEffortOrZero { static constexpr auto name = "parseDateTimeBestEffortOrZero"; }; struct NameParseDateTimeBestEffortOrNull { static constexpr auto name = "parseDateTimeBestEffortOrNull"; }; struct NameParseDateTime64BestEffort { static constexpr auto name = "parseDateTime64BestEffort"; }; @@ -1587,6 +1595,8 @@ struct NameParseDateTime64BestEffortOrNull { static constexpr auto name = "parse using FunctionParseDateTimeBestEffort = FunctionConvertFromString< DataTypeDateTime, NameParseDateTimeBestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTimeBestEffortUS = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; using FunctionParseDateTimeBestEffortOrZero = FunctionConvertFromString< DataTypeDateTime, NameParseDateTimeBestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString< diff --git a/src/Functions/GeoHash.cpp b/src/Functions/GeoHash.cpp index 7fdeb52b15c..3ebc6f3d0fc 100644 --- a/src/Functions/GeoHash.cpp +++ b/src/Functions/GeoHash.cpp @@ -115,6 +115,7 @@ inline Encoded merge(const Encoded & encodedLon, const Encoded & encodedLat, uin result.fill(0); const auto bits = (precision * BITS_PER_SYMBOL) / 2; + assert(bits < 255); uint8_t i = 0; for (; i < bits; ++i) { diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index a851fe3dd58..70c8419fcc9 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -27,7 +28,7 @@ namespace ErrorCodes } -/// Is the LIKE expression reduced to finding a substring in a string? +/// Is the [I]LIKE expression reduced to finding a substring in a string? 
static inline bool likePatternIsStrstr(const String & pattern, String & res) { res = ""; @@ -67,17 +68,21 @@ static inline bool likePatternIsStrstr(const String & pattern, String & res) return true; } -/** 'like' - if true, treat pattern as SQL LIKE; if false - treat pattern as re2 regexp. +/** 'like' - if true, treat pattern as SQL LIKE or ILIKE; if false - treat pattern as re2 regexp. * NOTE: We want to run regexp search for whole block by one call (as implemented in function 'position') * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. */ -template +template struct MatchImpl { static constexpr bool use_default_implementation_for_constants = true; using ResultType = UInt8; + using Searcher = std::conditional_t; + static void vectorConstant( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray & res) { @@ -85,7 +90,8 @@ struct MatchImpl return; String strstr_pattern; - /// A simple case where the LIKE expression reduces to finding a substring in a string + + /// A simple case where the [I]LIKE expression reduces to finding a substring in a string if (like && likePatternIsStrstr(pattern, strstr_pattern)) { const UInt8 * begin = data.data(); @@ -96,7 +102,7 @@ struct MatchImpl size_t i = 0; /// TODO You need to make that `searcher` is common to all the calls of the function. - Volnitsky searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); + Searcher searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) @@ -126,7 +132,10 @@ struct MatchImpl { size_t size = offsets.size(); - auto regexp = Regexps::get(pattern); + constexpr int flags = case_insensitive ? + Regexps::Regexp::RE_CASELESS : 0; + + auto regexp = Regexps::get(pattern, flags); std::string required_substring; bool is_trivial; @@ -170,7 +179,7 @@ struct MatchImpl /// The current index in the array of strings. size_t i = 0; - Volnitsky searcher(required_substring.data(), required_substring.size(), end - pos); + Searcher searcher(required_substring.data(), required_substring.size(), end - pos); /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) @@ -248,7 +257,7 @@ struct MatchImpl /// If pattern is larger than string size - it cannot be found. if (strstr_pattern.size() <= n) { - Volnitsky searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); + Searcher searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) @@ -328,7 +337,7 @@ struct MatchImpl /// If required substring is larger than string size - it cannot be found. if (strstr_pattern.size() <= n) { - Volnitsky searcher(required_substring.data(), required_substring.size(), end - pos); + Searcher searcher(required_substring.data(), required_substring.size(), end - pos); /// We will search for the next occurrence in all rows at once. 
while (pos < end && end != (pos = searcher.search(pos, end - pos))) diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 6e1b03a47bd..d2843e3ec6a 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -315,7 +315,7 @@ void PointInPolygonWithGrid::buildGrid() if (has_empty_bound) return; - cells.assign(grid_size * grid_size, {}); + cells.assign(size_t(grid_size) * grid_size, {}); const Point & min_corner = box.min_corner(); diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index a9b1b336c79..cbfbbf7107d 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -59,19 +59,20 @@ namespace Regexps * In destructor, it returns the object back to the Pool for further reuse. */ template - inline Pool::Pointer get(const std::string & pattern) + inline Pool::Pointer get(const std::string & pattern, int flags = 0) { /// C++11 has thread-safe function-local statics on most modern compilers. static Pool known_regexps; /// Different variables for different pattern parameters. - return known_regexps.get(pattern, [&pattern] + return known_regexps.get(pattern, [flags, &pattern] { - int flags = OptimizedRegularExpression::RE_DOT_NL; + int flags_final = flags | OptimizedRegularExpression::RE_DOT_NL; + if (no_capture) - flags |= OptimizedRegularExpression::RE_NO_CAPTURE; + flags_final |= OptimizedRegularExpression::RE_NO_CAPTURE; ProfileEvents::increment(ProfileEvents::RegexpCreated); - return new Regexp{createRegexp(pattern, flags)}; + return new Regexp{createRegexp(pattern, flags_final)}; }); } } diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 346f2e35555..2a040f80efe 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -73,7 +73,7 @@ public: if (!array_type) throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array. Found " + arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - nested_types[i] = removeLowCardinality(array_type->getNestedType()); + nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); } const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); @@ -190,9 +190,7 @@ public: const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); if (!column_const_array) throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); - column_array_ptr = column_const_array->convertToFullColumn(); - if (column_array_ptr->lowCardinality()) - column_array_ptr = column_array_ptr->convertToFullColumnIfLowCardinality(); + column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); column_array = checkAndGetColumn(column_array_ptr.get()); } @@ -218,7 +216,7 @@ public: } arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - removeLowCardinality(array_type->getNestedType()), + recursiveRemoveLowCardinality(array_type->getNestedType()), array_with_type_and_name.name)); } diff --git a/src/Functions/array/arraySum.cpp b/src/Functions/array/arraySum.cpp index 1c9a4853a16..1aedcb6ef92 100644 --- a/src/Functions/array/arraySum.cpp +++ b/src/Functions/array/arraySum.cpp @@ -51,6 +51,7 @@ struct ArraySumImpl const ColVecType * column = checkAndGetColumn(&*mapped); + /// Constant case. 
if (!column) { const ColumnConst * column_const = checkAndGetColumnConst(&*mapped); @@ -58,7 +59,7 @@ struct ArraySumImpl if (!column_const) return false; - const Element x = column_const->template getValue(); + const Result x = column_const->template getValue(); // NOLINT typename ColVecResult::MutablePtr res_column; if constexpr (IsDecimalNumber) @@ -75,6 +76,7 @@ struct ArraySumImpl size_t pos = 0; for (size_t i = 0; i < offsets.size(); ++i) { + /// Just multiply the value by array size. res[i] = x * (offsets[i] - pos); pos = offsets[i]; } diff --git a/src/Functions/defaultValueOfArgumentType.cpp b/src/Functions/defaultValueOfArgumentType.cpp index 85da76ce694..b07a92bd677 100644 --- a/src/Functions/defaultValueOfArgumentType.cpp +++ b/src/Functions/defaultValueOfArgumentType.cpp @@ -22,6 +22,7 @@ public: } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } size_t getNumberOfArguments() const override { diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 080c1108deb..ad69d07c337 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -188,6 +188,16 @@ private: writeNumber2(target, ToISOWeekImpl::execute(source, timezone)); } + static void ISO8601Year2(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + { + writeNumber2(target, ToISOYearImpl::execute(source, timezone) % 100); + } + + static void ISO8601Year4(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + { + writeNumber4(target, ToISOYearImpl::execute(source, timezone)); + } + static void year2(char * target, Time source, const DateLUTImpl & timezone) { writeNumber2(target, ToYearImpl::execute(source, timezone) % 100); @@ -459,6 +469,18 @@ public: result.append("0000-00-00"); break; + // Last two digits of year of ISO 8601 week number (see %G) + case 'g': + instructions.emplace_back(&Action::ISO8601Year2, 2); + result.append("00"); + break; + + // Year of ISO 8601 week number (see %V) + case 'G': + instructions.emplace_back(&Action::ISO8601Year4, 4); + result.append("0000"); + break; + // Day of the year (001-366) 235 case 'j': instructions.emplace_back(&Action::dayOfYear, 3); diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index bff92d7738d..5707326c60a 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -102,8 +102,6 @@ void geodistInit() inline float geodistDegDiff(float f) { f = fabsf(f); - while (f > 360) - f -= 360; if (f > 180) f = 360 - f; return f; diff --git a/src/Functions/hasThreadFuzzer.cpp b/src/Functions/hasThreadFuzzer.cpp new file mode 100644 index 00000000000..1420efb5dde --- /dev/null +++ b/src/Functions/hasThreadFuzzer.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include + + +namespace DB +{ + +/** Returns whether Thread Fuzzer is effective. + * It can be used in tests to prevent too long runs. 
+ */ +class FunctionHasThreadFuzzer : public IFunction +{ +public: + static constexpr auto name = "hasThreadFuzzer"; + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override + { + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, ThreadFuzzer::instance().isEffective()); + } +}; + + +void registerFunctionHasThreadFuzzer(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 593cf56caf8..6e46a03c69a 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -432,8 +432,7 @@ private: const PaddedPODArray & cond_data = cond_col->getData(); size_t rows = cond_data.size(); - if ((col_then_fixed || col_then_const_fixed) - && (col_else_fixed || col_else_const_fixed)) + if (isFixedString(block.getByPosition(result).type)) { /// The result is FixedString. @@ -448,16 +447,19 @@ private: else if (col_then_const_fixed && col_else_fixed) conditional(ConstSource(*col_then_const_fixed), FixedStringSource(*col_else_fixed), sink, cond_data); else if (col_then_const_fixed && col_else_const_fixed) - conditional(ConstSource(*col_then_const_fixed), ConstSource(*col_else_const_fixed), sink, cond_data); + conditional(ConstSource(*col_then_const_fixed), + ConstSource(*col_else_const_fixed), sink, cond_data); + else + return false; block.getByPosition(result).column = std::move(col_res_untyped); return true; } - if ((col_then || col_then_const || col_then_fixed || col_then_const_fixed) - && (col_else || col_else_const || col_else_fixed || col_else_const_fixed)) + if (isString(block.getByPosition(result).type)) { /// The result is String. + auto col_res = ColumnString::create(); auto sink = StringSink(*col_res, rows); @@ -485,6 +487,17 @@ private: conditional(ConstSource(*col_then_const), ConstSource(*col_else_const_fixed), sink, cond_data); else if (col_then_const_fixed && col_else_const) conditional(ConstSource(*col_then_const_fixed), ConstSource(*col_else_const), sink, cond_data); + else if (col_then_fixed && col_else_fixed) + conditional(FixedStringSource(*col_then_fixed), FixedStringSource(*col_else_fixed), sink, cond_data); + else if (col_then_fixed && col_else_const_fixed) + conditional(FixedStringSource(*col_then_fixed), ConstSource(*col_else_const_fixed), sink, cond_data); + else if (col_then_const_fixed && col_else_fixed) + conditional(ConstSource(*col_then_const_fixed), FixedStringSource(*col_else_fixed), sink, cond_data); + else if (col_then_const_fixed && col_else_const_fixed) + conditional(ConstSource(*col_then_const_fixed), + ConstSource(*col_else_const_fixed), sink, cond_data); + else + return false; block.getByPosition(result).column = std::move(col_res); return true; @@ -590,7 +603,8 @@ private: return true; } - static void executeGeneric(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) + static void executeGeneric( + const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { /// Convert both columns to the common type (if needed). 
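The MatchImpl.h changes above thread a `case_insensitive` template flag through the LIKE machinery: the plain-substring fast path selects a case-insensitive searcher via `std::conditional_t`, the regexp path adds a caseless flag, and the `ilike`/`notILike` functions registered in the hunks below simply instantiate the same template with that flag set. A minimal sketch of the compile-time searcher selection, with simplified stand-in searchers rather than the Volnitsky classes, might look like this:

```cpp
#include <algorithm>
#include <cctype>
#include <string>
#include <type_traits>

struct CaseSensitiveSearcher
{
    static bool contains(const std::string & haystack, const std::string & needle)
    {
        return haystack.find(needle) != std::string::npos;
    }
};

struct CaseInsensitiveSearcher
{
    static bool contains(std::string haystack, std::string needle)
    {
        auto lower = [](std::string & s)
        {
            std::transform(s.begin(), s.end(), s.begin(),
                           [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        };
        lower(haystack);
        lower(needle);
        return haystack.find(needle) != std::string::npos;
    }
};

template <bool case_insensitive>
struct SubstringMatch
{
    /// Same idea as `using Searcher = std::conditional_t<...>` in MatchImpl.
    using Searcher = std::conditional_t<case_insensitive, CaseInsensitiveSearcher, CaseSensitiveSearcher>;

    static bool match(const std::string & haystack, const std::string & needle)
    {
        return Searcher::contains(haystack, needle);
    }
};
```

In this sketch `SubstringMatch<true>::match("AbC", "bc")` succeeds while `SubstringMatch<false>::match` does not, mirroring how `ilike` reuses the LIKE code path with the flag enabled.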
diff --git a/src/Functions/ilike.cpp b/src/Functions/ilike.cpp new file mode 100644 index 00000000000..a39a907eff2 --- /dev/null +++ b/src/Functions/ilike.cpp @@ -0,0 +1,24 @@ +#include "FunctionsStringSearch.h" +#include "FunctionFactory.h" +#include "MatchImpl.h" + +namespace DB +{ + +struct NameILike +{ + static constexpr auto name = "ilike"; +}; + +namespace +{ + using ILikeImpl = MatchImpl; +} + +using FunctionILike = FunctionsStringSearch; + +void registerFunctionILike(FunctionFactory & factory) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp new file mode 100644 index 00000000000..81bfa19a55a --- /dev/null +++ b/src/Functions/initializeAggregation.cpp @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + + +class FunctionInitializeAggregation : public IFunction +{ +public: + static constexpr auto name = "initializeAggregation"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; + +private: + mutable AggregateFunctionPtr aggregate_function; +}; + + +DataTypePtr FunctionInitializeAggregation::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +{ + if (arguments.size() < 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be at least 2.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const ColumnConst * aggregate_function_name_column = checkAndGetColumnConst(arguments[0].column.get()); + if (!aggregate_function_name_column) + throw Exception("First argument for function " + getName() + " must be constant string: name of aggregate function.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + DataTypes argument_types(arguments.size() - 1); + for (size_t i = 1, size = arguments.size(); i < size; ++i) + { + argument_types[i - 1] = arguments[i].type; + } + + if (!aggregate_function) + { + String aggregate_function_name_with_params = aggregate_function_name_column->getValue(); + + if (aggregate_function_name_with_params.empty()) + throw Exception("First argument for function " + getName() + " (name of aggregate function) cannot be empty.", + ErrorCodes::BAD_ARGUMENTS); + + String aggregate_function_name; + Array params_row; + getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, + aggregate_function_name, params_row, "function " + getName()); + + AggregateFunctionProperties properties; + aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); + } + + return aggregate_function->getReturnType(); +} + + +void FunctionInitializeAggregation::executeImpl(Block & 
block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) +{ + IAggregateFunction & agg_func = *aggregate_function; + std::unique_ptr arena = std::make_unique(); + + const size_t num_arguments_columns = arguments.size() - 1; + + std::vector materialized_columns(num_arguments_columns); + std::vector aggregate_arguments_vec(num_arguments_columns); + + for (size_t i = 0; i < num_arguments_columns; ++i) + { + const IColumn * col = block.getByPosition(arguments[i + 1]).column.get(); + materialized_columns.emplace_back(col->convertToFullColumnIfConst()); + aggregate_arguments_vec[i] = &(*materialized_columns.back()); + } + + const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); + + MutableColumnPtr result_holder = block.getByPosition(result).type->createColumn(); + IColumn & res_col = *result_holder; + + /// AggregateFunction's states should be inserted into column using specific way + auto * res_col_aggregate_function = typeid_cast(&res_col); + + if (!res_col_aggregate_function && agg_func.isState()) + throw Exception("State function " + agg_func.getName() + " inserts results into non-state column " + + block.getByPosition(result).type->getName(), ErrorCodes::ILLEGAL_COLUMN); + + PODArray places(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) + { + places[i] = arena->alignedAlloc(agg_func.sizeOfData(), agg_func.alignOfData()); + try + { + agg_func.create(places[i]); + } + catch (...) + { + for (size_t j = 0; j < i; ++j) + agg_func.destroy(places[j]); + throw; + } + } + + SCOPE_EXIT({ + for (size_t i = 0; i < input_rows_count; ++i) + agg_func.destroy(places[i]); + }); + + { + auto * that = &agg_func; + /// Unnest consecutive trailing -State combinators + while (auto * func = typeid_cast(that)) + that = func->getNestedFunction().get(); + that->addBatch(input_rows_count, places.data(), 0, aggregate_arguments, arena.get()); + } + + for (size_t i = 0; i < input_rows_count; ++i) + if (!res_col_aggregate_function) + agg_func.insertResultInto(places[i], res_col, arena.get()); + else + res_col_aggregate_function->insertFrom(places[i]); + block.getByPosition(result).column = std::move(result_holder); +} + + +void registerFunctionInitializeAggregation(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/intDiv.cpp b/src/Functions/intDiv.cpp index d21e779045f..7e34f106147 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -26,12 +26,11 @@ struct DivideIntegralByConstantImpl static NO_INLINE void vectorConstant(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size) { - if (unlikely(b == 0)) - throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); - #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" + /// Division by -1. By the way, we avoid FPE by division of the largest negative number by -1. + /// And signed integer overflow is well defined in C++20. if (unlikely(is_signed_v && b == -1)) { for (size_t i = 0; i < size; ++i) @@ -39,8 +38,20 @@ struct DivideIntegralByConstantImpl return; } + /// Division with too large divisor. 
+ if (unlikely(b > std::numeric_limits::max() + || (std::is_signed_v && std::is_signed_v && b < std::numeric_limits::lowest()))) + { + for (size_t i = 0; i < size; ++i) + c_pos[i] = 0; + return; + } + #pragma GCC diagnostic pop + if (unlikely(static_cast(b) == 0)) + throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); + libdivide::divider divider(b); const A * a_end = a_pos + size; diff --git a/src/Functions/isZeroOrNull.cpp b/src/Functions/isZeroOrNull.cpp new file mode 100644 index 00000000000..ee2b87e9bab --- /dev/null +++ b/src/Functions/isZeroOrNull.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; +} + +/// Returns 1 if argument is zero or NULL. +/// It can be used to negate filter in WHERE condition. +/// "WHERE isZeroOrNull(expr)" will return exactly the same rows that "WHERE expr" will filter out. +class FunctionIsZeroOrNull : public IFunction +{ +public: + static constexpr auto name = "isZeroOrNull"; + + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const DataTypes & types) const override + { + if (!isNumber(removeNullable(types.at(0)))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The argument of function {} must have simple numeric type, possibly Nullable", name); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const ColumnPtr & input_column = block.getByPosition(arguments[0]).column; + + if (const ColumnNullable * input_column_nullable = checkAndGetColumn(input_column.get())) + { + const NullMap & null_map = input_column_nullable->getNullMapData(); + const IColumn * nested_column = &input_column_nullable->getNestedColumn(); + + if (!castTypeToEither< + ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, + ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, + ColumnFloat32, ColumnFloat64>( + nested_column, [&](const auto & column) + { + auto res = ColumnUInt8::create(input_rows_count); + processNullable(column.getData(), null_map, res->getData(), input_rows_count); + block.getByPosition(result).column = std::move(res); + return true; + })) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must have simple numeric type, possibly Nullable", name); + } + } + else + { + if (!castTypeToEither< + ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, + ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, + ColumnFloat32, ColumnFloat64>( + input_column.get(), [&](const auto & column) + { + auto res = ColumnUInt8::create(input_rows_count); + processNotNullable(column.getData(), res->getData(), input_rows_count); + block.getByPosition(result).column = std::move(res); + return true; + })) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must have simple numeric type, possibly Nullable", name); + } + } + } + +private: + template + void processNotNullable(const InputData & 
input_data, ColumnUInt8::Container & result_data, size_t input_rows_count) + { + for (size_t i = 0; i < input_rows_count; ++i) + result_data[i] = !input_data[i]; + } + + template + void processNullable(const InputData & input_data, const NullMap & input_null_map, + ColumnUInt8::Container & result_data, size_t input_rows_count) + { + for (size_t i = 0; i < input_rows_count; ++i) + result_data[i] = input_null_map[i] || !input_data[i]; + } +}; + + +void registerFunctionIsZeroOrNull(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/src/Functions/like.cpp b/src/Functions/like.cpp index c07f68dfb56..f334cef7917 100644 --- a/src/Functions/like.cpp +++ b/src/Functions/like.cpp @@ -11,11 +11,15 @@ struct NameLike static constexpr auto name = "like"; }; -using FunctionLike = FunctionsStringSearch, NameLike>; +namespace +{ + using LikeImpl = MatchImpl; +} + +using FunctionLike = FunctionsStringSearch; void registerFunctionLike(FunctionFactory & factory) { factory.registerFunction(); } - } diff --git a/src/Functions/likePatternToRegexp.h b/src/Functions/likePatternToRegexp.h index 3a078b468c2..24cb6ea78c7 100644 --- a/src/Functions/likePatternToRegexp.h +++ b/src/Functions/likePatternToRegexp.h @@ -4,7 +4,8 @@ namespace DB { -/// Transforms the LIKE expression into regexp re2. For example, abc%def -> ^abc.*def$ + +/// Transforms the [I]LIKE expression into regexp re2. For example, abc%def -> ^abc.*def$ inline String likePatternToRegexp(const String & pattern) { String res; diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index 631b7d12263..c90a590da61 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -27,12 +27,10 @@ struct ModuloByConstantImpl static NO_INLINE void vectorConstant(const A * __restrict src, B b, ResultType * __restrict dst, size_t size) { - if (unlikely(b == 0)) - throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); - #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" + /// Modulo with too small divisor. if (unlikely((std::is_signed_v && b == -1) || b == 1)) { for (size_t i = 0; i < size; ++i) @@ -40,8 +38,20 @@ struct ModuloByConstantImpl return; } + /// Modulo with too large divisor. + if (unlikely(b > std::numeric_limits::max() + || (std::is_signed_v && std::is_signed_v && b < std::numeric_limits::lowest()))) + { + for (size_t i = 0; i < size; ++i) + dst[i] = src[i]; + return; + } + #pragma GCC diagnostic pop + if (unlikely(static_cast(b) == 0)) + throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); + libdivide::divider divider(b); /// Here we failed to make the SSE variant from libdivide give an advantage. 
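The intDiv/modulo hunks above reorder the divisor checks for the by-constant path: `b == -1` is handled first to avoid an FPE on the most negative dividend, divisors that cannot fit into the element type short-circuit (quotient 0 for intDiv, the unchanged source for modulo), and only the narrowed divisor is then checked for zero before libdivide is used. A rough sketch of that ordering for the division case, with simplified types and plain scalar division standing in for the libdivide fast path:

```cpp
#include <cstddef>
#include <limits>
#include <stdexcept>
#include <type_traits>

template <typename A, typename B>
void divideByConstant(const A * a, B b, A * c, size_t size)
{
    if constexpr (std::is_signed_v<B>)
    {
        /// Division by -1: avoids FPE when dividing the most negative value by -1.
        if (b == -1)
        {
            for (size_t i = 0; i < size; ++i)
                c[i] = -a[i];
            return;
        }
    }

    /// Divisor does not fit into A: its magnitude exceeds any dividend, so the quotient is 0.
    /// (The real code suppresses the sign-compare warning around this check.)
    if (b > std::numeric_limits<A>::max()
        || (std::is_signed_v<A> && std::is_signed_v<B> && b < std::numeric_limits<A>::lowest()))
    {
        for (size_t i = 0; i < size; ++i)
            c[i] = 0;
        return;
    }

    /// Only now is it safe to narrow b; a zero here is a genuine division by zero.
    if (static_cast<A>(b) == 0)
        throw std::runtime_error("Division by zero");

    for (size_t i = 0; i < size; ++i)
        c[i] = a[i] / static_cast<A>(b);   /// the real code uses a libdivide::divider here
}
```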
diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index b57c9f6316a..5f3f62fe6cb 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -39,6 +39,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } + ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override { ColumnNumbers args; @@ -70,7 +71,6 @@ public: throw Exception{"Invalid number of arguments for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - for_conditions([&](const DataTypePtr & arg) { const IDataType * nested_type; diff --git a/src/Functions/notILike.cpp b/src/Functions/notILike.cpp new file mode 100644 index 00000000000..0f95564b09e --- /dev/null +++ b/src/Functions/notILike.cpp @@ -0,0 +1,24 @@ +#include "FunctionsStringSearch.h" +#include "FunctionFactory.h" +#include "MatchImpl.h" + +namespace DB +{ + +struct NameNotILike +{ + static constexpr auto name = "notILike"; +}; + +namespace +{ + using NotILikeImpl = MatchImpl; +} + +using FunctionNotILike = FunctionsStringSearch; + +void registerFunctionNotILike(FunctionFactory & factory) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 5eb1e3e47c0..8207fcb8edd 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -58,6 +58,8 @@ void registerFunctionGetMacro(FunctionFactory &); void registerFunctionGetScalar(FunctionFactory &); void registerFunctionIsConstant(FunctionFactory &); void registerFunctionGlobalVariable(FunctionFactory &); +void registerFunctionHasThreadFuzzer(FunctionFactory &); +void registerFunctionInitializeAggregation(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -116,6 +118,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionGetScalar(factory); registerFunctionIsConstant(factory); registerFunctionGlobalVariable(factory); + registerFunctionHasThreadFuzzer(factory); + registerFunctionInitializeAggregation(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Functions/registerFunctionsNull.cpp b/src/Functions/registerFunctionsNull.cpp index e8894e19907..238133fbb67 100644 --- a/src/Functions/registerFunctionsNull.cpp +++ b/src/Functions/registerFunctionsNull.cpp @@ -10,6 +10,7 @@ void registerFunctionIfNull(FunctionFactory & factory); void registerFunctionNullIf(FunctionFactory & factory); void registerFunctionAssumeNotNull(FunctionFactory & factory); void registerFunctionToNullable(FunctionFactory & factory); +void registerFunctionIsZeroOrNull(FunctionFactory & factory); void registerFunctionsNull(FunctionFactory & factory) @@ -21,6 +22,7 @@ void registerFunctionsNull(FunctionFactory & factory) registerFunctionNullIf(factory); registerFunctionAssumeNotNull(factory); registerFunctionToNullable(factory); + registerFunctionIsZeroOrNull(factory); } } diff --git a/src/Functions/registerFunctionsStringRegexp.cpp b/src/Functions/registerFunctionsStringRegexp.cpp index 2a0a3c0ea1f..61853b19d11 100644 --- a/src/Functions/registerFunctionsStringRegexp.cpp +++ b/src/Functions/registerFunctionsStringRegexp.cpp @@ -4,7 +4,9 @@ namespace DB class FunctionFactory; void registerFunctionLike(FunctionFactory &); +void registerFunctionILike(FunctionFactory &); void 
registerFunctionNotLike(FunctionFactory &); +void registerFunctionNotILike(FunctionFactory &); void registerFunctionMatch(FunctionFactory &); void registerFunctionExtract(FunctionFactory &); void registerFunctionReplaceOne(FunctionFactory &); @@ -24,7 +26,9 @@ void registerFunctionExtractAllGroupsHorizontal(FunctionFactory &); void registerFunctionsStringRegexp(FunctionFactory & factory) { registerFunctionLike(factory); + registerFunctionILike(factory); registerFunctionNotLike(factory); + registerFunctionNotILike(factory); registerFunctionMatch(factory); registerFunctionExtract(factory); registerFunctionReplaceOne(factory); @@ -41,6 +45,4 @@ void registerFunctionsStringRegexp(FunctionFactory & factory) registerFunctionExtractAllGroupsVertical(factory); registerFunctionExtractAllGroupsHorizontal(factory); } - } - diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 0c1a181471d..72ac36f880a 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -92,8 +92,8 @@ SRCS( array/emptyArrayToSingle.cpp array/hasAll.cpp array/hasAny.cpp - array/hasSubstr.cpp array/has.cpp + array/hasSubstr.cpp array/indexOf.cpp array/length.cpp array/range.cpp @@ -219,6 +219,7 @@ SRCS( h3ToParent.cpp h3ToString.cpp hasColumnInTable.cpp + hasThreadFuzzer.cpp hasTokenCaseInsensitive.cpp hasToken.cpp hostName.cpp @@ -228,7 +229,9 @@ SRCS( ifNull.cpp IFunction.cpp ignore.cpp + ilike.cpp in.cpp + initializeAggregation.cpp intDiv.cpp intDivOrZero.cpp intExp10.cpp @@ -240,6 +243,7 @@ SRCS( isNotNull.cpp isNull.cpp isValidUTF8.cpp + isZeroOrNull.cpp jumpConsistentHash.cpp lcm.cpp least.cpp @@ -288,6 +292,7 @@ SRCS( neighbor.cpp notEmpty.cpp notEquals.cpp + notILike.cpp notLike.cpp now64.cpp now.cpp diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 0dfa80ca107..56632b22071 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -106,7 +106,7 @@ void PocoHTTPClient::MakeRequestInternal( if (request.GetContentBody()) { - LOG_DEBUG(log, "Writing request body."); + LOG_TRACE(log, "Writing request body."); if (attempt > 0) /// rewind content body buffer. 
{ request.GetContentBody()->clear(); @@ -116,7 +116,7 @@ void PocoHTTPClient::MakeRequestInternal( LOG_DEBUG(log, "Written {} bytes to request body", size); } - LOG_DEBUG(log, "Receiving response..."); + LOG_TRACE(log, "Receiving response..."); auto & response_body_stream = session->receiveResponse(poco_response); int status_code = static_cast(poco_response.getStatus()); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 2c75a137222..20ff38150eb 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -31,7 +31,7 @@ const std::pair & convertLogLevel(Aws::U {Aws::Utils::Logging::LogLevel::Error, {DB::LogsLevel::error, Poco::Message::PRIO_ERROR}}, {Aws::Utils::Logging::LogLevel::Warn, {DB::LogsLevel::warning, Poco::Message::PRIO_WARNING}}, {Aws::Utils::Logging::LogLevel::Info, {DB::LogsLevel::information, Poco::Message::PRIO_INFORMATION}}, - {Aws::Utils::Logging::LogLevel::Debug, {DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG}}, + {Aws::Utils::Logging::LogLevel::Debug, {DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}}, {Aws::Utils::Logging::LogLevel::Trace, {DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}}, }; return mapping.at(log_level); diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 4b6183e9c0b..e98dbbc0480 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -88,7 +88,7 @@ struct DateTimeSubsecondPart UInt8 digits; }; -template +template ReturnType parseDateTimeBestEffortImpl( time_t & res, ReadBuffer & in, @@ -264,11 +264,11 @@ ReturnType parseDateTimeBestEffortImpl( /// DD-MM-YY /// DD - UInt8 hour_or_day_of_month = 0; + UInt8 hour_or_day_of_month_or_month = 0; if (num_digits == 2) - readDecimalNumber<2>(hour_or_day_of_month, digits); + readDecimalNumber<2>(hour_or_day_of_month_or_month, digits); else if (num_digits == 1) //-V547 - readDecimalNumber<1>(hour_or_day_of_month, digits); + readDecimalNumber<1>(hour_or_day_of_month_or_month, digits); else return on_error("Cannot read DateTime: logical error, unexpected branch in code", ErrorCodes::LOGICAL_ERROR); @@ -277,7 +277,7 @@ ReturnType parseDateTimeBestEffortImpl( if (has_time) return on_error("Cannot read DateTime: time component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); - hour = hour_or_day_of_month; + hour = hour_or_day_of_month_or_month; has_time = true; num_digits = readDigits(digits, sizeof(digits), in); @@ -309,29 +309,48 @@ ReturnType parseDateTimeBestEffortImpl( if (month) return on_error("Cannot read DateTime: month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); - day_of_month = hour_or_day_of_month; - - num_digits = readDigits(digits, sizeof(digits), in); - - if (num_digits == 2) - readDecimalNumber<2>(month, digits); - else if (num_digits == 1) - readDecimalNumber<1>(month, digits); - else if (num_digits == 0) + if constexpr (is_us_style) { - /// Month in alphabetical form - - char alpha[9]; /// The longest month name: September - size_t num_alpha = readAlpha(alpha, sizeof(alpha), in); - - if (num_alpha < 3) - return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME); - - if (!read_alpha_month(alpha)) - return on_error("Cannot read DateTime: alphabetical characters after day of month don't look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME); + month = hour_or_day_of_month_or_month; + num_digits = readDigits(digits, sizeof(digits), in); + if (num_digits == 2) + 
readDecimalNumber<2>(day_of_month, digits); + else if (num_digits == 1) + readDecimalNumber<1>(day_of_month, digits); + else + return on_error("Cannot read DateTime: unexpected number of decimal digits after month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); } else - return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + { + day_of_month = hour_or_day_of_month_or_month; + + num_digits = readDigits(digits, sizeof(digits), in); + + if (num_digits == 2) + readDecimalNumber<2>(month, digits); + else if (num_digits == 1) + readDecimalNumber<1>(month, digits); + else if (num_digits == 0) + { + /// Month in alphabetical form + + char alpha[9]; /// The longest month name: September + size_t num_alpha = readAlpha(alpha, sizeof(alpha), in); + + if (num_alpha < 3) + return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME); + + if (!read_alpha_month(alpha)) + return on_error("Cannot read DateTime: alphabetical characters after day of month don't look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME); + } + else + return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + + + } + + if (month > 12) + std::swap(month, day_of_month); if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { @@ -358,9 +377,9 @@ ReturnType parseDateTimeBestEffortImpl( else { if (day_of_month) - hour = hour_or_day_of_month; + hour = hour_or_day_of_month_or_month; else - day_of_month = hour_or_day_of_month; + day_of_month = hour_or_day_of_month_or_month; } } else if (num_digits != 0) @@ -522,6 +541,22 @@ ReturnType parseDateTimeBestEffortImpl( if (!day_of_month) day_of_month = 1; + auto is_leap_year = (year % 400 == 0) || (year % 100 != 0 && year % 4 == 0); + + auto check_date = [](const auto & is_leap_year_, const auto & month_, const auto & day_) + { + if ((month_ == 1 || month_ == 3 || month_ == 5 || month_ == 7 || month_ == 8 || month_ == 10 || month_ == 12) && day_ >= 1 && day_ <= 31) + return true; + else if (month_ == 2 && ((is_leap_year_ && day_ >= 1 && day_ <= 29) || (!is_leap_year_ && day_ >= 1 && day_ <= 28))) + return true; + else if ((month_ == 4 || month_ == 6 || month_ == 9 || month_ == 11) && day_ >= 1 && day_ <= 30) + return true; + return false; + }; + + if (!check_date(is_leap_year, month, day_of_month)) + return on_error("Cannot read DateTime: logical error, unexpected date: " + std::to_string(year) + "-" + std::to_string(month) + "-" + std::to_string(day_of_month), ErrorCodes::LOGICAL_ERROR); + if (is_pm && hour < 12) hour += 12; @@ -565,12 +600,12 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf if constexpr (std::is_same_v) { - if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) + if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) return false; } else { - parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond); + parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond); } @@ -598,12 +633,17 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf void parseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl 
& utc_time_zone) { - parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); + parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); +} + +void parseDateTimeBestEffortUS(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) +{ + parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); } bool tryParseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { - return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); } void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) diff --git a/src/IO/parseDateTimeBestEffort.h b/src/IO/parseDateTimeBestEffort.h index 54432b5db2f..093bca571d8 100644 --- a/src/IO/parseDateTimeBestEffort.h +++ b/src/IO/parseDateTimeBestEffort.h @@ -56,6 +56,7 @@ class ReadBuffer; */ void parseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); +void parseDateTimeBestEffortUS(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); bool tryParseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index afd94e3b5b8..13c7f6ddb35 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1217,10 +1218,17 @@ Block Aggregator::prepareBlockAndFill( if (aggregate_functions[i]->isState()) { /// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states. - ColumnAggregateFunction & column_aggregate_func = assert_cast(*final_aggregate_columns[i]); + if (auto * column_aggregate_func = typeid_cast(final_aggregate_columns[i].get())) + for (auto & pool : data_variants.aggregates_pools) + column_aggregate_func->addArena(pool); - for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func.addArena(pool); + /// Aggregate state can be wrapped into array if aggregate function ends with -Resample combinator. 
+ final_aggregate_columns[i]->forEachSubcolumn([&data_variants](auto & subcolumn) + { + if (auto * column_aggregate_func = typeid_cast(subcolumn.get())) + for (auto & pool : data_variants.aggregates_pools) + column_aggregate_func->addArena(pool); + }); } } } diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index c01d0188e5c..6558ebf63d5 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -72,8 +72,9 @@ bool Cluster::Address::isLocal(UInt16 clickhouse_port) const } -Cluster::Address::Address(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, UInt32 shard_index_, UInt32 replica_index_) : - shard_index(shard_index_), replica_index(replica_index_) +Cluster::Address::Address( + const Poco::Util::AbstractConfiguration & config, const String & config_prefix, UInt32 shard_index_, UInt32 replica_index_) + : shard_index(shard_index_), replica_index(replica_index_) { host_name = config.getString(config_prefix + ".host"); port = static_cast(config.getInt(config_prefix + ".port")); @@ -85,18 +86,20 @@ Cluster::Address::Address(const Poco::Util::AbstractConfiguration & config, cons default_database = config.getString(config_prefix + ".default_database", ""); secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable; compression = config.getBool(config_prefix + ".compression", true) ? Protocol::Compression::Enable : Protocol::Compression::Disable; + priority = config.getInt(config_prefix + ".priority", 1); const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; is_local = isLocal(config.getInt(port_type, 0)); } -Cluster::Address::Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_) +Cluster::Address::Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_, Int64 priority_) : user(user_), password(password_) { auto parsed_host_port = parseAddress(host_port_, clickhouse_port); host_name = parsed_host_port.first; port = parsed_host_port.second; secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable; + priority = priority_; is_local = isLocal(clickhouse_port); } @@ -208,6 +211,7 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string) address.user = unescapeForFileName(std::string(address_begin, has_pw ? colon : user_pw_end)); address.password = has_pw ? unescapeForFileName(std::string(colon + 1, user_pw_end)) : std::string(); address.default_database = has_db ? 
unescapeForFileName(std::string(has_db + 1, address_end)) : std::string(); + // address.priority ignored return address; } } @@ -301,7 +305,8 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting settings.distributed_connections_pool_size, address.host_name, address.port, address.default_database, address.user, address.password, - "server", address.compression, address.secure); + "server", address.compression, + address.secure, address.priority); info.pool = std::make_shared( ConnectionPoolPtrs{pool}, settings.load_balancing); @@ -374,7 +379,8 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting settings.distributed_connections_pool_size, replica.host_name, replica.port, replica.default_database, replica.user, replica.password, - "server", replica.compression, replica.secure); + "server", replica.compression, + replica.secure, replica.priority); all_replicas_pools.emplace_back(replica_pool); if (replica.is_local) @@ -413,7 +419,8 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting Cluster::Cluster(const Settings & settings, const std::vector> & names, - const String & username, const String & password, UInt16 clickhouse_port, bool treat_local_as_remote, bool secure) + const String & username, const String & password, UInt16 clickhouse_port, bool treat_local_as_remote, + bool secure, Int64 priority) { UInt32 current_shard_num = 1; @@ -421,7 +428,7 @@ Cluster::Cluster(const Settings & settings, const std::vector(ConnectionPoolPtrs{pool}, settings.load_balancing); info.per_replica_pools = {std::move(pool)}; diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 517083d2606..9633577bf6a 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -28,7 +28,8 @@ public: /// This parameter is needed only to check that some address is local (points to ourself). Cluster(const Settings & settings, const std::vector> & names, const String & username, const String & password, - UInt16 clickhouse_port, bool treat_local_as_remote, bool secure = false); + UInt16 clickhouse_port, bool treat_local_as_remote, + bool secure = false, Int64 priority = 1); Cluster(const Cluster &)= delete; Cluster & operator=(const Cluster &) = delete; @@ -44,7 +45,7 @@ public: * * example01-01-1 * 9000 - * + * * * ... 
* or in and inside in elements: @@ -52,7 +53,7 @@ public: * * example01-01-1 * 9000 - * + * * * */ @@ -73,6 +74,8 @@ public: Protocol::Compression compression = Protocol::Compression::Enable; Protocol::Secure secure = Protocol::Secure::Disable; + Int64 priority = 1; + Address() = default; Address( const Poco::Util::AbstractConfiguration & config, @@ -84,7 +87,8 @@ public: const String & user_, const String & password_, UInt16 clickhouse_port, - bool secure_ = false); + bool secure_ = false, + Int64 priority_ = 1); /// Returns 'escaped_host_name:port' String toString() const; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index bfa6fae0977..51f7e93552a 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -170,7 +170,9 @@ void SelectStreamFactory::createForShard( ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable); if (shard_info.hasRemoteConnections()) { - LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), "There is no table {} on local replica of shard {}, will try remote replicas.", main_table.getNameForLogs(), shard_info.shard_num); + LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), + "There is no table {} on local replica of shard {}, will try remote replicas.", + main_table.getNameForLogs(), shard_info.shard_num); emplace_remote_stream(); } else @@ -254,7 +256,8 @@ void SelectStreamFactory::createForShard( catch (const Exception & ex) { if (ex.code() == ErrorCodes::ALL_CONNECTION_TRIES_FAILED) - LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), "Connections to remote replicas of local shard {} failed, will use stale local replica", shard_num); + LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), + "Connections to remote replicas of local shard {} failed, will use stale local replica", shard_num); else throw; } diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index aac78b755da..451a8873f41 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -28,7 +28,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin new_settings.max_concurrent_queries_for_user.changed = false; new_settings.max_memory_usage_for_user.changed = false; - if (settings.force_optimize_skip_unused_shards_nesting) + if (settings.force_optimize_skip_unused_shards_nesting && settings.force_optimize_skip_unused_shards) { if (new_settings.force_optimize_skip_unused_shards_nesting == 1) { @@ -48,7 +48,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin } } - if (settings.optimize_skip_unused_shards_nesting) + if (settings.optimize_skip_unused_shards_nesting && settings.optimize_skip_unused_shards) { if (new_settings.optimize_skip_unused_shards_nesting == 1) { diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index 5ebebae2578..604bfc7774f 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -132,7 +132,7 @@ public: { /// leave other comparisons as is } - else if (functionIsLikeOperator(node.name) || /// LIKE, NOT LIKE + else if (functionIsLikeOperator(node.name) || /// LIKE, NOT LIKE, ILIKE, NOT ILIKE functionIsInOperator(node.name)) /// IN, NOT IN { /// leave as is. 
It's not possible to make push down here cause of unknown aliases and not implemented JOIN predicates. diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 28436f192b0..2278c0e452f 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1278,7 +1279,7 @@ private: }; -BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context, AccessRightsElements && query_required_access) +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option) { /// Remove FORMAT and INTO OUTFILE if exists ASTPtr query_ptr = query_ptr_->clone(); @@ -1323,10 +1324,10 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont /// the local current database or a shard's default database. bool need_replace_current_database = (std::find_if( - query_required_access.begin(), - query_required_access.end(), + query_requires_access.begin(), + query_requires_access.end(), [](const AccessRightsElement & elem) { return elem.isEmptyDatabase(); }) - != query_required_access.end()); + != query_requires_access.end()); if (need_replace_current_database) { @@ -1355,29 +1356,31 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont AddDefaultDatabaseVisitor visitor(current_database); visitor.visitDDL(query_ptr); - query_required_access.replaceEmptyDatabase(current_database); + query_requires_access.replaceEmptyDatabase(current_database); } else { - size_t old_num_elements = query_required_access.size(); - for (size_t i = 0; i != old_num_elements; ++i) + for (size_t i = 0; i != query_requires_access.size();) { - auto & element = query_required_access[i]; + auto & element = query_requires_access[i]; if (element.isEmptyDatabase()) { - element.setDatabase(shard_default_databases[0]); - for (size_t j = 1; j != shard_default_databases.size(); ++j) - { - query_required_access.push_back(element); - query_required_access.back().setDatabase(shard_default_databases[j]); - } + query_requires_access.insert(query_requires_access.begin() + i + 1, shard_default_databases.size() - 1, element); + for (size_t j = 0; j != shard_default_databases.size(); ++j) + query_requires_access[i + j].replaceEmptyDatabase(shard_default_databases[j]); + i += shard_default_databases.size(); } + else + ++i; } } } /// Check access rights, assume that all servers have the same users config - context.checkAccess(query_required_access); + if (query_requires_grant_option) + context.getAccess()->checkGrantOption(query_requires_access); + else + context.checkAccess(query_requires_access); DDLLogEntry entry; entry.hosts = std::move(hosts); @@ -1394,6 +1397,10 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont return io; } +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option) +{ + return executeDDLQueryOnCluster(query_ptr, context, AccessRightsElements{query_requires_access}, query_requires_grant_option); +} BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context) { diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index d764eab626f..544fb3da27d 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -29,8 +29,9 @@ struct DDLTask; /// Pushes 
distributed DDL query to the queue -BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, AccessRightsElements && query_required_access); BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context); +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option = false); +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option = false); class DDLWorker diff --git a/src/Interpreters/DuplicateOrderByVisitor.h b/src/Interpreters/DuplicateOrderByVisitor.h index 85f34377e54..72e0419f114 100644 --- a/src/Interpreters/DuplicateOrderByVisitor.h +++ b/src/Interpreters/DuplicateOrderByVisitor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -24,7 +25,9 @@ public: bool & is_stateful; void visit(ASTFunction & ast_function, ASTPtr &) { - if (ast_function.name == "any" || ast_function.name == "groupArray") + auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(ast_function.name); + + if (aggregate_function_properties && aggregate_function_properties->is_order_dependent) { is_stateful = true; return; @@ -85,7 +88,6 @@ public: if (done) return; - /// Disable optimization for distributed tables for (const auto & elem : select_query.children) { if (elem->as() && !elem->as()->is_standalone) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 44aa70b1697..bfb7abe8fe5 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -78,6 +78,7 @@ namespace ErrorCodes extern const int ILLEGAL_PREWHERE; extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } namespace @@ -636,6 +637,11 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( step.required_output.push_back(prewhere_column_name); step.can_remove_required_output.push_back(true); + auto filter_type = step.actions->getSampleBlock().getByName(prewhere_column_name).type; + if (!filter_type->canBeUsedInBooleanContext()) + throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + { /// Remove unused source_columns from prewhere actions. 
auto tmp_actions = std::make_shared(sourceColumns(), context); @@ -716,11 +722,17 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); - step.required_output.push_back(select_query->where()->getColumnName()); + auto where_column_name = select_query->where()->getColumnName(); + step.required_output.push_back(where_column_name); step.can_remove_required_output = {true}; getRootActions(select_query->where(), only_types, step.actions); + auto filter_type = step.actions->getSampleBlock().getByName(where_column_name).type; + if (!filter_type->canBeUsedInBooleanContext()) + throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + return true; } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 3e09d728c4c..503807be0a7 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -325,7 +325,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( const auto tmp_column_name = final_column_name + "_tmp"; const auto * data_type_ptr = column_names_and_types.back().type.get(); - default_expr_list->children.emplace_back( setAlias(addTypeConversionToAST(std::make_shared(tmp_column_name), data_type_ptr->getName()), final_column_name)); diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index cf27f68ad73..30f18aa4134 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -45,7 +45,8 @@ public: internal = internal_; } - /// Obtain information about columns, their types, default values and column comments, for case when columns in CREATE query is specified explicitly. + /// Obtain information about columns, their types, default values and column comments, + /// for case when columns in CREATE query is specified explicitly. 
static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, const Context & context, bool sanity_check_compression_codecs); static ConstraintsDescription getConstraintsDescription(const ASTExpressionList * constraints); diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index 8981c06f962..2f468507eb6 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -9,100 +9,243 @@ #include #include #include +#include namespace DB { namespace { - template - void updateFromQueryImpl(T & grantee, const ASTGrantQuery & query, const std::vector & roles_from_query, const String & current_database) + using Kind = ASTGrantQuery::Kind; + + void doGrantAccess( + AccessRights & current_access, + const AccessRightsElements & access_to_grant, + bool with_grant_option) + { + if (with_grant_option) + current_access.grantWithGrantOption(access_to_grant); + else + current_access.grant(access_to_grant); + } + + + AccessRightsElements getFilteredAccessRightsElementsToRevoke( + const AccessRights & current_access, const AccessRightsElements & access_to_revoke, bool grant_option) + { + AccessRights intersection; + if (grant_option) + intersection.grantWithGrantOption(access_to_revoke); + else + intersection.grant(access_to_revoke); + intersection.makeIntersection(current_access); + + AccessRightsElements res; + for (auto & element : intersection.getElements()) + { + if ((element.kind == Kind::GRANT) && (element.grant_option || !grant_option)) + res.emplace_back(std::move(element)); + } + + return res; + } + + void doRevokeAccess( + AccessRights & current_access, + const AccessRightsElements & access_to_revoke, + bool grant_option, + const std::shared_ptr & context) + { + if (context && !context->hasGrantOption(access_to_revoke)) + context->checkGrantOption(getFilteredAccessRightsElementsToRevoke(current_access, access_to_revoke, grant_option)); + + if (grant_option) + current_access.revokeGrantOption(access_to_revoke); + else + current_access.revoke(access_to_revoke); + } + + + void doGrantRoles(GrantedRoles & granted_roles, + const RolesOrUsersSet & roles_to_grant, + bool with_admin_option) + { + auto ids = roles_to_grant.getMatchingIDs(); + + if (with_admin_option) + granted_roles.grantWithAdminOption(ids); + else + granted_roles.grant(ids); + } + + + std::vector + getFilteredListOfRolesToRevoke(const GrantedRoles & granted_roles, const RolesOrUsersSet & roles_to_revoke, bool admin_option) + { + std::vector ids; + if (roles_to_revoke.all) + { + boost::range::set_difference( + admin_option ? granted_roles.roles_with_admin_option : granted_roles.roles, + roles_to_revoke.except_ids, + std::back_inserter(ids)); + } + else + { + boost::range::set_intersection( + admin_option ? 
granted_roles.roles_with_admin_option : granted_roles.roles, + roles_to_revoke.getMatchingIDs(), + std::back_inserter(ids)); + } + return ids; + } + + void doRevokeRoles(GrantedRoles & granted_roles, + RolesOrUsersSet * default_roles, + const RolesOrUsersSet & roles_to_revoke, + bool admin_option, + const std::unordered_map & names_of_roles, + const std::shared_ptr & context) + { + auto ids = getFilteredListOfRolesToRevoke(granted_roles, roles_to_revoke, admin_option); + + if (context) + context->checkAdminOption(ids, names_of_roles); + + if (admin_option) + granted_roles.revokeAdminOption(ids); + else + { + granted_roles.revoke(ids); + if (default_roles) + { + for (const UUID & id : ids) + default_roles->ids.erase(id); + for (const UUID & id : ids) + default_roles->except_ids.erase(id); + } + } + } + + + template + void collectRoleNamesTemplate( + std::unordered_map & names_of_roles, + const T & grantee, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const AccessControlManager & access_control) + { + for (const auto & id : getFilteredListOfRolesToRevoke(grantee.granted_roles, roles_from_query, query.admin_option)) + { + auto name = access_control.tryReadName(id); + if (name) + names_of_roles.emplace(id, std::move(*name)); + } + } + + void collectRoleNames( + std::unordered_map & names_of_roles, + const IAccessEntity & grantee, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const AccessControlManager & access_control) + { + if (const auto * user = typeid_cast(&grantee)) + collectRoleNamesTemplate(names_of_roles, *user, query, roles_from_query, access_control); + else if (const auto * role = typeid_cast(&grantee)) + collectRoleNamesTemplate(names_of_roles, *role, query, roles_from_query, access_control); + } + + + template + void updateFromQueryTemplate( + T & grantee, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const std::unordered_map & names_of_roles, + const std::shared_ptr & context) { - using Kind = ASTGrantQuery::Kind; if (!query.access_rights_elements.empty()) { if (query.kind == Kind::GRANT) - { - if (query.grant_option) - grantee.access.grantWithGrantOption(query.access_rights_elements, current_database); - else - grantee.access.grant(query.access_rights_elements, current_database); - } + doGrantAccess(grantee.access, query.access_rights_elements, query.grant_option); else - { - if (query.grant_option) - grantee.access.revokeGrantOption(query.access_rights_elements, current_database); - else - grantee.access.revoke(query.access_rights_elements, current_database); - } + doRevokeAccess(grantee.access, query.access_rights_elements, query.grant_option, context); } if (!roles_from_query.empty()) { if (query.kind == Kind::GRANT) - { - if (query.admin_option) - grantee.granted_roles.grantWithAdminOption(roles_from_query); - else - grantee.granted_roles.grant(roles_from_query); - } + doGrantRoles(grantee.granted_roles, roles_from_query, query.admin_option); else { - if (query.admin_option) - grantee.granted_roles.revokeAdminOption(roles_from_query); - else - grantee.granted_roles.revoke(roles_from_query); - + RolesOrUsersSet * grantee_default_roles = nullptr; if constexpr (std::is_same_v) - { - for (const UUID & role_from_query : roles_from_query) - grantee.default_roles.ids.erase(role_from_query); - } + grantee_default_roles = &grantee.default_roles; + doRevokeRoles(grantee.granted_roles, grantee_default_roles, roles_from_query, query.admin_option, names_of_roles, context); } } } + + void 
updateFromQueryImpl( + IAccessEntity & grantee, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const std::unordered_map & names_or_roles, + const std::shared_ptr & context) + { + if (auto * user = typeid_cast(&grantee)) + updateFromQueryTemplate(*user, query, roles_from_query, names_or_roles, context); + else if (auto * role = typeid_cast(&grantee)) + updateFromQueryTemplate(*role, query, roles_from_query, names_or_roles, context); + } } BlockIO InterpreterGrantQuery::execute() { auto & query = query_ptr->as(); - auto & access_control = context.getAccessControlManager(); - auto access = context.getAccess(); - access->checkGrantOption(query.access_rights_elements); - - std::vector roles_from_query; - if (query.roles) - { - roles_from_query = RolesOrUsersSet{*query.roles, access_control}.getMatchingIDs(access_control); - for (const UUID & role_from_query : roles_from_query) - access->checkAdminOption(role_from_query); - } + query.replaceCurrentUserTagWithName(context.getUserName()); if (!query.cluster.empty()) - { - query.replaceCurrentUserTagWithName(context.getUserName()); - return executeDDLQueryOnCluster(query_ptr, context); - } + return executeDDLQueryOnCluster(query_ptr, context, query.access_rights_elements, true); + + auto access = context.getAccess(); + auto & access_control = context.getAccessControlManager(); + query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); + + RolesOrUsersSet roles_from_query; + if (query.roles) + roles_from_query = RolesOrUsersSet{*query.roles, access_control}; std::vector to_roles = RolesOrUsersSet{*query.to_roles, access_control, context.getUserID()}.getMatchingIDs(access_control); - String current_database = context.getCurrentDatabase(); + + std::unordered_map names_of_roles; + if (!roles_from_query.empty() && (query.kind == Kind::REVOKE)) + { + for (const auto & id : to_roles) + { + auto entity = access_control.tryRead(id); + if (entity) + collectRoleNames(names_of_roles, *entity, query, roles_from_query, access_control); + } + } + + if (query.kind == Kind::GRANT) /// For Kind::REVOKE the grant/admin option is checked inside updateFromQueryImpl(). 
+ { + if (!query.access_rights_elements.empty()) + access->checkGrantOption(query.access_rights_elements); + + if (!roles_from_query.empty()) + access->checkAdminOption(roles_from_query.getMatchingIDs()); + } auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto clone = entity->clone(); - if (auto user = typeid_cast>(clone)) - { - updateFromQueryImpl(*user, query, roles_from_query, current_database); - return user; - } - else if (auto role = typeid_cast>(clone)) - { - updateFromQueryImpl(*role, query, roles_from_query, current_database); - return role; - } - else - return entity; + updateFromQueryImpl(*clone, query, roles_from_query, names_of_roles, access); + return clone; }; access_control.update(to_roles, update_func); @@ -113,19 +256,19 @@ BlockIO InterpreterGrantQuery::execute() void InterpreterGrantQuery::updateUserFromQuery(User & user, const ASTGrantQuery & query) { - std::vector roles_from_query; + RolesOrUsersSet roles_from_query; if (query.roles) - roles_from_query = RolesOrUsersSet{*query.roles}.getMatchingIDs(); - updateFromQueryImpl(user, query, roles_from_query, {}); + roles_from_query = RolesOrUsersSet{*query.roles}; + updateFromQueryImpl(user, query, roles_from_query, {}, nullptr); } void InterpreterGrantQuery::updateRoleFromQuery(Role & role, const ASTGrantQuery & query) { - std::vector roles_from_query; + RolesOrUsersSet roles_from_query; if (query.roles) - roles_from_query = RolesOrUsersSet{*query.roles}.getMatchingIDs(); - updateFromQueryImpl(role, query, roles_from_query, {}); + roles_from_query = RolesOrUsersSet{*query.roles}; + updateFromQueryImpl(role, query, roles_from_query, {}, nullptr); } } diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 82c134aeba6..80710600db6 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -84,19 +84,20 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce const ColumnString & user_col = typeid_cast(*processes_block.getByName("user").column); const ClientInfo & my_client = context.getProcessListElement()->getClientInfo(); - std::optional can_kill_query_started_by_another_user_cached; - auto can_kill_query_started_by_another_user = [&]() -> bool + bool access_denied = false; + std::optional is_kill_query_granted_value; + auto is_kill_query_granted = [&]() -> bool { - if (!can_kill_query_started_by_another_user_cached) + if (!is_kill_query_granted_value) { - can_kill_query_started_by_another_user_cached - = context.getAccess()->isGranted(&Poco::Logger::get("InterpreterKillQueryQuery"), AccessType::KILL_QUERY); + is_kill_query_granted_value = context.getAccess()->isGranted(AccessType::KILL_QUERY); + if (!*is_kill_query_granted_value) + access_denied = true; } - return *can_kill_query_started_by_another_user_cached; + return *is_kill_query_granted_value; }; String query_user; - bool access_denied = false; for (size_t i = 0; i < num_processes; ++i) { @@ -107,11 +108,8 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce auto query_id = query_id_col.getDataAt(i).toString(); query_user = user_col.getDataAt(i).toString(); - if ((my_client.current_user != query_user) && !can_kill_query_started_by_another_user()) - { - access_denied = true; + if ((my_client.current_user != query_user) && !is_kill_query_granted()) continue; - } res.emplace_back(std::move(query_id), query_user, i, false); } @@ -269,7 +267,7 @@ BlockIO 
InterpreterKillQueryQuery::execute() ParserAlterCommand parser; auto command_ast = parseQuery(parser, command_col.getDataAt(i).toString(), 0, context.getSettingsRef().max_parser_depth); required_access_rights = InterpreterAlterQuery::getRequiredAccessForCommand(command_ast->as(), table_id.database_name, table_id.table_name); - if (!access->isGranted(&Poco::Logger::get("InterpreterKillQueryQuery"), required_access_rights)) + if (!access->isGranted(required_access_rights)) { access_denied = true; continue; diff --git a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/InterpreterShowGrantsQuery.cpp index ebb0d871c8b..45e065dcfd9 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/InterpreterShowGrantsQuery.cpp @@ -35,44 +35,33 @@ namespace std::shared_ptr to_roles = std::make_shared(); to_roles->names.push_back(grantee.getName()); - auto grants_and_partial_revokes = grantee.access.getGrantsAndPartialRevokes(); + std::shared_ptr current_query = nullptr; - for (bool grant_option : {false, true}) + auto elements = grantee.access.getElements(); + for (const auto & element : elements) { - using Kind = ASTGrantQuery::Kind; - for (Kind kind : {Kind::GRANT, Kind::REVOKE}) + if (current_query) { - AccessRightsElements * elements = nullptr; - if (grant_option) - elements = (kind == Kind::GRANT) ? &grants_and_partial_revokes.grants_with_grant_option : &grants_and_partial_revokes.revokes_grant_option; - else - elements = (kind == Kind::GRANT) ? &grants_and_partial_revokes.grants : &grants_and_partial_revokes.revokes; - elements->normalize(); - - std::shared_ptr grant_query = nullptr; - for (size_t i = 0; i != elements->size(); ++i) - { - const auto & element = (*elements)[i]; - bool prev_element_on_same_db_and_table = false; - if (grant_query) - { - const auto & prev_element = grant_query->access_rights_elements.back(); - if ((element.database == prev_element.database) && (element.any_database == prev_element.any_database) - && (element.table == prev_element.table) && (element.any_table == prev_element.any_table)) - prev_element_on_same_db_and_table = true; - } - if (!prev_element_on_same_db_and_table) - { - grant_query = std::make_shared(); - grant_query->kind = kind; - grant_query->attach = attach_mode; - grant_query->grant_option = grant_option; - grant_query->to_roles = to_roles; - res.push_back(grant_query); - } - grant_query->access_rights_elements.emplace_back(std::move(element)); - } + const auto & prev_element = current_query->access_rights_elements.back(); + bool continue_using_current_query = (element.database == prev_element.database) + && (element.any_database == prev_element.any_database) && (element.table == prev_element.table) + && (element.any_table == prev_element.any_table) && (element.grant_option == current_query->grant_option) + && (element.kind == current_query->kind); + if (!continue_using_current_query) + current_query = nullptr; } + + if (!current_query) + { + current_query = std::make_shared(); + current_query->kind = element.kind; + current_query->attach = attach_mode; + current_query->grant_option = element.grant_option; + current_query->to_roles = to_roles; + res.push_back(current_query); + } + + current_query->access_rights_elements.emplace_back(std::move(element)); } auto grants_roles = grantee.granted_roles.getGrants(); diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9b1712ac407..7c80b681114 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ 
b/src/Interpreters/InterpreterSystemQuery.cpp @@ -152,8 +152,16 @@ void InterpreterSystemQuery::startStopAction(StorageActionBlockType action_type, if (!table) continue; - if (!access->isGranted(log, getRequiredAccessType(action_type), elem.first, iterator->name())) + if (!access->isGranted(getRequiredAccessType(action_type), elem.first, iterator->name())) + { + LOG_INFO( + log, + "Access {} denied, skipping {}.{}", + toString(getRequiredAccessType(action_type)), + elem.first, + iterator->name()); continue; + } if (start) manager->remove(table, action_type); diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index a34276b5519..5f38f410e04 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -257,7 +257,7 @@ struct ColumnAliasesMatcher if (!last_table) { IdentifierSemantic::coverName(node, alias); - node.setAlias(""); + node.setAlias({}); } } else if (node.compound()) diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index bb054169e71..1478c36dd23 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -396,7 +396,6 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right if (required_right_keys.count(column.name)) right_columns_to_add.insert(ColumnWithTypeAndName{nullptr, column.type, column.name}); - JoinCommon::removeLowCardinalityInplace(right_columns_to_add); JoinCommon::createMissedColumns(right_columns_to_add); if (nullable_right_side) @@ -513,7 +512,7 @@ bool MergeJoin::saveRightBlock(Block && block) bool MergeJoin::addJoinedBlock(const Block & src_block, bool) { Block block = materializeBlock(src_block); - JoinCommon::removeLowCardinalityInplace(block); + JoinCommon::removeLowCardinalityInplace(block, table_join->keyNamesRight()); sortBlock(block, right_sort_description); return saveRightBlock(std::move(block)); @@ -525,7 +524,7 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) { JoinCommon::checkTypesOfKeys(block, table_join->keyNamesLeft(), right_table_keys, table_join->keyNamesRight()); materializeBlockInplace(block); - JoinCommon::removeLowCardinalityInplace(block); + JoinCommon::removeLowCardinalityInplace(block, table_join->keyNamesLeft()); sortBlock(block, left_sort_description); } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 51b0cf92484..6ea656f0056 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -36,8 +36,9 @@ namespace ErrorCodes namespace { + /// Helps to detect situations, where non-deterministic functions may be used in mutations of Replicated*MergeTree. 
-class FirstNonDeterministicFuncMatcher +class FirstNonDeterministicFunctionMatcher { public: struct Data @@ -70,18 +71,18 @@ public: } }; -using FirstNonDeterministicFuncFinder = InDepthNodeVisitor; +using FirstNonDeterministicFunctionFinder = InDepthNodeVisitor; -std::optional findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context) +std::optional findFirstNonDeterministicFunctionName(const MutationCommand & command, const Context & context) { - FirstNonDeterministicFuncMatcher::Data finder_data{context, std::nullopt}; + FirstNonDeterministicFunctionMatcher::Data finder_data{context, std::nullopt}; switch (command.type) { case MutationCommand::UPDATE: { auto update_assignments_ast = command.ast->as().update_assignments->clone(); - FirstNonDeterministicFuncFinder(finder_data).visit(update_assignments_ast); + FirstNonDeterministicFunctionFinder(finder_data).visit(update_assignments_ast); if (finder_data.nondeterministic_function_name) return finder_data.nondeterministic_function_name; @@ -92,7 +93,7 @@ std::optional findFirstNonDeterministicFuncName(const MutationCommand & case MutationCommand::DELETE: { auto predicate_ast = command.predicate->clone(); - FirstNonDeterministicFuncFinder(finder_data).visit(predicate_ast); + FirstNonDeterministicFunctionFinder(finder_data).visit(predicate_ast); return finder_data.nondeterministic_function_name; } @@ -343,7 +344,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) if (stages.empty() || !stages.back().column_to_updated.empty()) stages.emplace_back(context); - auto negated_predicate = makeASTFunction("not", command.predicate->clone()); + auto negated_predicate = makeASTFunction("isZeroOrNull", command.predicate->clone()); stages.back().filters.push_back(negated_predicate); } else if (command.type == MutationCommand::UPDATE) @@ -506,7 +507,9 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } const ASTPtr select_query = prepareInterpreterSelectQuery(stages_copy, /* dry_run = */ true); - InterpreterSelectQuery interpreter{select_query, context, storage, metadata_snapshot, SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits()}; + InterpreterSelectQuery interpreter{ + select_query, context, storage, metadata_snapshot, + SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits()}; auto first_stage_header = interpreter.getSampleBlock(); auto in = std::make_shared(first_stage_header); @@ -530,7 +533,6 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & { NamesAndTypesList all_columns = metadata_snapshot->getColumns().getAllPhysical(); - /// Next, for each stage calculate columns changed by this and previous stages. for (size_t i = 0; i < prepared_stages.size(); ++i) { @@ -681,7 +683,7 @@ void MutationsInterpreter::validate() { for (const auto & command : commands) { - const auto nondeterministic_func_name = findFirstNonDeterministicFuncName(command, context); + const auto nondeterministic_func_name = findFirstNonDeterministicFunctionName(command, context); if (nondeterministic_func_name) throw Exception( "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions! 
" diff --git a/src/Interpreters/PredicateRewriteVisitor.cpp b/src/Interpreters/PredicateRewriteVisitor.cpp index 7fc45044a88..2a4bd4c1fd2 100644 --- a/src/Interpreters/PredicateRewriteVisitor.cpp +++ b/src/Interpreters/PredicateRewriteVisitor.cpp @@ -76,7 +76,7 @@ static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vectortryGetAlias(); !alias.empty()) - predicate->setAlias(""); + predicate->setAlias({}); if (ASTIdentifier * identifier = predicate->as()) identifiers.emplace_back(identifier); diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 6f3031d5e7d..f331f3cecb3 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -441,9 +441,14 @@ void Set::checkColumnsNumber(size_t num_key_columns) const } } +bool Set::areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const +{ + return removeNullable(recursiveRemoveLowCardinality(data_types[set_type_idx]))->equals(*removeNullable(recursiveRemoveLowCardinality(other_type))); +} + void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const { - if (!removeNullable(recursiveRemoveLowCardinality(data_types[set_type_idx]))->equals(*removeNullable(recursiveRemoveLowCardinality(other_type)))) + if (!this->areTypesEqual(set_type_idx, other_type)) throw Exception("Types of column " + toString(set_type_idx + 1) + " in section IN don't match: " + other_type->getName() + " on the left, " + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH); diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index a4c8fd59245..933bace5e45 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -74,6 +74,7 @@ public: Columns getSetElements() const { return { set_elements.begin(), set_elements.end() }; } void checkColumnsNumber(size_t num_key_columns) const; + bool areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const; void checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const; private: diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 48cff8bf061..7d533a3bab7 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -62,7 +63,6 @@ namespace ErrorCodes #define DBMS_SYSTEM_LOG_QUEUE_SIZE 1048576 -class Context; class QueryLog; class QueryThreadLog; class PartLog; @@ -425,7 +425,11 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert->table_id = table_id; ASTPtr query_ptr(insert.release()); - InterpreterInsertQuery interpreter(query_ptr, context); + // we need query context to do inserts to target table with MV containing subqueries or joins + auto insert_context = Context(context); + insert_context.makeQueryContext(); + + InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); io.out->writePrefix(); diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 04265734ce7..e3e695f80f9 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -191,6 +191,10 @@ void ThreadStatus::finalizePerformanceCounters() performance_counters_finalized = true; updatePerformanceCounters(); + // We want to close perf file descriptors if the perf events were enabled for + // one query. 
What this code does in practice is less clear -- e.g., if I run + // 'select 1 settings metrics_perf_events_enabled = 1', I still get + // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*. bool close_perf_descriptors = true; if (query_context) close_perf_descriptors = !query_context->getSettingsRef().metrics_perf_events_enabled; diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index 0759b9d9601..c3d4bcdda48 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -21,6 +21,8 @@ IColumn::Selector createBlockSelector( const std::vector & slots) { const auto total_weight = slots.size(); + assert(total_weight != 0); + size_t num_rows = column.size(); IColumn::Selector selector(num_rows); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 41fa60324ec..860c56b1052 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -479,7 +479,8 @@ static std::tuple executeQueryImpl( } /// Also make possible for caller to log successful query finish and exception during execution. - auto finish_callback = [elem, &context, log_queries, log_queries_min_type = settings.log_queries_min_type] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable + auto finish_callback = [elem, &context, log_queries, log_queries_min_type = settings.log_queries_min_type] + (IBlockInputStream * stream_in, IBlockOutputStream * stream_out, QueryPipeline * query_pipeline) mutable { QueryStatus * process_list_elem = context.getProcessListElement(); @@ -528,6 +529,14 @@ static std::tuple executeQueryImpl( elem.result_bytes = counting_stream->getProgress().read_bytes; } } + else if (query_pipeline) + { + if (const auto * output_format = query_pipeline->getOutputFormat()) + { + elem.result_rows = output_format->getResultRows(); + elem.result_bytes = output_format->getResultBytes(); + } + } if (elem.read_rows != 0) { diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index 6dd3a202d4d..a17d3b43e69 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -104,6 +104,16 @@ void removeLowCardinalityInplace(Block & block) } } +void removeLowCardinalityInplace(Block & block, const Names & names) +{ + for (const String & column_name : names) + { + auto & col = block.getByName(column_name); + col.column = recursiveRemoveLowCardinality(col.column); + col.type = recursiveRemoveLowCardinality(col.type); + } +} + void splitAdditionalColumns(const Block & sample_block, const Names & key_names, Block & block_keys, Block & block_others) { block_others = materializeBlock(sample_block); diff --git a/src/Interpreters/join_common.h b/src/Interpreters/join_common.h index 47fa082e700..81eb0dfa688 100644 --- a/src/Interpreters/join_common.h +++ b/src/Interpreters/join_common.h @@ -20,6 +20,7 @@ Columns materializeColumns(const Block & block, const Names & names); ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); ColumnRawPtrs getRawPointers(const Columns & columns); void removeLowCardinalityInplace(Block & block); +void removeLowCardinalityInplace(Block & block, const Names & names); /// Split key and other columns by keys name list void splitAdditionalColumns(const Block & sample_block, const Names & key_names, Block & block_keys, Block & block_others); diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index bc994194336..30379567366 100644 --- 
a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -17,7 +17,7 @@ inline bool functionIsInOrGlobalInOperator(const std::string & name) inline bool functionIsLikeOperator(const std::string & name) { - return name == "like" || name == "notLike"; + return name == "like" || name == "ilike" || name == "notLike" || name == "notILike"; } inline bool functionIsJoinGet(const std::string & name) diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 4b8d4f2b859..cb3c36e5356 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -187,6 +187,9 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) if (ranges.empty()) break; + if (column.column_const) + continue; + if (isCollationRequired(column.description)) { const ColumnString & column_string = assert_cast(*column.column); diff --git a/src/Interpreters/tests/users.cpp b/src/Interpreters/tests/users.cpp index 5c7d66ed7ed..acd0cfd0519 100644 --- a/src/Interpreters/tests/users.cpp +++ b/src/Interpreters/tests/users.cpp @@ -218,7 +218,7 @@ void runOneTest(const TestDescriptor & test_descriptor) try { - res = acl_manager.read(entry.user_name)->access.access.isGranted(DB::AccessType::ALL, entry.database_name); + res = acl_manager.read(entry.user_name)->access.isGranted(DB::AccessType::ALL, entry.database_name); } catch (const Poco::Exception &) { diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 1309037ec01..8b23302a05c 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -75,8 +75,9 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); - /// AFTER - if (column) + if (first) + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); + else if (column) /// AFTER { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); column->formatImpl(settings, state, frame); @@ -97,6 +98,14 @@ void ASTAlterCommand::formatImpl( { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); + + if (first) + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); + else if (column) /// AFTER + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + } } else if (type == ASTAlterCommand::COMMENT_COLUMN) { diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index a9ae06863a9..f2dd997bd1f 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -68,7 +68,7 @@ public: */ ASTPtr col_decl; - /** The ADD COLUMN query here optionally stores the name of the column following AFTER + /** The ADD COLUMN and MODIFY COLUMN query here optionally stores the name of the column following AFTER * The DROP query stores the column name for deletion here * Also used for RENAME COLUMN. 
*/ @@ -136,6 +136,8 @@ public: bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN + bool first = false; /// option for ADD_COLUMN, MODIFY_COLUMN + DataDestinationType move_destination_type; /// option for MOVE PART/PARTITION String move_destination_name; /// option for MOVE PART/PARTITION diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 988ac66916d..ec46eb4ac37 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -54,7 +54,7 @@ ASTPtr ASTFunction::clone() const } -/** A special hack. If it's LIKE or NOT LIKE expression and the right hand side is a string literal, +/** A special hack. If it's [I]LIKE or NOT [I]LIKE expression and the right hand side is a string literal, * we will highlight unescaped metacharacters % and _ in string literal for convenience. * Motivation: most people are unaware that _ is a metacharacter and forgot to properly escape it with two backslashes. * With highlighting we make it clearly obvious. @@ -168,7 +168,9 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format "greater", " > ", "equals", " = ", "like", " LIKE ", + "ilike", " ILIKE ", "notLike", " NOT LIKE ", + "notILike", " NOT ILIKE ", "in", " IN ", "notIn", " NOT IN ", "globalIn", " GLOBAL IN ", @@ -186,7 +188,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); bool special_hilite = settings.hilite - && (name == "like" || name == "notLike") + && (name == "like" || name == "notLike" || name == "ilike" || name == "notILike") && highlightStringLiteralWithMetacharacters(arguments->children[1], settings, "%_"); /// Format x IN 1 as x IN (1): put parens around rhs even if there is a single element in set. diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index effc9a6cea9..f44eba30ee3 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -32,7 +32,7 @@ protected: template std::shared_ptr makeASTFunction(const String & name, Args &&... 
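The formatting and parser hunks around here add case-insensitive `ILIKE` / `NOT ILIKE` as first-class comparison operators (they also get the same metacharacter highlighting as `LIKE`). A small illustration with arbitrary values:

```sql
SELECT
    'ClickHouse' LIKE 'click%'        AS like_case_sensitive,    -- 0
    'ClickHouse' ILIKE 'click%'       AS ilike_case_insensitive, -- 1
    'ClickHouse' NOT ILIKE '%mouse%'  AS not_ilike;              -- 1
```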
args) { - const auto function = std::make_shared(); + auto function = std::make_shared(); function->name = name; function->arguments = std::make_shared(); diff --git a/src/Parsers/ASTGrantQuery.cpp b/src/Parsers/ASTGrantQuery.cpp index cf1943477b2..ae9649cdddc 100644 --- a/src/Parsers/ASTGrantQuery.cpp +++ b/src/Parsers/ASTGrantQuery.cpp @@ -133,6 +133,12 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F } +void ASTGrantQuery::replaceEmptyDatabaseWithCurrent(const String & current_database) +{ + access_rights_elements.replaceEmptyDatabase(current_database); +} + + void ASTGrantQuery::replaceCurrentUserTagWithName(const String & current_user_name) const { if (to_roles) diff --git a/src/Parsers/ASTGrantQuery.h b/src/Parsers/ASTGrantQuery.h index 9a11f5dc509..c36e42689a5 100644 --- a/src/Parsers/ASTGrantQuery.h +++ b/src/Parsers/ASTGrantQuery.h @@ -19,11 +19,7 @@ class ASTRolesOrUsersSet; class ASTGrantQuery : public IAST, public ASTQueryWithOnCluster { public: - enum class Kind - { - GRANT, - REVOKE, - }; + using Kind = AccessRightsElementWithOptions::Kind; Kind kind = Kind::GRANT; bool attach = false; AccessRightsElements access_rights_elements; @@ -35,6 +31,7 @@ public: String getID(char) const override; ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + void replaceEmptyDatabaseWithCurrent(const String & current_database); void replaceCurrentUserTagWithName(const String & current_user_name) const; ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster(clone()); } }; diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 25a638c77d4..5a284109cf2 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -22,8 +22,13 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format else if (clusters) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTERS" << (settings.hilite ? hilite_none : ""); + if (!like.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << (not_like ? " NOT" : "") << " LIKE " << (settings.hilite ? hilite_none : "") + settings.ostr + << (settings.hilite ? hilite_keyword : "") + << (not_like ? " NOT" : "") + << (case_insensitive_like ? " ILIKE " : " LIKE ") + << (settings.hilite ? hilite_none : "") << std::quoted(like, '\''); if (limit_length) @@ -47,8 +52,13 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format << backQuoteIfNeed(from); if (!like.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << (not_like ? " NOT" : "") << " LIKE " << (settings.hilite ? hilite_none : "") + settings.ostr + << (settings.hilite ? hilite_keyword : "") + << (not_like ? " NOT" : "") + << (case_insensitive_like ? " ILIKE " : " LIKE ") + << (settings.hilite ? hilite_none : "") << std::quoted(like, '\''); + else if (where_expression) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index f14d6e7bd33..acf365be91a 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -19,10 +19,14 @@ public: bool cluster{false}; bool dictionaries{false}; bool temporary{false}; + String cluster_str; String from; String like; + bool not_like{false}; + bool case_insensitive_like{false}; + ASTPtr where_expression; ASTPtr limit_length; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 1b8dbccdcc1..563dd376ac3 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1115,6 +1115,7 @@ const char * ParserAlias::restricted_keywords[] = "NOT", "BETWEEN", "LIKE", + "ILIKE", nullptr }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index e33e80f1f18..778ebe9f5d3 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -47,7 +47,9 @@ const char * ParserComparisonExpression::operators[] = ">", "greater", "=", "equals", "LIKE", "like", + "ILIKE", "ilike", "NOT LIKE", "notLike", + "NOT ILIKE", "notILike", "IN", "in", "NOT IN", "notIn", "GLOBAL IN", "globalIn", diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index f90d010e9de..3f22aff9cf5 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -63,6 +63,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_freeze("FREEZE"); ParserKeyword s_partition("PARTITION"); + ParserKeyword s_first("FIRST"); ParserKeyword s_after("AFTER"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserKeyword s_if_exists("IF EXISTS"); @@ -115,7 +116,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_col_decl.parse(pos, command->col_decl, expected)) return false; - if (s_after.ignore(pos, expected)) + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) { if (!parser_name.parse(pos, command->column, expected)) return false; @@ -429,6 +432,14 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) return false; + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + } + command->type = ASTAlterCommand::MODIFY_COLUMN; } else if (s_modify_order_by.ignore(pos, expected)) diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index 62efd5314ac..6e42b165b21 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -19,6 +19,8 @@ namespace ErrorCodes namespace { + using Kind = ASTGrantQuery::Kind; + bool parseAccessFlags(IParser::Pos & pos, Expected & expected, AccessFlags & access_flags) { static constexpr auto is_one_of_access_type_words = [](IParser::Pos & pos_) @@ -154,13 +156,16 @@ namespace } - bool parseRoles(IParser::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) + bool parseRoles(IParser::Pos & pos, Expected & expected, Kind kind, bool id_mode, std::shared_ptr & roles) { return IParserBase::wrapParseImpl(pos, [&] { - ASTPtr ast; ParserRolesOrUsersSet roles_p; roles_p.allowRoleNames().useIDMode(id_mode); + if (kind == Kind::REVOKE) + roles_p.allowAll(); + 
+ ASTPtr ast; if (!roles_p.parse(pos, ast, expected)) return false; @@ -174,7 +179,6 @@ namespace { return IParserBase::wrapParseImpl(pos, [&] { - using Kind = ASTGrantQuery::Kind; if (kind == Kind::GRANT) { if (!ParserKeyword{"TO"}.ignore(pos, expected)) @@ -217,7 +221,6 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) attach = true; } - using Kind = ASTGrantQuery::Kind; Kind kind; if (ParserKeyword{"GRANT"}.ignore(pos, expected)) kind = Kind::GRANT; @@ -242,7 +245,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) AccessRightsElements elements; std::shared_ptr roles; - if (!parseAccessRightsElements(pos, expected, elements) && !parseRoles(pos, expected, attach, roles)) + if (!parseAccessRightsElements(pos, expected, elements) && !parseRoles(pos, expected, kind, attach, roles)) return false; if (cluster.empty()) diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index ee50d23ffc8..66ecdf61c58 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -28,6 +28,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_in("IN"); ParserKeyword s_not("NOT"); ParserKeyword s_like("LIKE"); + ParserKeyword s_ilike("ILIKE"); ParserKeyword s_where("WHERE"); ParserKeyword s_limit("LIMIT"); ParserStringLiteral like_p; @@ -53,8 +54,11 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_not.ignore(pos, expected)) query->not_like = true; - if (s_like.ignore(pos, expected)) + if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) { + if (insensitive) + query->case_insensitive_like = true; + if (!like_p.parse(pos, like, expected)) return false; } @@ -98,8 +102,11 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_not.ignore(pos, expected)) query->not_like = true; - if (s_like.ignore(pos, expected)) + if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) { + if (insensitive) + query->case_insensitive_like = true; + if (!like_p.parse(pos, like, expected)) return false; } @@ -119,6 +126,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } tryGetIdentifierNameInto(database, query->from); + if (like) query->like = safeGet(like->as().value); diff --git a/src/Parsers/ParserShowTablesQuery.h b/src/Parsers/ParserShowTablesQuery.h index 4fd11d8e2a0..3b8bb033275 100644 --- a/src/Parsers/ParserShowTablesQuery.h +++ b/src/Parsers/ParserShowTablesQuery.h @@ -7,14 +7,14 @@ namespace DB { /** Query like this: - * SHOW TABLES [FROM db] [[NOT] LIKE 'str'] [LIMIT expr] + * SHOW TABLES [FROM db] [[NOT] [I]LIKE 'str'] [LIMIT expr] * or * SHOW DATABASES. 
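`ParserShowTablesQuery` now accepts `ILIKE` in the same positions as `LIKE` (including after `NOT`), so the filter can be made case-insensitive. For example (illustrative):

```sql
SHOW TABLES FROM system ILIKE '%QUERY%' LIMIT 5;
SHOW TABLES FROM system NOT ILIKE '%log%';
```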
*/ class ParserShowTablesQuery : public IParserBase { protected: - const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] LIKE 'str'] [LIMIT expr]"; } + const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/makeASTForLogicalFunction.cpp b/src/Parsers/makeASTForLogicalFunction.cpp index eaae38740aa..02c9da926c9 100644 --- a/src/Parsers/makeASTForLogicalFunction.cpp +++ b/src/Parsers/makeASTForLogicalFunction.cpp @@ -7,21 +7,6 @@ namespace DB { -ASTPtr makeASTForLogicalNot(ASTPtr argument) -{ - bool b; - if (tryGetLiteralBool(argument.get(), b)) - return std::make_shared(Field{UInt8(!b)}); - - auto function = std::make_shared(); - auto exp_list = std::make_shared(); - function->name = "not"; - function->arguments = exp_list; - function->children.push_back(exp_list); - exp_list->children.push_back(argument); - return function; -} - ASTPtr makeASTForLogicalAnd(ASTs && arguments) { @@ -100,4 +85,5 @@ bool tryGetLiteralBool(const IAST * ast, bool & value) return false; } } + } diff --git a/src/Parsers/makeASTForLogicalFunction.h b/src/Parsers/makeASTForLogicalFunction.h index 5c1096cab6e..8c3718bfcde 100644 --- a/src/Parsers/makeASTForLogicalFunction.h +++ b/src/Parsers/makeASTForLogicalFunction.h @@ -5,9 +5,6 @@ namespace DB { -/// Makes an AST calculating NOT argument. -ASTPtr makeASTForLogicalNot(ASTPtr argument); - /// Makes an AST calculating argument1 AND argument2 AND ... AND argumentN. ASTPtr makeASTForLogicalAnd(ASTs && arguments); diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 334843036dc..f7fc6170cad 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -59,6 +59,8 @@ void IOutputFormat::work() switch (current_block_kind) { case Main: + result_rows += current_chunk.getNumRows(); + result_bytes += current_chunk.allocatedBytes(); consume(std::move(current_chunk)); break; case Totals: diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 2e3db50ee6e..ae5e4d72d3c 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -79,6 +79,14 @@ public: void setTotals(const Block & totals) { consumeTotals(Chunk(totals.getColumns(), totals.rows())); } void setExtremes(const Block & extremes) { consumeExtremes(Chunk(extremes.getColumns(), extremes.rows())); } + + size_t getResultRows() const { return result_rows; } + size_t getResultBytes() const { return result_bytes; } + +private: + /// Counters for consumed chunks. Are used for QueryLog. 
+ size_t result_rows = 0; + size_t result_bytes = 0; }; } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index a8aac2d51ee..a998378125f 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -263,6 +264,7 @@ namespace DB for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) { ColumnWithTypeAndName header_column = header.getByPosition(column_i); + const auto column_type = recursiveRemoveLowCardinality(header_column.type); if (name_to_column_ptr.find(header_column.name) == name_to_column_ptr.end()) // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable? @@ -273,13 +275,13 @@ namespace DB arrow::Type::type arrow_type = arrow_column->type()->id(); // TODO: check if a column is const? - if (!header_column.type->isNullable() && arrow_column->null_count()) + if (!column_type->isNullable() && arrow_column->null_count()) { throw Exception{"Can not insert NULL data into non-nullable column \"" + header_column.name + "\"", ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN}; } - const bool target_column_is_nullable = header_column.type->isNullable() || arrow_column->null_count(); + const bool target_column_is_nullable = column_type->isNullable() || arrow_column->null_count(); DataTypePtr internal_nested_type; @@ -304,15 +306,6 @@ namespace DB const DataTypePtr internal_type = target_column_is_nullable ? makeNullable(internal_nested_type) : internal_nested_type; - const std::string internal_nested_type_name = internal_nested_type->getName(); - - const DataTypePtr column_nested_type = header_column.type->isNullable() - ? 
static_cast(header_column.type.get())->getNestedType() - : header_column.type; - - const DataTypePtr column_type = header_column.type; - - const std::string column_nested_type_name = column_nested_type->getName(); ColumnWithTypeAndName column; column.name = header_column.name; @@ -373,8 +366,8 @@ namespace DB else column.column = std::move(read_column); - column.column = castColumn(column, column_type); - column.type = column_type; + column.column = castColumn(column, header_column.type); + column.type = header_column.type; num_rows = column.column->size(); columns_list.push_back(std::move(column.column)); } diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 29268d3894c..c1fce04f3b2 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB @@ -220,7 +221,8 @@ namespace DB { // TODO: constructed every iteration ColumnWithTypeAndName column = header.safeGetByPosition(column_i); - column.column = chunk.getColumns()[column_i]; + column.column = recursiveRemoveLowCardinality(chunk.getColumns()[column_i]); + column.type = recursiveRemoveLowCardinality(column.type); const bool is_column_nullable = column.type->isNullable(); const auto & column_nested_type diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp new file mode 100644 index 00000000000..3bf2a9dbf59 --- /dev/null +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -0,0 +1,455 @@ +#include + +#if USE_ORC + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +ORCOutputStream::ORCOutputStream(WriteBuffer & out_) : out(out_) {} + +uint64_t ORCOutputStream::getLength() const +{ + return out.count(); +} + +uint64_t ORCOutputStream::getNaturalWriteSize() const +{ + out.nextIfAtEnd(); + return out.available(); +} + +void ORCOutputStream::write(const void* buf, size_t length) +{ + out.write(static_cast(buf), length); +} + +ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes()) +{ + schema = orc::createStructType(); + options.setCompression(orc::CompressionKind::CompressionKind_NONE); + size_t columns_count = header_.columns(); + for (size_t i = 0; i != columns_count; ++i) + schema->addStructField(header_.safeGetByPosition(i).name, getORCType(data_types[i])); + writer = orc::createWriter(*schema, &output_stream, options); +} + +ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & type) +{ + switch (type->getTypeId()) + { + case TypeIndex::UInt8: [[fallthrough]]; + case TypeIndex::Int8: + { + return orc::createPrimitiveType(orc::TypeKind::BYTE); + } + case TypeIndex::UInt16: [[fallthrough]]; + case TypeIndex::Int16: + { + return orc::createPrimitiveType(orc::TypeKind::SHORT); + } + case TypeIndex::UInt32: [[fallthrough]]; + case TypeIndex::Int32: + { + return orc::createPrimitiveType(orc::TypeKind::INT); + } + case TypeIndex::UInt64: [[fallthrough]]; + case TypeIndex::Int64: + { + return orc::createPrimitiveType(orc::TypeKind::LONG); + } + case TypeIndex::Float32: + { + return 
orc::createPrimitiveType(orc::TypeKind::FLOAT); + } + case TypeIndex::Float64: + { + return orc::createPrimitiveType(orc::TypeKind::DOUBLE); + } + case TypeIndex::Date: + { + return orc::createPrimitiveType(orc::TypeKind::DATE); + } + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::DateTime64: + { + return orc::createPrimitiveType(orc::TypeKind::TIMESTAMP); + } + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + { + return orc::createPrimitiveType(orc::TypeKind::BINARY); + } + case TypeIndex::Nullable: + { + return getORCType(removeNullable(type)); + } + case TypeIndex::Array: + { + const auto * array_type = typeid_cast(type.get()); + return orc::createListType(getORCType(array_type->getNestedType())); + } + case TypeIndex::Decimal32: + { + const auto * decimal_type = typeid_cast *>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + case TypeIndex::Decimal64: + { + const auto * decimal_type = typeid_cast *>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + case TypeIndex::Decimal128: + { + const auto * decimal_type = typeid_cast *>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + default: + { + throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + } + } +} + +template +void ORCBlockOutputFormat::writeNumbers( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap, + ConvertFunc convert) +{ + NumberVectorBatch * number_orc_column = dynamic_cast(orc_column); + const auto & number_column = assert_cast &>(column); + number_orc_column->resize(number_column.size()); + + for (size_t i = 0; i != number_column.size(); ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + number_orc_column->notNull[i] = 0; + continue; + } + number_orc_column->data[i] = convert(number_column.getElement(i)); + } + number_orc_column->numElements = number_column.size(); +} + +template +void ORCBlockOutputFormat::writeDecimals( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + DataTypePtr & type, + const PaddedPODArray * null_bytemap, + ConvertFunc convert) +{ + DecimalVectorBatch *decimal_orc_column = dynamic_cast(orc_column); + const auto & decimal_column = assert_cast &>(column); + const auto * decimal_type = typeid_cast *>(type.get()); + decimal_orc_column->precision = decimal_type->getPrecision(); + decimal_orc_column->scale = decimal_type->getScale(); + decimal_orc_column->resize(decimal_column.size()); + for (size_t i = 0; i != decimal_column.size(); ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + decimal_orc_column->notNull[i] = 0; + continue; + } + decimal_orc_column->values[i] = convert(decimal_column.getElement(i).value); + } + decimal_orc_column->numElements = decimal_column.size(); +} + +template +void ORCBlockOutputFormat::writeStrings( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap) +{ + orc::StringVectorBatch * string_orc_column = dynamic_cast(orc_column); + const auto & string_column = assert_cast(column); + string_orc_column->resize(string_column.size()); + + for (size_t i = 0; i != string_column.size(); ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + string_orc_column->notNull[i] = 0; + continue; + } + const StringRef & string = string_column.getDataAt(i); + string_orc_column->data[i] = 
const_cast(string.data); + string_orc_column->length[i] = string.size; + } + string_orc_column->numElements = string_column.size(); +} + +template +void ORCBlockOutputFormat::writeDateTimes( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap, + GetSecondsFunc get_seconds, + GetNanosecondsFunc get_nanoseconds) +{ + orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast(orc_column); + const auto & timestamp_column = assert_cast(column); + timestamp_orc_column->resize(timestamp_column.size()); + + for (size_t i = 0; i != timestamp_column.size(); ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + timestamp_orc_column->notNull[i] = 0; + continue; + } + timestamp_orc_column->data[i] = get_seconds(timestamp_column.getElement(i)); + timestamp_orc_column->nanoseconds[i] = get_nanoseconds(timestamp_column.getElement(i)); + } + timestamp_orc_column->numElements = timestamp_column.size(); +} + +void ORCBlockOutputFormat::writeColumn( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + DataTypePtr & type, + const PaddedPODArray * null_bytemap) +{ + if (null_bytemap) + { + orc_column->hasNulls = true; + orc_column->notNull.resize(column.size()); + } + switch (type->getTypeId()) + { + case TypeIndex::Int8: + { + /// Note: Explicit cast to avoid clang-tidy error: 'signed char' to 'long' conversion; consider casting to 'unsigned char' first. + writeNumbers(orc_column, column, null_bytemap, [](const Int8 & value){ return static_cast(value); }); + break; + } + case TypeIndex::UInt8: + { + writeNumbers(orc_column, column, null_bytemap, [](const UInt8 & value){ return value; }); + break; + } + case TypeIndex::Int16: + { + writeNumbers(orc_column, column, null_bytemap, [](const Int16 & value){ return value; }); + break; + } + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + { + writeNumbers(orc_column, column, null_bytemap, [](const UInt16 & value){ return value; }); + break; + } + case TypeIndex::Int32: + { + writeNumbers(orc_column, column, null_bytemap, [](const Int32 & value){ return value; }); + break; + } + case TypeIndex::UInt32: + { + writeNumbers(orc_column, column, null_bytemap, [](const UInt32 & value){ return value; }); + break; + } + case TypeIndex::Int64: + { + writeNumbers(orc_column, column, null_bytemap, [](const Int64 & value){ return value; }); + break; + } + case TypeIndex::UInt64: + { + writeNumbers(orc_column, column, null_bytemap, [](const UInt64 & value){ return value; }); + break; + } + case TypeIndex::Float32: + { + writeNumbers(orc_column, column, null_bytemap, [](const Float32 & value){ return value; }); + break; + } + case TypeIndex::Float64: + { + writeNumbers(orc_column, column, null_bytemap, [](const Float64 & value){ return value; }); + break; + } + case TypeIndex::FixedString: + { + writeStrings(orc_column, column, null_bytemap); + break; + } + case TypeIndex::String: + { + writeStrings(orc_column, column, null_bytemap); + break; + } + case TypeIndex::DateTime: + { + writeDateTimes( + orc_column, + column, null_bytemap, + [](UInt32 value){ return value; }, + [](UInt32){ return 0; }); + break; + } + case TypeIndex::DateTime64: + { + const auto * timestamp_type = assert_cast(type.get()); + UInt32 scale = timestamp_type->getScale(); + writeDateTimes( + orc_column, + column, null_bytemap, + [scale](UInt64 value){ return value / std::pow(10, scale); }, + [scale](UInt64 value){ return (value % UInt64(std::pow(10, scale))) * std::pow(10, 9 - scale); }); + break; + } + case 
TypeIndex::Decimal32:; + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + [](Int32 value){ return value; }); + break; + } + case TypeIndex::Decimal64: + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + [](Int64 value){ return value; }); + break; + } + case TypeIndex::Decimal128: + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + [](Int128 value){ return orc::Int128(value >> 64, (value << 64) >> 64); }); + break; + } + case TypeIndex::Nullable: + { + const auto & nullable_column = assert_cast(column); + const PaddedPODArray & new_null_bytemap = assert_cast &>(*nullable_column.getNullMapColumnPtr()).getData(); + auto nested_type = removeNullable(type); + writeColumn(orc_column, nullable_column.getNestedColumn(), nested_type, &new_null_bytemap); + break; + } + case TypeIndex::Array: + { + orc::ListVectorBatch * list_orc_column = dynamic_cast(orc_column); + const auto & list_column = assert_cast(column); + auto nested_type = assert_cast(*type).getNestedType(); + const ColumnArray::Offsets & offsets = list_column.getOffsets(); + list_orc_column->resize(list_column.size()); + /// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i]. + list_orc_column->offsets[0] = 0; + for (size_t i = 0; i != list_column.size(); ++i) + { + list_orc_column->offsets[i + 1] = offsets[i]; + } + orc::ColumnVectorBatch * nested_orc_column = list_orc_column->elements.get(); + writeColumn(nested_orc_column, list_column.getData(), nested_type, null_bytemap); + list_orc_column->numElements = list_column.size(); + break; + } + default: + throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + } +} + +size_t ORCBlockOutputFormat::getColumnSize(const IColumn & column, DataTypePtr & type) +{ + if (type->getTypeId() == TypeIndex::Array) + { + auto nested_type = assert_cast(*type).getNestedType(); + const IColumn & nested_column = assert_cast(column).getData(); + return getColumnSize(nested_column, nested_type); + } + return column.size(); +} + +size_t ORCBlockOutputFormat::getMaxColumnSize(Chunk & chunk) +{ + size_t columns_num = chunk.getNumColumns(); + size_t max_column_size = 0; + for (size_t i = 0; i != columns_num; ++i) + { + max_column_size = std::max(max_column_size, getColumnSize(*chunk.getColumns()[i], data_types[i])); + } + return max_column_size; +} + +void ORCBlockOutputFormat::consume(Chunk chunk) +{ + size_t columns_num = chunk.getNumColumns(); + size_t rows_num = chunk.getNumRows(); + /// getMaxColumnSize is needed to write arrays. + /// The size of the batch must be no less than total amount of array elements. 
+ ORC_UNIQUE_PTR batch = writer->createRowBatch(getMaxColumnSize(chunk)); + orc::StructVectorBatch *root = dynamic_cast(batch.get()); + for (size_t i = 0; i != columns_num; ++i) + { + writeColumn(root->fields[i], *chunk.getColumns()[i], data_types[i], nullptr); + } + root->numElements = rows_num; + writer->add(*batch); +} + +void ORCBlockOutputFormat::finalize() +{ + writer->close(); +} + +void registerOutputFormatProcessorORC(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor("ORC", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings); + }); +} + +} + +#else + +namespace DB +{ + class FormatFactory; + void registerOutputFormatProcessorORC(FormatFactory &) + { + } +} + +#endif diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h new file mode 100644 index 00000000000..ce599dabe23 --- /dev/null +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -0,0 +1,83 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include "config_formats.h" +#endif + +#if USE_ORC +#include +#include +#include +#include + +namespace DB +{ + +class WriteBuffer; + +/// orc::Writer writes only in orc::OutputStream +class ORCOutputStream : public orc::OutputStream +{ +public: + ORCOutputStream(WriteBuffer & out_); + + uint64_t getLength() const override; + uint64_t getNaturalWriteSize() const override; + void write(const void* buf, size_t length) override; + + void close() override {} + const std::string& getName() const override { return name; } + +private: + WriteBuffer & out; + std::string name = "ORCOutputStream"; +}; + +class ORCBlockOutputFormat : public IOutputFormat +{ +public: + ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + + String getName() const override { return "ORCBlockOutputFormat"; } + void consume(Chunk chunk) override; + void finalize() override; + +private: + ORC_UNIQUE_PTR getORCType(const DataTypePtr & type); + + /// ConvertFunc is needed for type UInt8, because firstly UInt8 (char8_t) must be + /// converted to unsigned char (bugprone-signed-char-misuse in clang). + template + void writeNumbers(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap, ConvertFunc convert); + + /// ConvertFunc is needed to convert ClickHouse Int128 to ORC Int128. + template + void writeDecimals(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, + const PaddedPODArray * null_bytemap, ConvertFunc convert); + + template + void writeStrings(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap); + + /// ORC column TimestampVectorBatch stores only seconds and nanoseconds, + /// GetSecondsFunc and GetNanosecondsFunc are needed to extract them from DataTime type. 
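The new `ORCBlockOutputFormat` registers `ORC` as an output format, so query results can be exported as ORC files. A minimal sketch, run from clickhouse-client with an arbitrary output file name:

```sql
SELECT number AS id, toString(number) AS s
FROM system.numbers
LIMIT 3
INTO OUTFILE 'sample.orc'
FORMAT ORC;
```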
+ template + void writeDateTimes(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap, + GetSecondsFunc get_seconds, GetNanosecondsFunc get_nanoseconds); + + void writeColumn(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap); + + /// These two functions are needed to know maximum nested size of arrays to + /// create an ORC Batch with the appropriate size + size_t getColumnSize(const IColumn & column, DataTypePtr & type); + size_t getMaxColumnSize(Chunk & chunk); + + const FormatSettings format_settings; + ORCOutputStream output_stream; + DataTypes data_types; + ORC_UNIQUE_PTR writer; + ORC_UNIQUE_PTR schema; + orc::WriterOptions options; +}; + +} +#endif diff --git a/src/Processors/QueryPipeline.h b/src/Processors/QueryPipeline.h index aa04ac9ea47..7990b0b79f5 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/Processors/QueryPipeline.h @@ -133,6 +133,8 @@ public: void addCreatingSetsTransform(ProcessorPtr transform); /// Resize pipeline to single output and add IOutputFormat. Pipeline will be completed after this transformation. void setOutputFormat(ProcessorPtr output); + /// Get current OutputFormat. + IOutputFormat * getOutputFormat() const { return output_format; } /// Sink is a processor with single input port and no output ports. Creates sink for each output port. /// Pipeline will be completed after this transformation. void setSinks(const ProcessorGetterWithStreamKind & getter); @@ -192,6 +194,13 @@ public: /// Set upper limit for the recommend number of threads void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } + /// Update upper limit for the recommend number of threads + void limitMaxThreads(size_t max_threads_) + { + if (max_threads == 0 || max_threads_ < max_threads) + max_threads = max_threads_; + } + /// Convert query pipeline to single or several pipes. 
Pipe getPipe() &&; Pipes getPipes() &&; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index d3ec3edfd10..2e5d2a3a5f4 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -156,8 +156,8 @@ QueryPipelinePtr QueryPlan::buildQueryPipeline() bool limit_max_threads = frame.pipelines.empty(); last_pipeline = frame.node->step->updatePipeline(std::move(frame.pipelines)); - if (limit_max_threads) - last_pipeline->setMaxThreads(max_threads); + if (limit_max_threads && max_threads) + last_pipeline->limitMaxThreads(max_threads); stack.pop(); } diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.cpp b/src/Processors/QueryPlan/ReadFromStorageStep.cpp index 52528ba3c7a..f3f7dd1bc8b 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp @@ -114,7 +114,7 @@ ReadFromStorageStep::ReadFromStorageStep( } } - if (pipes.size() == 1) + if (pipes.size() == 1 && !storage->isView()) pipeline->setMaxThreads(1); for (auto & pipe : pipes) diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index a97182f15fc..7d91b18983d 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -45,6 +45,10 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } +static String selectEmptyReplacementQuery(const String & query); +static String showTableStatusReplacementQuery(const String & query); +static String killConnectionIdReplacementQuery(const String & query); + MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_) : Poco::Net::TCPServerConnection(socket_) @@ -57,6 +61,10 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so server_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; if (ssl_enabled) server_capability_flags |= CLIENT_SSL; + + replacements.emplace("KILL QUERY", killConnectionIdReplacementQuery); + replacements.emplace("SHOW TABLE STATUS LIKE", showTableStatusReplacementQuery); + replacements.emplace("SHOW VARIABLES", selectEmptyReplacementQuery); } void MySQLHandler::run() @@ -103,7 +111,8 @@ void MySQLHandler::run() { if (!handshake_response.database.empty()) connection_context.setCurrentDatabase(handshake_response.database); - connection_context.setCurrentQueryId(""); + connection_context.setCurrentQueryId(Poco::format("mysql:%lu", connection_id)); + } catch (const Exception & exc) { @@ -284,20 +293,18 @@ void MySQLHandler::comQuery(ReadBuffer & payload) } else { - String replacement_query = "SELECT ''"; + String replacement_query; bool should_replace = false; bool with_output = false; - // This is a workaround in order to support adding ClickHouse to MySQL using federated server. 
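The `QueryPlan`/`ReadFromStorageStep` tweaks above stop pinning reads through a view to a single thread and make the plan only lower, never raise, the pipeline's thread limit. Roughly, with a hypothetical view name:

```sql
-- Before this change a single-pipe read through a view could be forced to one thread.
SELECT count() FROM hits_view SETTINGS max_threads = 8;
```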
- if (0 == strncasecmp("SHOW TABLE STATUS LIKE", query.c_str(), 22)) + for (auto const & x : replacements) { - should_replace = true; - replacement_query = boost::replace_all_copy(query, "SHOW TABLE STATUS LIKE ", show_table_status_replacement_query); - } - - if (0 == strncasecmp("SHOW VARIABLES", query.c_str(), 13)) - { - should_replace = true; + if (0 == strncasecmp(x.first.c_str(), query.c_str(), x.first.size())) + { + should_replace = true; + replacement_query = x.second(query); + break; + } } ReadBufferFromString replacement(replacement_query); @@ -372,26 +379,63 @@ static bool isFederatedServerSetupSetCommand(const String & query) return 1 == std::regex_match(query, expr); } -const String MySQLHandler::show_table_status_replacement_query("SELECT" - " name AS Name," - " engine AS Engine," - " '10' AS Version," - " 'Dynamic' AS Row_format," - " 0 AS Rows," - " 0 AS Avg_row_length," - " 0 AS Data_length," - " 0 AS Max_data_length," - " 0 AS Index_length," - " 0 AS Data_free," - " 'NULL' AS Auto_increment," - " metadata_modification_time AS Create_time," - " metadata_modification_time AS Update_time," - " metadata_modification_time AS Check_time," - " 'utf8_bin' AS Collation," - " 'NULL' AS Checksum," - " '' AS Create_options," - " '' AS Comment" - " FROM system.tables" - " WHERE name LIKE "); +/// Replace "[query(such as SHOW VARIABLES...)]" into "". +static String selectEmptyReplacementQuery(const String & query) +{ + std::ignore = query; + return "select ''"; +} + +/// Replace "SHOW TABLE STATUS LIKE 'xx'" into "SELECT ... FROM system.tables WHERE name LIKE 'xx'". +static String showTableStatusReplacementQuery(const String & query) +{ + const String prefix = "SHOW TABLE STATUS LIKE "; + if (query.size() > prefix.size()) + { + String suffix = query.data() + prefix.length(); + return ( + "SELECT" + " name AS Name," + " engine AS Engine," + " '10' AS Version," + " 'Dynamic' AS Row_format," + " 0 AS Rows," + " 0 AS Avg_row_length," + " 0 AS Data_length," + " 0 AS Max_data_length," + " 0 AS Index_length," + " 0 AS Data_free," + " 'NULL' AS Auto_increment," + " metadata_modification_time AS Create_time," + " metadata_modification_time AS Update_time," + " metadata_modification_time AS Check_time," + " 'utf8_bin' AS Collation," + " 'NULL' AS Checksum," + " '' AS Create_options," + " '' AS Comment" + " FROM system.tables" + " WHERE name LIKE " + + suffix); + } + return query; +} + +/// Replace "KILL QUERY [connection_id]" into "KILL QUERY WHERE query_id = 'mysql:[connection_id]'". 
+static String killConnectionIdReplacementQuery(const String & query) +{ + const String prefix = "KILL QUERY "; + if (query.size() > prefix.size()) + { + String suffix = query.data() + prefix.length(); + static const std::regex expr{"^[0-9]"}; + if (std::regex_match(suffix, expr)) + { + String replacement = Poco::format("KILL QUERY WHERE query_id = 'mysql:%s'", suffix); + return replacement; + } + } + return query; +} } + diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 5f506089493..f7596850a8b 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -72,7 +72,9 @@ protected: bool secure_connection = false; private: - static const String show_table_status_replacement_query; + using ReplacementFn = std::function; + using Replacements = std::unordered_map; + Replacements replacements; }; #if USE_SSL diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 5d892ca07d4..fc72effca9a 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -83,6 +83,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.ttl) command.ttl = ast_col_decl.ttl; + command.first = command_ast->first; command.if_not_exists = command_ast->if_not_exists; return command; @@ -133,6 +134,10 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.codec) command.codec = compression_codec_factory.get(ast_col_decl.codec, command.data_type, sanity_check_compression_codecs); + if (command_ast->column) + command.after_column = getIdentifierName(command_ast->column); + + command.first = command_ast->first; command.if_exists = command_ast->if_exists; return command; @@ -269,7 +274,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con column.codec = codec; column.ttl = ttl; - metadata.columns.add(column, after_column); + metadata.columns.add(column, after_column, first); /// Slow, because each time a list is copied metadata.columns.flattenNested(); @@ -282,7 +287,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con } else if (type == MODIFY_COLUMN) { - metadata.columns.modify(column_name, [&](ColumnDescription & column) + metadata.columns.modify(column_name, after_column, first, [&](ColumnDescription & column) { if (codec) { diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 82e438f6a45..1a80957e875 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -54,9 +54,12 @@ struct AlterCommand /// For COMMENT column std::optional comment; - /// For ADD - after which column to add a new one. If an empty string, add to the end. To add to the beginning now it is impossible. + /// For ADD or MODIFY - after which column to add a new one. If an empty string, add to the end. String after_column; + /// For ADD_COLUMN, MODIFY_COLUMN - Add to the begin if it is true. 
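Together with the parser changes earlier in this patch, `AlterCommands` and `ColumnsDescription` now honour column positions, so `ADD COLUMN` and `MODIFY COLUMN` accept `FIRST` and `AFTER`. A sketch with hypothetical table and column names:

```sql
ALTER TABLE hits ADD COLUMN user_agent String FIRST;
ALTER TABLE hits MODIFY COLUMN user_agent String AFTER url;
```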
+ bool first = false; + /// For DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN bool if_exists = false; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 8c92307dcb7..c1a5c1f77a0 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -167,7 +167,7 @@ static auto getNameRange(const ColumnsDescription::Container & columns, const St return std::make_pair(begin, end); } -void ColumnsDescription::add(ColumnDescription column, const String & after_column) +void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first) { if (has(column.name)) throw Exception("Cannot add column " + column.name + ": column with this name already exists", @@ -175,7 +175,9 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu auto insert_it = columns.cend(); - if (!after_column.empty()) + if (first) + insert_it = columns.cbegin(); + else if (!after_column.empty()) { auto range = getNameRange(columns, after_column); if (range.first == range.second) @@ -211,6 +213,38 @@ void ColumnsDescription::rename(const String & column_from, const String & colum }); } +void ColumnsDescription::modifyColumnOrder(const String & column_name, const String & after_column, bool first) +{ + const auto & reorder_column = [&](auto get_new_pos) + { + auto column_range = getNameRange(columns, column_name); + + if (column_range.first == column_range.second) + throw Exception("There is no column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + + std::vector moving_columns; + for (auto list_it = column_range.first; list_it != column_range.second;) + { + moving_columns.emplace_back(*list_it); + list_it = columns.get<0>().erase(list_it); + } + + columns.get<0>().insert(get_new_pos(), moving_columns.begin(), moving_columns.end()); + }; + + if (first) + reorder_column([&]() { return columns.cbegin(); }); + else if (!after_column.empty() && column_name != after_column) + { + /// Checked first + auto range = getNameRange(columns, after_column); + if (range.first == range.second) + throw Exception("Wrong column name. 
Cannot find column " + after_column + " to insert after", + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + + reorder_column([&]() { return getNameRange(columns, after_column).second; }); + } +} void ColumnsDescription::flattenNested() { diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index dda10db002d..1d6d04f303f 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -34,6 +34,8 @@ struct ColumnDescription ASTPtr ttl; ColumnDescription() = default; + ColumnDescription(ColumnDescription &&) = default; + ColumnDescription(const ColumnDescription &) = default; ColumnDescription(String name_, DataTypePtr type_); bool operator==(const ColumnDescription & other) const; @@ -52,7 +54,7 @@ public: explicit ColumnsDescription(NamesAndTypesList ordinary_); /// `after_column` can be a Nested column name; - void add(ColumnDescription column, const String & after_column = String()); + void add(ColumnDescription column, const String & after_column = String(), bool first = false); /// `column_name` can be a Nested column name; void remove(const String & column_name); @@ -84,12 +86,20 @@ public: template void modify(const String & column_name, F && f) + { + modify(column_name, String(), false, std::forward(f)); + } + + template + void modify(const String & column_name, const String & after_column, bool first, F && f) { auto it = columns.get<1>().find(column_name); if (it == columns.get<1>().end()) throw Exception("Cannot find column " + column_name + " in ColumnsDescription", ErrorCodes::LOGICAL_ERROR); if (!columns.get<1>().modify(it, std::forward(f))) throw Exception("Cannot modify ColumnDescription for column " + column_name + ": column name cannot be changed", ErrorCodes::LOGICAL_ERROR); + + modifyColumnOrder(column_name, after_column, first); } Names getNamesOfPhysical() const; @@ -120,6 +130,8 @@ public: private: Container columns; + + void modifyColumnOrder(const String & column_name, const String & after_column, bool first); }; /// Validate default expressions and corresponding types compatibility, i.e. diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index bf9efef1ba6..546eb5a15a4 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 078d2e74771..c5f598a756c 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -140,6 +140,13 @@ Block KafkaBlockInputStream::readImpl() if (new_rows) { + // In read_kafka_message(), ReadBufferFromKafkaConsumer::nextImpl() + // will be called, that may make something unusable, i.e. clean + // ReadBufferFromKafkaConsumer::messages, which is accessed from + // ReadBufferFromKafkaConsumer::currentTopic() (and other helpers). 
+ if (buffer->isStalled()) + throw Exception("Polled messages became unusable", ErrorCodes::LOGICAL_ERROR); + buffer->storeLastReadMessageOffset(); auto topic = buffer->currentTopic(); @@ -186,14 +193,14 @@ Block KafkaBlockInputStream::readImpl() total_rows = total_rows + new_rows; } - else if (buffer->isStalled()) - { - ++failed_poll_attempts; - } else if (buffer->polledDataUnusable()) { break; } + else if (buffer->isStalled()) + { + ++failed_poll_attempts; + } else { LOG_WARNING(log, "Parsing of message (topic: {}, partition: {}, offset: {}) return no rows.", buffer->currentTopic(), buffer->currentPartition(), buffer->currentOffset()); diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 3fd28cde5e5..5dea41d049e 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -420,12 +420,6 @@ void ReadBufferFromKafkaConsumer::resetIfStopped() /// Do commit messages implicitly after we processed the previous batch. bool ReadBufferFromKafkaConsumer::nextImpl() { - - /// NOTE: ReadBuffer was implemented with an immutable underlying contents in mind. - /// If we failed to poll any message once - don't try again. - /// Otherwise, the |poll_timeout| expectations get flawn. - resetIfStopped(); - if (!allowed || !hasMorePolledMessages()) return false; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 6499941a68d..c0f7ef8e44d 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -125,7 +125,6 @@ StorageKafka::StorageKafka( std::unique_ptr kafka_settings_) : IStorage(table_id_) , global_context(context_.getGlobalContext()) - , kafka_context(std::make_shared(global_context)) , kafka_settings(std::move(kafka_settings_)) , topics(parseTopics(global_context.getMacros()->expand(kafka_settings->kafka_topic_list.value))) , brokers(global_context.getMacros()->expand(kafka_settings->kafka_broker_list.value)) @@ -145,9 +144,6 @@ StorageKafka::StorageKafka( setInMemoryMetadata(storage_metadata); task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); - - kafka_context->makeQueryContext(); - kafka_context->applySettingsChanges(settings_adjustments); } SettingsChanges StorageKafka::createSettingsAdjustments() @@ -274,9 +270,10 @@ void StorageKafka::shutdown() LOG_TRACE(log, "Waiting for cleanup"); task->deactivate(); - // Close all consumers + LOG_TRACE(log, "Closing consumers"); for (size_t i = 0; i < num_created_consumers; ++i) auto buffer = popReadBuffer(); + LOG_TRACE(log, "Consumers closed"); rd_kafka_wait_destroyed(CLEANUP_TIMEOUT_MS); } @@ -530,6 +527,10 @@ bool StorageKafka::streamToViews() size_t block_size = getMaxBlockSize(); + auto kafka_context = std::make_shared(global_context); + kafka_context->makeQueryContext(); + kafka_context->applySettingsChanges(settings_adjustments); + // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns InterpreterInsertQuery interpreter(insert, *kafka_context, false, true, true); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 6f479ba2089..b7e6ea2a7e0 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -70,7 +70,6 @@ protected: private: // Configuration and state Context & global_context; - std::shared_ptr kafka_context; 
std::unique_ptr kafka_settings; const Names topics; const String brokers; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 281f8511a59..929242b2815 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -617,17 +617,40 @@ bool KeyCondition::tryPrepareSetIndex( const ASTPtr & right_arg = args[1]; - PreparedSetKey set_key; + SetPtr prepared_set; if (right_arg->as() || right_arg->as()) - set_key = PreparedSetKey::forSubquery(*right_arg); + { + auto set_it = prepared_sets.find(PreparedSetKey::forSubquery(*right_arg)); + if (set_it == prepared_sets.end()) + return false; + + prepared_set = set_it->second; + } else - set_key = PreparedSetKey::forLiteral(*right_arg, data_types); + { + /// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information + /// about types in left argument of the IN operator. Instead, we manually iterate through all the sets + /// and find the one for the right arg based on the AST structure (getTreeHash), after that we check + /// that the types it was prepared with are compatible with the types of the primary key. + auto set_ast_hash = right_arg->getTreeHash(); + auto set_it = std::find_if( + prepared_sets.begin(), prepared_sets.end(), + [&](const auto & candidate_entry) + { + if (candidate_entry.first.ast_hash != set_ast_hash) + return false; - auto set_it = prepared_sets.find(set_key); - if (set_it == prepared_sets.end()) - return false; + for (size_t i = 0; i < indexes_mapping.size(); ++i) + if (!candidate_entry.second->areTypesEqual(indexes_mapping[i].tuple_index, data_types[i])) + return false; - const SetPtr & prepared_set = set_it->second; + return true; + }); + if (set_it == prepared_sets.end()) + return false; + + prepared_set = set_it->second; + } /// The index can be prepared if the elements of the set were saved in advance. if (!prepared_set->hasExplicitSetElements()) @@ -635,7 +658,7 @@ bool KeyCondition::tryPrepareSetIndex( prepared_set->checkColumnsNumber(left_args_count); for (size_t i = 0; i < indexes_mapping.size(); ++i) - prepared_set->checkTypesEqual(indexes_mapping[i].tuple_index, removeLowCardinality(data_types[i])); + prepared_set->checkTypesEqual(indexes_mapping[i].tuple_index, data_types[i]); out.set_index = std::make_shared(prepared_set->getSetElements(), std::move(indexes_mapping)); @@ -818,6 +841,7 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont func_name = "greaterOrEquals"; else if (func_name == "in" || func_name == "notIn" || func_name == "like" || func_name == "notLike" || + func_name == "ilike" || func_name == "notIlike" || func_name == "startsWith") { /// "const IN data_column" doesn't make sense (unlike "data_column IN const") diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index f12acdbf7bf..16197b9fa69 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -20,7 +20,7 @@ using FunctionBasePtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -/** A field, that can be stored in two reperesenations: +/** A field, that can be stored in two representations: * - A standalone field. * - A field with reference to its position in a block. 
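The `KeyCondition::tryPrepareSetIndex` change above matches prepared sets by AST hash plus compatible types instead of an exact `PreparedSetKey`, which lets the primary-key index be used for `IN` when the key column types do not match the literal set exactly (e.g. `LowCardinality` key columns). Roughly, with a hypothetical table:

```sql
CREATE TABLE lc_keys
(
    s LowCardinality(String),
    v UInt64
)
ENGINE = MergeTree
ORDER BY s;

-- The IN condition can now be pushed into the primary-key index.
SELECT count() FROM lc_keys WHERE s IN ('a', 'b');
```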
* It's needed for execution of functions on ranges during diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 0e2dca76f58..8cb24bb0cd6 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -316,6 +317,12 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P prewhere_info->alias_actions->execute(block); prewhere_info->prewhere_actions->execute(block); + auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); + + if (!prewhere_column.type->canBeUsedInBooleanContext()) + throw Exception("Invalid type for filter in PREWHERE: " + prewhere_column.type->getName(), + ErrorCodes::LOGICAL_ERROR); + if (prewhere_info->remove_prewhere_column) block.erase(prewhere_info->prewhere_column_name); else diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e35aa4b1181..1adb245d9e1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1495,6 +1495,8 @@ void MergeTreeData::changeSettings( { if (new_settings) { + bool has_storage_policy_changed = false; + const auto & new_changes = new_settings->as().changes; for (const auto & change : new_changes) @@ -1503,28 +1505,34 @@ void MergeTreeData::changeSettings( StoragePolicyPtr new_storage_policy = global_context.getStoragePolicy(change.value.safeGet()); StoragePolicyPtr old_storage_policy = getStoragePolicy(); - checkStoragePolicy(new_storage_policy); - - std::unordered_set all_diff_disk_names; - for (const auto & disk : new_storage_policy->getDisks()) - all_diff_disk_names.insert(disk->getName()); - for (const auto & disk : old_storage_policy->getDisks()) - all_diff_disk_names.erase(disk->getName()); - - for (const String & disk_name : all_diff_disk_names) + /// StoragePolicy of different version or name is guaranteed to have different pointer + if (new_storage_policy != old_storage_policy) { - auto disk = new_storage_policy->getDiskByName(disk_name); - if (disk->exists(relative_data_path)) - throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); - } + checkStoragePolicy(new_storage_policy); - for (const String & disk_name : all_diff_disk_names) - { - auto disk = new_storage_policy->getDiskByName(disk_name); - disk->createDirectories(relative_data_path); - disk->createDirectories(relative_data_path + "detached"); + std::unordered_set all_diff_disk_names; + for (const auto & disk : new_storage_policy->getDisks()) + all_diff_disk_names.insert(disk->getName()); + for (const auto & disk : old_storage_policy->getDisks()) + all_diff_disk_names.erase(disk->getName()); + + for (const String & disk_name : all_diff_disk_names) + { + auto disk = new_storage_policy->getDiskByName(disk_name); + if (disk->exists(relative_data_path)) + throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); + } + + for (const String & disk_name : all_diff_disk_names) + { + auto disk = new_storage_policy->getDiskByName(disk_name); + disk->createDirectories(relative_data_path); + disk->createDirectories(relative_data_path + "detached"); + } + /// FIXME how would that be done while reloading configuration??? 
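The `changeSettings` rework above only re-checks the policy and creates directories when the storage policy actually changes, and starts background moves afterwards via `startBackgroundMovesIfNeeded`. The setting itself is changed the usual way; the policy name depends on your storage configuration:

```sql
ALTER TABLE hits MODIFY SETTING storage_policy = 'moving_from_ssd_to_hdd';
```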
+ + has_storage_policy_changed = true; } - /// FIXME how would that be done while reloading configuration??? } MergeTreeSettings copy = *getSettings(); @@ -1533,6 +1541,9 @@ void MergeTreeData::changeSettings( StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); new_metadata.setSettingsChanges(new_settings); setInMemoryMetadata(new_metadata); + + if (has_storage_policy_changed) + startBackgroundMovesIfNeeded(); } } @@ -3291,12 +3302,11 @@ bool MergeTreeData::selectPartsAndMove() bool MergeTreeData::areBackgroundMovesNeeded() const { auto policy = getStoragePolicy(); - auto metadata_snapshot = getInMemoryMetadataPtr(); if (policy->getVolumes().size() > 1) return true; - return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1 && metadata_snapshot->hasAnyMoveTTL(); + return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1; } bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e46e8b3a646..8fcb879b3ff 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -794,8 +794,6 @@ protected: void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const; - void setStoragePolicy(const String & new_storage_policy_name, bool only_check = false); - /// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked. void calculateColumnSizesImpl(); /// Adds or subtracts the contribution of the part to compressed column sizes. @@ -873,6 +871,8 @@ private: CurrentlyMovingPartsTagger checkPartsForMove(const DataPartsVector & parts, SpacePtr space); bool canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason) const; + + virtual void startBackgroundMovesIfNeeded() = 0; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index fefd9dc6e15..69e819a3cf5 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -968,6 +968,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( }; const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; + bool need_preliminary_merge = (parts.size() > settings.read_in_order_two_level_merge_threshold); for (size_t i = 0; i < num_streams && !parts.empty(); ++i) { @@ -1069,7 +1070,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( } } - if (pipes.size() > 1) + if (pipes.size() > 1 && need_preliminary_merge) { SortDescription sort_description; for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) @@ -1087,7 +1088,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( res.emplace_back(std::move(pipes), std::move(merging_sorted)); } else - res.emplace_back(std::move(pipes.front())); + { + for (auto && pipe : pipes) + res.emplace_back(std::move(pipe)); + } } return res; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 50a234b18f6..194614cf421 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -40,13 +40,10 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr /// So rules in zookeeper 
metadata is following: /// - When we have only ORDER BY, than store it in "primary key:" row of /metadata /// - When we have both, than store PRIMARY KEY in "primary key:" row and ORDER BY in "sorting key:" row of /metadata - if (!metadata_snapshot->isPrimaryKeyDefined()) - primary_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); - else - { - primary_key = formattedAST(metadata_snapshot->getPrimaryKey().expression_list_ast); + + primary_key = formattedAST(metadata_snapshot->getPrimaryKey().expression_list_ast); + if (metadata_snapshot->isPrimaryKeyDefined()) sorting_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); - } data_format_version = data.format_version; diff --git a/src/Storages/RabbitMQ/Buffer_fwd.h b/src/Storages/RabbitMQ/Buffer_fwd.h new file mode 100644 index 00000000000..5be2c6fdf6a --- /dev/null +++ b/src/Storages/RabbitMQ/Buffer_fwd.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ + +class ReadBufferFromRabbitMQConsumer; +using ConsumerBufferPtr = std::shared_ptr; + +class WriteBufferToRabbitMQProducer; +using ProducerBufferPtr = std::shared_ptr; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp new file mode 100644 index 00000000000..e10a4eb0f96 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -0,0 +1,156 @@ +#include +#include +#include +#include + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB +{ + +RabbitMQBlockInputStream::RabbitMQBlockInputStream( + StorageRabbitMQ & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const Context & context_, + const Names & columns, + Poco::Logger * log_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , context(context_) + , column_names(columns) + , log(log_) + , non_virtual_header(metadata_snapshot->getSampleBlockNonMaterialized()) + , virtual_header(metadata_snapshot->getSampleBlockForColumns({"_exchange"}, storage.getVirtuals(), storage.getStorageID())) +{ +} + + +RabbitMQBlockInputStream::~RabbitMQBlockInputStream() +{ + if (!claimed) + return; + + storage.pushReadBuffer(buffer); +} + + +Block RabbitMQBlockInputStream::getHeader() const +{ + return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID()); +} + + +void RabbitMQBlockInputStream::readPrefixImpl() +{ + auto timeout = std::chrono::milliseconds(context.getSettingsRef().rabbitmq_max_wait_ms.totalMilliseconds()); + + buffer = storage.popReadBuffer(timeout); + claimed = !!buffer; + + if (!buffer || finished) + return; + + buffer->checkSubscription(); +} + + +Block RabbitMQBlockInputStream::readImpl() +{ + if (!buffer || finished) + return Block(); + + finished = true; + + MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); + MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); + + auto input_format = FormatFactory::instance().getInputFormat( + storage.getFormatName(), *buffer, non_virtual_header, context, 1); + + InputPort port(input_format->getPort().getHeader(), input_format.get()); + connect(input_format->getPort(), port); + port.setNeeded(); + + auto read_rabbitmq_message = [&] + { + size_t new_rows = 0; + + while (true) + { + auto status = input_format->prepare(); + + switch (status) + { + case IProcessor::Status::Ready: + input_format->work(); + break; + + case IProcessor::Status::Finished: + input_format->resetParser(); + return new_rows; + + case 
IProcessor::Status::PortFull: + { + auto chunk = port.pull(); + + auto chunk_rows = chunk.getNumRows(); + new_rows += chunk_rows; + + auto columns = chunk.detachColumns(); + + for (size_t i = 0, s = columns.size(); i < s; ++i) + { + result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size()); + } + break; + } + case IProcessor::Status::NeedData: + case IProcessor::Status::Async: + case IProcessor::Status::Wait: + case IProcessor::Status::ExpandPipeline: + throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); + } + } + }; + + size_t total_rows = 0; + + while (true) + { + if (buffer->eof()) + break; + + auto new_rows = read_rabbitmq_message(); + + auto exchange_name = buffer->getExchange(); + + for (size_t i = 0; i < new_rows; ++i) + { + virtual_columns[0]->insert(exchange_name); + } + + total_rows = total_rows + new_rows; + buffer->allowNext(); + + if (!new_rows || !checkTimeLimit()) + break; + } + + if (total_rows == 0) + return Block(); + + auto result_block = non_virtual_header.cloneWithColumns(std::move(result_columns)); + auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + { + result_block.insert(column); + } + + return result_block; +} + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h new file mode 100644 index 00000000000..7db80065608 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ +class RabbitMQBlockInputStream : public IBlockInputStream +{ + +public: + RabbitMQBlockInputStream( + StorageRabbitMQ & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const Context & context_, + const Names & columns, + Poco::Logger * log_); + + ~RabbitMQBlockInputStream() override; + + String getName() const override { return storage.getName(); } + Block getHeader() const override; + + void readPrefixImpl() override; + Block readImpl() override; + +private: + StorageRabbitMQ & storage; + StorageMetadataPtr metadata_snapshot; + Context context; + Names column_names; + Poco::Logger * log; + bool finished = false, claimed = false; + const Block non_virtual_header, virtual_header; + + ConsumerBufferPtr buffer; +}; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp new file mode 100644 index 00000000000..1a03fc4969e --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_CREATE_IO_BUFFER; +} + + +RabbitMQBlockOutputStream::RabbitMQBlockOutputStream( + StorageRabbitMQ & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const Context & context_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , context(context_) +{ +} + + +Block RabbitMQBlockOutputStream::getHeader() const +{ + return metadata_snapshot->getSampleBlockNonMaterialized(); +} + + +void RabbitMQBlockOutputStream::writePrefix() +{ + buffer = storage.createWriteBuffer(); + if (!buffer) + throw Exception("Failed to create RabbitMQ producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER); + + buffer->activateWriting(); + + child = FormatFactory::instance().getOutput( + storage.getFormatName(), *buffer, getHeader(), context, [this](const 
Columns & /* columns */, size_t /* rows */) + { + buffer->countRow(); + }); +} + + +void RabbitMQBlockOutputStream::write(const Block & block) +{ + child->write(block); +} + + +void RabbitMQBlockOutputStream::writeSuffix() +{ + child->writeSuffix(); +} + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h new file mode 100644 index 00000000000..f8ed79438f4 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class RabbitMQBlockOutputStream : public IBlockOutputStream +{ + +public: + explicit RabbitMQBlockOutputStream(StorageRabbitMQ & storage_, const StorageMetadataPtr & metadata_snapshot_, const Context & context_); + + Block getHeader() const override; + + void writePrefix() override; + void write(const Block & block) override; + void writeSuffix() override; + +private: + StorageRabbitMQ & storage; + StorageMetadataPtr metadata_snapshot; + Context context; + ProducerBufferPtr buffer; + BlockOutputStreamPtr child; +}; +} diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp new file mode 100644 index 00000000000..5d17ff23b64 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -0,0 +1,46 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_CONNECT_RABBITMQ; +} + +/* The object of this class is shared between concurrent consumers (who share the same connection == share the same + * event loop and handler). + */ +RabbitMQHandler::RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_) : + AMQP::LibUvHandler(loop_), + loop(loop_), + log(log_) +{ +} + +void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * message) +{ + LOG_ERROR(log, "Library error report: {}", message); + + if (!connection->usable() || !connection->ready()) + throw Exception("Connection error", ErrorCodes::CANNOT_CONNECT_RABBITMQ); +} + +void RabbitMQHandler::startLoop() +{ + std::lock_guard lock(startup_mutex); + /// stop_loop variable is updated in a separate thread + while (!stop_loop.load()) + uv_run(loop, UV_RUN_NOWAIT); +} + +void RabbitMQHandler::iterateLoop() +{ + std::unique_lock lock(startup_mutex, std::defer_lock); + if (lock.try_lock()) + uv_run(loop, UV_RUN_NOWAIT); +} + +} diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h new file mode 100644 index 00000000000..5893ace1d2f --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class RabbitMQHandler : public AMQP::LibUvHandler +{ + +public: + RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_); + void onError(AMQP::TcpConnection * connection, const char * message) override; + + void stop() { stop_loop.store(true); } + void startLoop(); + void iterateLoop(); + +private: + uv_loop_t * loop; + Poco::Logger * log; + + std::atomic stop_loop = false; + std::mutex startup_mutex; +}; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.cpp b/src/Storages/RabbitMQ/RabbitMQSettings.cpp new file mode 100644 index 00000000000..efb73396515 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQSettings.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_SETTING; +} + 
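A minimal standalone sketch of the locking scheme used by RabbitMQHandler above, assuming nothing beyond the C++ standard library: the background looping task takes the mutex and spins the loop until it is asked to stop, while any other caller only borrows a single iteration via try_lock, so the non-thread-safe event loop is never driven from two threads at once. LoopDriver and runOneIteration are hypothetical stand-ins for the handler and for uv_run(loop, UV_RUN_NOWAIT); they are not part of the patch.

#include <atomic>
#include <mutex>
#include <thread>

struct LoopDriver
{
    std::atomic<bool> stop_loop{false};
    std::mutex startup_mutex;

    void runOneIteration() { /* stands in for uv_run(loop, UV_RUN_NOWAIT) */ }

    /// Owned by the dedicated looping task: holds the mutex and keeps iterating until stopped.
    void startLoop()
    {
        std::lock_guard<std::mutex> lock(startup_mutex);
        while (!stop_loop.load())
            runOneIteration();
    }

    /// Used by consumers/producers: runs one iteration only if the looping task is not active.
    void iterateLoop()
    {
        std::unique_lock<std::mutex> lock(startup_mutex, std::defer_lock);
        if (lock.try_lock())
            runOneIteration();
    }
};

int main()
{
    LoopDriver driver;
    std::thread looper([&] { driver.startLoop(); });
    driver.iterateLoop();          /// either borrows one iteration or does nothing if the looper holds the mutex
    driver.stop_loop.store(true);  /// asks the looping thread to finish its current pass and exit
    looper.join();
}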
+IMPLEMENT_SETTINGS_COLLECTION(RabbitMQSettings, LIST_OF_RABBITMQ_SETTINGS) + +void RabbitMQSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); + else + e.rethrow(); + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} +} diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h new file mode 100644 index 00000000000..5cd52ed9ef7 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + class ASTStorage; + + struct RabbitMQSettings : public SettingsCollection + { + +#define LIST_OF_RABBITMQ_SETTINGS(M) \ + M(SettingString, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ + M(SettingString, rabbitmq_routing_key_list, "5672", "A string of routing keys, separated by dots.", 0) \ + M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \ + M(SettingString, rabbitmq_format, "", "The message format.", 0) \ + M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ + M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ + M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ + M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ + M(SettingBool, rabbitmq_transactional_channel, false, "Use transactional channel for publishing.", 0) \ + + DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) + + void loadFromQuery(ASTStorage & storage_def); + }; +} diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp new file mode 100644 index 00000000000..be42749300d --- /dev/null +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -0,0 +1,421 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Poco/Timer.h" +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace ExchangeType +{ + /// Note that default here means default by implementation and not by rabbitmq settings + static const String DEFAULT = "default"; + static const String FANOUT = "fanout"; + static const String DIRECT = "direct"; + static const String TOPIC = "topic"; + static const String HASH = "consistent_hash"; + static const String HEADERS = "headers"; +} + +static const auto QUEUE_SIZE = 50000; /// Equals capacity of a single rabbitmq queue + +ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( + ChannelPtr consumer_channel_, + HandlerPtr event_handler_, + const String & exchange_name_, + const Names & routing_keys_, + size_t channel_id_, + Poco::Logger * log_, + char row_delimiter_, + bool bind_by_id_, + size_t num_queues_, + const String & exchange_type_, + const String & local_exchange_, + const std::atomic & stopped_) + : ReadBuffer(nullptr, 0) + , consumer_channel(std::move(consumer_channel_)) + , event_handler(event_handler_) + , exchange_name(exchange_name_) + , routing_keys(routing_keys_) + , channel_id(channel_id_) + , 
log(log_) + , row_delimiter(row_delimiter_) + , bind_by_id(bind_by_id_) + , num_queues(num_queues_) + , exchange_type(exchange_type_) + , local_exchange(local_exchange_) + , local_default_exchange(local_exchange + "_" + ExchangeType::DIRECT) + , local_hash_exchange(local_exchange + "_" + ExchangeType::HASH) + , stopped(stopped_) + , messages(QUEUE_SIZE * num_queues) +{ + exchange_type_set = exchange_type != ExchangeType::DEFAULT; + + /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. + * By default there is one queue per consumer. + */ + for (size_t queue_id = 0; queue_id < num_queues; ++queue_id) + { + /// Queue bingings must be declared before any publishing => it must be done here and not in readPrefix() + initQueueBindings(queue_id); + } +} + + +ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() +{ + consumer_channel->close(); + + messages.clear(); + BufferBase::set(nullptr, 0, 0); +} + + +void ReadBufferFromRabbitMQConsumer::initExchange() +{ + /* This direct-exchange is used for default implemenation and for INSERT query (so it is always declared). If exchange_type + * is not set, then there are only two exchanges - external, defined by the client, and local, unique for each table (default). + * This strict division to external and local exchanges is needed to avoid too much complexity with defining exchange_name + * for INSERT query producer and, in general, it is better to distinguish them into separate ones. + */ + consumer_channel->declareExchange(local_default_exchange, AMQP::direct).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare local direct-exchange. Reason: {}", message); + }); + + if (!exchange_type_set) + { + consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare default fanout-exchange. Reason: {}", message); + }); + + /// With fanout exchange the binding key is ignored - a parameter might be arbitrary. All distribution lies on local_exchange. + consumer_channel->bindExchange(exchange_name, local_default_exchange, routing_keys[0]).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind local direct-exchange to fanout-exchange. Reason: {}", message); + }); + + return; + } + + AMQP::ExchangeType type; + if (exchange_type == ExchangeType::FANOUT) type = AMQP::ExchangeType::fanout; + else if (exchange_type == ExchangeType::DIRECT) type = AMQP::ExchangeType::direct; + else if (exchange_type == ExchangeType::TOPIC) type = AMQP::ExchangeType::topic; + else if (exchange_type == ExchangeType::HASH) type = AMQP::ExchangeType::consistent_hash; + else if (exchange_type == ExchangeType::HEADERS) type = AMQP::ExchangeType::headers; + else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); + + /* Declare client's exchange of the specified type and bind it to hash-exchange (if it is not already hash-exchange), which + * will evenly distribute messages between all consumers. (This enables better scaling as without hash-exchange - the only + * option to avoid getting the same messages more than once - is having only one consumer with one queue) + */ + consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare client's {} exchange. 
Reason: {}", exchange_type, message); + }); + + /// No need for declaring hash-exchange if there is only one consumer with one queue or exchange type is already hash + if (!bind_by_id) + return; + + hash_exchange = true; + + if (exchange_type == ExchangeType::HASH) + return; + + /* By default hash exchange distributes messages based on a hash value of a routing key, which must be a string integer. But + * in current case we use hash exchange for binding to another exchange of some other type, which needs its own routing keys + * of other types: headers, patterns and string-keys. This means that hash property must be changed. + */ + AMQP::Table binding_arguments; + binding_arguments["hash-property"] = "message_id"; + + /// Declare exchange for sharding. + consumer_channel->declareExchange(local_hash_exchange, AMQP::consistent_hash, binding_arguments) + .onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); + }); + + /// Then bind client's exchange to sharding exchange (by keys, specified by the client): + + if (exchange_type == ExchangeType::HEADERS) + { + AMQP::Table binding_arguments; + std::vector matching; + + for (const auto & header : routing_keys) + { + boost::split(matching, header, [](char c){ return c == '='; }); + binding_arguments[matching[0]] = matching[1]; + matching.clear(); + } + + /// Routing key can be arbitrary here. + consumer_channel->bindExchange(exchange_name, local_hash_exchange, routing_keys[0], binding_arguments) + .onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind local hash exchange to client's exchange. Reason: {}", message); + }); + } + else + { + for (const auto & routing_key : routing_keys) + { + consumer_channel->bindExchange(exchange_name, local_hash_exchange, routing_key).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind local hash exchange to client's exchange. Reason: {}", message); + }); + } + } +} + + +void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) +{ + /// These variables might be updated later from a separate thread in onError callbacks. + if (!local_exchange_declared || (exchange_type_set && !local_hash_exchange_declared)) + { + initExchange(); + local_exchange_declared = true; + local_hash_exchange_declared = true; + } + + bool default_bindings_created = false, default_bindings_error = false; + bool bindings_created = false, bindings_error = false; + + consumer_channel->declareQueue(AMQP::exclusive) + .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) + { + queues.emplace_back(queue_name_); + subscribed_queue[queue_name_] = false; + + String binding_key = routing_keys[0]; + + /* Every consumer has at least one unique queue. Bind the queues to exchange based on the consumer_channel_id + * in case there is one queue per consumer and bind by queue_id in case there is more than 1 queue per consumer. + * (queue_id is based on channel_id) + */ + if (bind_by_id || hash_exchange) + { + if (queues.size() == 1) + { + binding_key = std::to_string(channel_id); + } + else + { + binding_key = std::to_string(channel_id + queue_id); + } + } + + /// Bind queue to exchange that is used for INSERT query and also for default implementation. 
+ consumer_channel->bindQueue(local_default_exchange, queue_name_, binding_key) + .onSuccess([&] + { + default_bindings_created = true; + }) + .onError([&](const char * message) + { + default_bindings_error = true; + LOG_ERROR(log, "Failed to bind to key {}. Reason: {}", binding_key, message); + }); + + /* Subscription can probably be moved back to readPrefix(), but not sure whether it is better in regard to speed, because + * if moved there, it must(!) be wrapped inside a channel->onSuccess callback or any other, otherwise + * consumer might fail to subscribe and no resubscription will help. + */ + subscribe(queues.back()); + + LOG_DEBUG(log, "Queue " + queue_name_ + " is declared"); + + if (exchange_type_set) + { + if (hash_exchange) + { + /* If exchange_type == hash, then bind directly to this client's exchange (because there is no need for a distributor + * exchange as it is already hash-exchange), otherwise hash-exchange is a local distributor exchange. + */ + String current_hash_exchange = exchange_type == ExchangeType::HASH ? exchange_name : local_hash_exchange; + + /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary. + consumer_channel->bindQueue(current_hash_exchange, queue_name_, binding_key) + .onSuccess([&] + { + bindings_created = true; + }) + .onError([&](const char * message) + { + bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", binding_key, message); + }); + } + else if (exchange_type == ExchangeType::HEADERS) + { + AMQP::Table binding_arguments; + std::vector matching; + + /// It is not parsed for the second time - if it was parsed above, then it would never end up here. + for (const auto & header : routing_keys) + { + boost::split(matching, header, [](char c){ return c == '='; }); + binding_arguments[matching[0]] = matching[1]; + matching.clear(); + } + + consumer_channel->bindQueue(exchange_name, queue_name_, routing_keys[0], binding_arguments) + .onSuccess([&] + { + bindings_created = true; + }) + .onError([&](const char * message) + { + bindings_error = true; + LOG_ERROR(log, "Failed to bind queue to key. Reason: {}", message); + }); + } + else + { + /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange. + for (const auto & routing_key : routing_keys) + { + /// Binding directly to exchange, specified by the client. + consumer_channel->bindQueue(exchange_name, queue_name_, routing_key) + .onSuccess([&] + { + bindings_created = true; + }) + .onError([&](const char * message) + { + bindings_error = true; + LOG_ERROR(log, "Failed to bind queue to key. Reason: {}", message); + }); + } + } + } + }) + .onError([&](const char * message) + { + default_bindings_error = true; + LOG_ERROR(log, "Failed to declare queue on the channel. Reason: {}", message); + }); + + /* Run event loop (which updates local variables in a separate thread) until bindings are created or failed to be created. + * It is important at this moment to make sure that queue bindings are created before any publishing can happen because + * otherwise messages will be routed nowhere. 
+ */ + while (!default_bindings_created && !default_bindings_error || (exchange_type_set && !bindings_created && !bindings_error)) + { + iterateEventLoop(); + } +} + + +void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) +{ + if (subscribed_queue[queue_name]) + return; + + consumer_channel->consume(queue_name, AMQP::noack) + .onSuccess([&](const std::string & /* consumer */) + { + subscribed_queue[queue_name] = true; + consumer_error = false; + ++count_subscribed; + + LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name); + }) + .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) + { + size_t message_size = message.bodySize(); + if (message_size && message.body() != nullptr) + { + String message_received = std::string(message.body(), message.body() + message_size); + if (row_delimiter != '\0') + { + message_received += row_delimiter; + } + + messages.push(message_received); + } + }) + .onError([&](const char * message) + { + consumer_error = true; + LOG_ERROR(log, "Consumer {} failed. Reason: {}", channel_id, message); + }); +} + + +void ReadBufferFromRabbitMQConsumer::checkSubscription() +{ + if (count_subscribed == num_queues) + return; + + wait_subscribed = num_queues; + + /// These variables are updated in a separate thread. + while (count_subscribed != wait_subscribed && !consumer_error) + { + iterateEventLoop(); + } + + LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); + + /// Updated in callbacks which are run by the loop. + if (count_subscribed == num_queues) + return; + + /// A case that should never normally happen. + for (auto & queue : queues) + { + subscribe(queue); + } +} + + +void ReadBufferFromRabbitMQConsumer::iterateEventLoop() +{ + event_handler->iterateLoop(); +} + + +bool ReadBufferFromRabbitMQConsumer::nextImpl() +{ + if (stopped || !allowed) + return false; + + if (messages.tryPop(current)) + { + auto * new_position = const_cast(current.data()); + BufferBase::set(new_position, current.size(), 0); + allowed = false; + + return true; + } + + return false; +} + +} diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h new file mode 100644 index 00000000000..9dbb42bd648 --- /dev/null +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -0,0 +1,87 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace Poco +{ + class Logger; +} + +namespace DB +{ + +using ChannelPtr = std::shared_ptr; +using HandlerPtr = std::shared_ptr; + +class ReadBufferFromRabbitMQConsumer : public ReadBuffer +{ + +public: + ReadBufferFromRabbitMQConsumer( + ChannelPtr consumer_channel_, + HandlerPtr event_handler_, + const String & exchange_name_, + const Names & routing_keys_, + size_t channel_id_, + Poco::Logger * log_, + char row_delimiter_, + bool bind_by_id_, + size_t num_queues_, + const String & exchange_type_, + const String & local_exchange_, + const std::atomic & stopped_); + + ~ReadBufferFromRabbitMQConsumer() override; + + void allowNext() { allowed = true; } // Allow to read next message. 
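A hypothetical standalone model of the read gating implemented by nextImpl() and allowNext() above: each successful read exposes exactly one queued message and then refuses to advance until the caller explicitly re-allows it, which is what lets the block input stream parse RabbitMQ messages one at a time. GatedMessageBuffer and its std::deque are illustrative stand-ins for the real ReadBuffer and ConcurrentBoundedQueue, not part of the patch.

#include <deque>
#include <iostream>
#include <string>

struct GatedMessageBuffer
{
    std::deque<std::string> messages;  /// stands in for ConcurrentBoundedQueue filled by onReceived()
    std::string current;
    bool allowed = true;

    /// Analogue of nextImpl(): expose one message, then block until allowNext() is called.
    bool next()
    {
        if (!allowed || messages.empty())
            return false;
        current = std::move(messages.front());
        messages.pop_front();
        allowed = false;
        return true;
    }

    void allowNext() { allowed = true; }
};

int main()
{
    GatedMessageBuffer buffer;
    buffer.messages = {"{\"k\":1}", "{\"k\":2}"};

    while (buffer.next())
    {
        std::cout << buffer.current << '\n';
        buffer.allowNext();  /// the input stream re-allows reading after each parsed message
    }
}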
+ void checkSubscription(); + + auto getExchange() const { return exchange_name; } + +private: + ChannelPtr consumer_channel; + HandlerPtr event_handler; + + const String exchange_name; + const Names routing_keys; + const size_t channel_id; + const bool bind_by_id; + const size_t num_queues; + + const String exchange_type; + const String local_exchange; + const String local_default_exchange; + const String local_hash_exchange; + + Poco::Logger * log; + char row_delimiter; + bool stalled = false; + bool allowed = true; + const std::atomic & stopped; + + String default_local_exchange; + bool local_exchange_declared = false, local_hash_exchange_declared = false; + bool exchange_type_set = false, hash_exchange = false; + + std::atomic consumer_error = false; + std::atomic count_subscribed = 0, wait_subscribed; + + ConcurrentBoundedQueue messages; + String current; + std::vector queues; + std::unordered_map subscribed_queue; + + bool nextImpl() override; + + void initExchange(); + void initQueueBindings(const size_t queue_id); + void subscribe(const String & queue_name); + void iterateEventLoop(); + +}; +} diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp new file mode 100644 index 00000000000..9d5e7fcd652 --- /dev/null +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -0,0 +1,580 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +static const auto CONNECT_SLEEP = 200; +static const auto RETRIES_MAX = 1000; +static const auto HEARTBEAT_RESCHEDULE_MS = 3000; + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int CANNOT_CONNECT_RABBITMQ; +} + + +StorageRabbitMQ::StorageRabbitMQ( + const StorageID & table_id_, + Context & context_, + const ColumnsDescription & columns_, + const String & host_port_, + const Names & routing_keys_, + const String & exchange_name_, + const String & format_name_, + char row_delimiter_, + const String & exchange_type_, + size_t num_consumers_, + size_t num_queues_, + const bool use_transactional_channel_) + : IStorage(table_id_) + , global_context(context_.getGlobalContext()) + , rabbitmq_context(Context(global_context)) + , routing_keys(global_context.getMacros()->expand(routing_keys_)) + , exchange_name(exchange_name_) + , format_name(global_context.getMacros()->expand(format_name_)) + , row_delimiter(row_delimiter_) + , num_consumers(num_consumers_) + , num_queues(num_queues_) + , exchange_type(exchange_type_) + , use_transactional_channel(use_transactional_channel_) + , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) + , semaphore(0, num_consumers_) + , login_password(std::make_pair( + global_context.getConfigRef().getString("rabbitmq.username"), + global_context.getConfigRef().getString("rabbitmq.password"))) + , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) +{ + loop = std::make_unique(); + uv_loop_init(loop.get()); + + event_handler = std::make_shared(loop.get(), log); + connection = std::make_shared(event_handler.get(), AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); + + size_t cnt_retries = 
0; + while (!connection->ready() && ++cnt_retries != RETRIES_MAX) + { + event_handler->iterateLoop(); + std::this_thread::sleep_for(std::chrono::milliseconds(CONNECT_SLEEP)); + } + + if (!connection->ready()) + throw Exception("Cannot set up connection for consumers", ErrorCodes::CANNOT_CONNECT_RABBITMQ); + + rabbitmq_context.makeQueryContext(); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); + + streaming_task = global_context.getSchedulePool().createTask("RabbitMQStreamingTask", [this]{ threadFunc(); }); + streaming_task->deactivate(); + heartbeat_task = global_context.getSchedulePool().createTask("RabbitMQHeartbeatTask", [this]{ heartbeatFunc(); }); + heartbeat_task->deactivate(); + + bind_by_id = num_consumers > 1 || num_queues > 1; + + auto table_id = getStorageID(); + String table_name = table_id.table_name; + + /// Make sure that local exchange name is unique for each table and is not the same as client's exchange name + local_exchange_name = exchange_name + "_" + table_name; + + /// One looping task for all consumers as they share the same connection == the same handler == the same event loop + looping_task = global_context.getSchedulePool().createTask("RabbitMQLoopingTask", [this]{ loopingFunc(); }); + looping_task->deactivate(); +} + + +void StorageRabbitMQ::heartbeatFunc() +{ + if (!stream_cancelled) + { + LOG_TRACE(log, "Sending RabbitMQ heartbeat"); + connection->heartbeat(); + heartbeat_task->scheduleAfter(HEARTBEAT_RESCHEDULE_MS); + } +} + + +void StorageRabbitMQ::loopingFunc() +{ + LOG_DEBUG(log, "Starting event looping iterations"); + event_handler->startLoop(); +} + + +Pipes StorageRabbitMQ::read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + const SelectQueryInfo & /* query_info */, + const Context & context, + QueryProcessingStage::Enum /* processed_stage */, + size_t /* max_block_size */, + unsigned /* num_streams */) +{ + if (num_created_consumers == 0) + return {}; + + Pipes pipes; + pipes.reserve(num_created_consumers); + + auto sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); + for (size_t i = 0; i < num_created_consumers; ++i) + { + auto rabbit_stream = std::make_shared( + *this, metadata_snapshot, context, column_names, log); + auto converting_stream = std::make_shared( + rabbit_stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Name); + pipes.emplace_back(std::make_shared(converting_stream)); + } + + if (!loop_started) + { + loop_started = true; + looping_task->activateAndSchedule(); + } + + LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); + return pipes; +} + + +BlockOutputStreamPtr StorageRabbitMQ::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context & context) +{ + return std::make_shared(*this, metadata_snapshot, context); +} + + +void StorageRabbitMQ::startup() +{ + for (size_t i = 0; i < num_consumers; ++i) + { + try + { + pushReadBuffer(createReadBuffer()); + ++num_created_consumers; + } + catch (const AMQP::Exception & e) + { + std::cerr << e.what(); + throw; + } + } + + streaming_task->activateAndSchedule(); + heartbeat_task->activateAndSchedule(); +} + + +void StorageRabbitMQ::shutdown() +{ + stream_cancelled = true; + + event_handler->stop(); + + looping_task->deactivate(); + streaming_task->deactivate(); + heartbeat_task->deactivate(); + + for (size_t i = 0; i < num_created_consumers; ++i) + { + popReadBuffer(); + } + + 
connection->close(); +} + + +void StorageRabbitMQ::pushReadBuffer(ConsumerBufferPtr buffer) +{ + std::lock_guard lock(mutex); + buffers.push_back(buffer); + semaphore.set(); +} + + +ConsumerBufferPtr StorageRabbitMQ::popReadBuffer() +{ + return popReadBuffer(std::chrono::milliseconds::zero()); +} + + +ConsumerBufferPtr StorageRabbitMQ::popReadBuffer(std::chrono::milliseconds timeout) +{ + // Wait for the first free buffer + if (timeout == std::chrono::milliseconds::zero()) + semaphore.wait(); + else + { + if (!semaphore.tryWait(timeout.count())) + return nullptr; + } + + // Take the first available buffer from the list + std::lock_guard lock(mutex); + auto buffer = buffers.back(); + buffers.pop_back(); + + return buffer; +} + + +ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() +{ + if (update_channel_id) + next_channel_id += num_queues; + update_channel_id = true; + + ChannelPtr consumer_channel = std::make_shared(connection.get()); + + return std::make_shared( + consumer_channel, event_handler, exchange_name, routing_keys, + next_channel_id, log, row_delimiter, bind_by_id, num_queues, + exchange_type, local_exchange_name, stream_cancelled); +} + + +ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() +{ + return std::make_shared( + parsed_address, global_context, login_password, routing_keys[0], local_exchange_name, + log, num_consumers * num_queues, bind_by_id, use_transactional_channel, + row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); +} + + +bool StorageRabbitMQ::checkDependencies(const StorageID & table_id) +{ + // Check if all dependencies are attached + auto dependencies = DatabaseCatalog::instance().getDependencies(table_id); + if (dependencies.empty()) + return true; + + // Check the dependencies are ready? + for (const auto & db_tab : dependencies) + { + auto table = DatabaseCatalog::instance().tryGetTable(db_tab, global_context); + if (!table) + return false; + + // If it materialized view, check it's target table + auto * materialized_view = dynamic_cast(table.get()); + if (materialized_view && !materialized_view->tryGetTargetTable()) + return false; + + // Check all its dependencies + if (!checkDependencies(db_tab)) + return false; + } + + return true; +} + + +void StorageRabbitMQ::threadFunc() +{ + try + { + auto table_id = getStorageID(); + // Check if at least one direct dependency is attached + size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); + + if (dependencies_count) + { + // Keep streaming as long as there are attached views and streaming is not cancelled + while (!stream_cancelled && num_created_consumers > 0) + { + if (!checkDependencies(table_id)) + break; + + LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); + + if (!streamToViews()) + break; + } + } + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + /// Wait for attached views + if (!stream_cancelled) + streaming_task->schedule(); +} + + +bool StorageRabbitMQ::streamToViews() +{ + auto table_id = getStorageID(); + auto table = DatabaseCatalog::instance().getTable(table_id, global_context); + if (!table) + throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + + InterpreterInsertQuery interpreter(insert, rabbitmq_context, false, true, true); + auto block_io = interpreter.execute(); + + // Create a stream for each consumer and join them in a union stream + BlockInputStreams streams; + streams.reserve(num_created_consumers); + + auto metadata_snapshot = getInMemoryMetadataPtr(); + auto column_names = block_io.out->getHeader().getNames(); + auto sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); + for (size_t i = 0; i < num_created_consumers; ++i) + { + auto rabbit_stream = std::make_shared(*this, metadata_snapshot, rabbitmq_context, column_names, log); + auto converting_stream = std::make_shared(rabbit_stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Name); + + streams.emplace_back(converting_stream); + + // Limit read batch to maximum block size to allow DDL + IBlockInputStream::LocalLimits limits; + const Settings & settings = global_context.getSettingsRef(); + limits.speed_limits.max_execution_time = settings.stream_flush_interval_ms; + limits.timeout_overflow_mode = OverflowMode::BREAK; + rabbit_stream->setLimits(limits); + } + + if (!loop_started) + { + loop_started = true; + looping_task->activateAndSchedule(); + } + + // Join multiple streams if necessary + BlockInputStreamPtr in; + if (streams.size() > 1) + in = std::make_shared(streams, nullptr, streams.size()); + else + in = streams[0]; + + std::atomic stub = {false}; + copyData(*in, *block_io.out, &stub); + + // Check whether the limits were applied during query execution + bool limits_applied = false; + const BlockStreamProfileInfo & info = in->getProfileInfo(); + limits_applied = info.hasAppliedLimit(); + + return limits_applied; +} + + +void registerStorageRabbitMQ(StorageFactory & factory) +{ + auto creator_fn = [](const StorageFactory::Arguments & args) + { + ASTs & engine_args = args.engine_args; + size_t args_count = engine_args.size(); + bool has_settings = args.storage_def->settings; + + RabbitMQSettings rabbitmq_settings; + if (has_settings) + { + rabbitmq_settings.loadFromQuery(*args.storage_def); + } + + String host_port = rabbitmq_settings.rabbitmq_host_port; + if (args_count >= 1) + { + const auto * ast = engine_args[0]->as(); + if (ast && ast->value.getType() == Field::Types::String) + { + host_port = safeGet(ast->value); + } + else + { + throw Exception(String("RabbitMQ host:port must be a string"), ErrorCodes::BAD_ARGUMENTS); + } + } + + String routing_key_list = rabbitmq_settings.rabbitmq_routing_key_list.value; + if (args_count >= 2) + { + engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.local_context); + routing_key_list = engine_args[1]->as().value.safeGet(); + } + + Names routing_keys; + boost::split(routing_keys, routing_key_list, [](char c){ return c == ','; }); + for (String & key : routing_keys) + { + boost::trim(key); + } + + String exchange = rabbitmq_settings.rabbitmq_exchange_name.value; + if (args_count >= 3) + { + 
engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context); + + const auto * ast = engine_args[2]->as(); + if (ast && ast->value.getType() == Field::Types::String) + { + exchange = safeGet(ast->value); + } + } + + String format = rabbitmq_settings.rabbitmq_format.value; + if (args_count >= 4) + { + engine_args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[3], args.local_context); + + const auto * ast = engine_args[3]->as(); + if (ast && ast->value.getType() == Field::Types::String) + { + format = safeGet(ast->value); + } + else + { + throw Exception("Format must be a string", ErrorCodes::BAD_ARGUMENTS); + } + } + + char row_delimiter = rabbitmq_settings.rabbitmq_row_delimiter; + if (args_count >= 5) + { + engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], args.local_context); + + const auto * ast = engine_args[4]->as(); + String arg; + if (ast && ast->value.getType() == Field::Types::String) + { + arg = safeGet(ast->value); + } + else + { + throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS); + } + if (arg.size() > 1) + { + throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS); + } + else if (arg.empty()) + { + row_delimiter = '\0'; + } + else + { + row_delimiter = arg[0]; + } + } + + String exchange_type = rabbitmq_settings.rabbitmq_exchange_type.value; + if (args_count >= 6) + { + engine_args[5] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[5], args.local_context); + + const auto * ast = engine_args[5]->as(); + if (ast && ast->value.getType() == Field::Types::String) + { + exchange_type = safeGet(ast->value); + } + + if (exchange_type != "fanout" && exchange_type != "direct" && exchange_type != "topic" + && exchange_type != "headers" && exchange_type != "consistent_hash") + throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); + } + + UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers; + if (args_count >= 7) + { + const auto * ast = engine_args[6]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + num_consumers = safeGet(ast->value); + } + else + { + throw Exception("Number of consumers must be a positive integer", ErrorCodes::BAD_ARGUMENTS); + } + } + + UInt64 num_queues = rabbitmq_settings.rabbitmq_num_queues; + if (args_count >= 8) + { + const auto * ast = engine_args[7]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + num_consumers = safeGet(ast->value); + } + else + { + throw Exception("Number of queues must be a positive integer", ErrorCodes::BAD_ARGUMENTS); + } + } + + bool use_transactional_channel = static_cast(rabbitmq_settings.rabbitmq_transactional_channel); + if (args_count >= 9) + { + const auto * ast = engine_args[8]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + use_transactional_channel = static_cast(safeGet(ast->value)); + } + else + { + throw Exception("Transactional channel parameter is a bool", ErrorCodes::BAD_ARGUMENTS); + } + } + + return StorageRabbitMQ::create( + args.table_id, args.context, args.columns, + host_port, routing_keys, exchange, format, row_delimiter, exchange_type, num_consumers, + num_queues, use_transactional_channel); + }; + + factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); + +} + + +NamesAndTypesList StorageRabbitMQ::getVirtuals() const +{ + return NamesAndTypesList{ + {"_exchange", std::make_shared()} + }; +} + +} diff --git 
a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h new file mode 100644 index 00000000000..e1c8b33c91e --- /dev/null +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -0,0 +1,123 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +using ChannelPtr = std::shared_ptr; + +class StorageRabbitMQ final: public ext::shared_ptr_helper, public IStorage +{ + friend struct ext::shared_ptr_helper; + +public: + std::string getName() const override { return "RabbitMQ"; } + + bool supportsSettings() const override { return true; } + bool noPushingToViews() const override { return true; } + + void startup() override; + void shutdown() override; + + Pipes read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + BlockOutputStreamPtr write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const Context & context) override; + + void pushReadBuffer(ConsumerBufferPtr buf); + ConsumerBufferPtr popReadBuffer(); + ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + + ProducerBufferPtr createWriteBuffer(); + + const String & getFormatName() const { return format_name; } + NamesAndTypesList getVirtuals() const override; + + +protected: + StorageRabbitMQ( + const StorageID & table_id_, + Context & context_, + const ColumnsDescription & columns_, + const String & host_port_, + const Names & routing_keys_, + const String & exchange_name_, + const String & format_name_, + char row_delimiter_, + const String & exchange_type_, + size_t num_consumers_, + size_t num_queues_, + const bool use_transactional_channel_); + +private: + Context global_context; + Context rabbitmq_context; + + Names routing_keys; + const String exchange_name; + String local_exchange_name; + + const String format_name; + char row_delimiter; + size_t num_consumers; + size_t num_created_consumers = 0; + bool bind_by_id; + size_t num_queues; + const String exchange_type; + const bool use_transactional_channel; + + Poco::Logger * log; + std::pair parsed_address; + std::pair login_password; + + std::shared_ptr loop; + std::shared_ptr event_handler; + std::shared_ptr connection; /// Connection for all consumers + + Poco::Semaphore semaphore; + std::mutex mutex; + std::vector buffers; /// available buffers for RabbitMQ consumers + + size_t next_channel_id = 1; /// Must >= 1 because it is used as a binding key, which has to be > 0 + bool update_channel_id = false; + std::atomic loop_started = false; + + BackgroundSchedulePool::TaskHolder streaming_task; + BackgroundSchedulePool::TaskHolder heartbeat_task; + BackgroundSchedulePool::TaskHolder looping_task; + + std::atomic stream_cancelled{false}; + + ConsumerBufferPtr createReadBuffer(); + + void threadFunc(); + void heartbeatFunc(); + void loopingFunc(); + + void pingConnection() { connection->heartbeat(); } + bool streamToViews(); + bool checkDependencies(const StorageID & table_id); +}; + +} diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp new file mode 100644 index 00000000000..d96a1c02db8 --- /dev/null +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -0,0 +1,230 @@ +#include +#include "Core/Block.h" +#include "Columns/ColumnString.h" 
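A rough standalone sketch of the payload assembly that WriteBufferToRabbitMQProducer::countRow() performs below: rows are written into fixed-size chunks, and once enough rows have accumulated, the full chunks plus the used prefix of the last one (with a trailing row delimiter stripped) are concatenated into a single message payload. assemblePayload and its arguments are hypothetical illustrations, not part of the patch.

#include <iostream>
#include <iterator>
#include <list>
#include <optional>
#include <string>

std::string assemblePayload(const std::list<std::string> & chunks,
                            size_t used_in_last_chunk,
                            std::optional<char> delimiter)
{
    const std::string & last_chunk = chunks.back();
    size_t last_chunk_size = used_in_last_chunk;

    /// Drop a trailing row delimiter so it is not sent as part of the message body.
    if (delimiter && last_chunk_size > 0 && last_chunk[last_chunk_size - 1] == *delimiter)
        --last_chunk_size;

    std::string payload;
    for (auto it = chunks.begin(), end = std::prev(chunks.end()); it != end; ++it)
        payload.append(*it);                       /// all fully written chunks
    payload.append(last_chunk, 0, last_chunk_size); /// plus the used part of the last chunk
    return payload;
}

int main()
{
    /// Two chunks; only the first 4 bytes of the second one are in use, the rest is unused buffer space.
    std::list<std::string> chunks = {"{\"a\":1}\n{\"a\"", ":2}\npadding"};
    std::cout << assemblePayload(chunks, 4, '\n') << '\n';  /// prints both rows, trailing delimiter removed
}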
+#include "Columns/ColumnsNumber.h" +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_CONNECT_RABBITMQ; +} + +static const auto QUEUE_SIZE = 50000; +static const auto CONNECT_SLEEP = 200; +static const auto RETRIES_MAX = 1000; +static const auto LOOP_WAIT = 10; + +WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( + std::pair & parsed_address, + Context & global_context, + const std::pair & login_password_, + const String & routing_key_, + const String & exchange_, + Poco::Logger * log_, + size_t num_queues_, + bool bind_by_id_, + bool use_transactional_channel_, + std::optional delimiter, + size_t rows_per_message, + size_t chunk_size_) + : WriteBuffer(nullptr, 0) + , login_password(login_password_) + , routing_key(routing_key_) + , exchange_name(exchange_ + "_direct") + , log(log_) + , num_queues(num_queues_) + , bind_by_id(bind_by_id_) + , use_transactional_channel(use_transactional_channel_) + , delim(delimiter) + , max_rows(rows_per_message) + , chunk_size(chunk_size_) + , payloads(QUEUE_SIZE * num_queues) +{ + + loop = std::make_unique(); + uv_loop_init(loop.get()); + + event_handler = std::make_unique(loop.get(), log); + connection = std::make_unique(event_handler.get(), AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); + + /* The reason behind making a separate connection for each concurrent producer is explained here: + * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - publishing from + * different threads (as outputStreams are asynchronous) with the same connection leads to internal library errors. + */ + size_t cnt_retries = 0; + while (!connection->ready() && ++cnt_retries != RETRIES_MAX) + { + event_handler->iterateLoop(); + std::this_thread::sleep_for(std::chrono::milliseconds(CONNECT_SLEEP)); + } + + if (!connection->ready()) + { + throw Exception("Cannot set up connection for producer", ErrorCodes::CANNOT_CONNECT_RABBITMQ); + } + + producer_channel = std::make_shared(connection.get()); + checkExchange(); + + /// If publishing should be wrapped in transactions + if (use_transactional_channel) + { + producer_channel->startTransaction(); + } + + writing_task = global_context.getSchedulePool().createTask("RabbitMQWritingTask", [this]{ writingFunc(); }); + writing_task->deactivate(); +} + + +WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() +{ + stop_loop.store(true); + writing_task->deactivate(); + checkExchange(); + + connection->close(); + assert(rows == 0 && chunks.empty()); +} + + +void WriteBufferToRabbitMQProducer::countRow() +{ + if (++rows % max_rows == 0) + { + const std::string & last_chunk = chunks.back(); + size_t last_chunk_size = offset(); + + if (delim && last_chunk[last_chunk_size - 1] == delim) + --last_chunk_size; + + std::string payload; + payload.reserve((chunks.size() - 1) * chunk_size + last_chunk_size); + + for (auto i = chunks.begin(), e = --chunks.end(); i != e; ++i) + payload.append(*i); + + payload.append(last_chunk, 0, last_chunk_size); + + rows = 0; + chunks.clear(); + set(nullptr, 0); + + payloads.push(payload); + } +} + + +void WriteBufferToRabbitMQProducer::writingFunc() +{ + String payload; + + while (!stop_loop || !payloads.empty()) + { + while (!payloads.empty()) + { + payloads.pop(payload); + next_queue = next_queue % num_queues + 1; + + if (bind_by_id) + { + producer_channel->publish(exchange_name, 
std::to_string(next_queue), payload); + } + else + { + producer_channel->publish(exchange_name, routing_key, payload); + } + } + iterateEventLoop(); + } +} + + +void WriteBufferToRabbitMQProducer::checkExchange() +{ + std::atomic exchange_declared = false, exchange_error = false; + + /// The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name. + producer_channel->declareExchange(exchange_name, AMQP::direct, AMQP::passive) + .onSuccess([&]() + { + exchange_declared = true; + }) + .onError([&](const char * message) + { + exchange_error = true; + LOG_ERROR(log, "Exchange for INSERT query was not declared. Reason: {}", message); + }); + + /// These variables are updated in a separate thread and starting the loop blocks current thread + while (!exchange_declared && !exchange_error) + { + iterateEventLoop(); + } +} + + +void WriteBufferToRabbitMQProducer::finilizeProducer() +{ + /// This will make sure everything is published + checkExchange(); + + if (use_transactional_channel) + { + std::atomic answer_received = false, wait_rollback = false; + producer_channel->commitTransaction() + .onSuccess([&]() + { + answer_received = true; + LOG_TRACE(log, "All messages were successfully published"); + }) + .onError([&](const char * message) + { + answer_received = true; + wait_rollback = true; + LOG_TRACE(log, "Publishing not successful: {}", message); + producer_channel->rollbackTransaction() + .onSuccess([&]() + { + wait_rollback = false; + }) + .onError([&](const char * message) + { + LOG_ERROR(log, "Failed to rollback transaction: {}", message); + wait_rollback = false; + }); + }); + + size_t count_retries = 0; + while ((!answer_received || wait_rollback) && ++count_retries != RETRIES_MAX) + { + iterateEventLoop(); + std::this_thread::sleep_for(std::chrono::milliseconds(LOOP_WAIT)); + } + } +} + + +void WriteBufferToRabbitMQProducer::nextImpl() +{ + chunks.push_back(std::string()); + chunks.back().resize(chunk_size); + set(chunks.back().data(), chunk_size); +} + + +void WriteBufferToRabbitMQProducer::iterateEventLoop() +{ + event_handler->iterateLoop(); +} + +} diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h new file mode 100644 index 00000000000..26a52b0b41c --- /dev/null +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -0,0 +1,76 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +using ChannelPtr = std::shared_ptr; + +class WriteBufferToRabbitMQProducer : public WriteBuffer +{ +public: + WriteBufferToRabbitMQProducer( + std::pair & parsed_address, + Context & global_context, + const std::pair & login_password_, + const String & routing_key_, + const String & exchange_, + Poco::Logger * log_, + size_t num_queues_, + bool bind_by_id_, + bool use_transactional_channel_, + std::optional delimiter, + size_t rows_per_message, + size_t chunk_size_ + ); + + ~WriteBufferToRabbitMQProducer() override; + + void countRow(); + void activateWriting() { writing_task->activateAndSchedule(); } + +private: + void nextImpl() override; + void checkExchange(); + void iterateEventLoop(); + void writingFunc(); + void finilizeProducer(); + + const std::pair login_password; + const String routing_key; + const String exchange_name; + const bool bind_by_id; + const size_t num_queues; + const bool use_transactional_channel; + + BackgroundSchedulePool::TaskHolder writing_task; + std::atomic 
stop_loop = false; + + std::unique_ptr loop; + std::unique_ptr event_handler; + std::unique_ptr connection; + ChannelPtr producer_channel; + + ConcurrentBoundedQueue payloads; + size_t next_queue = 0; + + Poco::Logger * log; + const std::optional delim; + const size_t max_rows; + const size_t chunk_size; + size_t count_mes = 0; + size_t rows = 0; + std::list chunks; +}; + +} diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 85b61dd34f9..3bc88f5a289 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -713,7 +713,10 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl for (const auto & column : block_to_write) list_of_columns->children.push_back(std::make_shared(column.name)); - InterpreterInsertQuery interpreter{insert, global_context, allow_materialized}; + auto insert_context = Context(global_context); + insert_context.makeQueryContext(); + + InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; auto block_io = interpreter.execute(); block_io.out->writePrefix(); diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 83a093d5635..4ea028c7ca8 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -96,19 +96,31 @@ String StorageDictionary::generateNamesAndTypesDescription(const NamesAndTypesLi StorageDictionary::StorageDictionary( const StorageID & table_id_, const String & dictionary_name_, - const DictionaryStructure & dictionary_structure_) + const ColumnsDescription & columns_, + Location location_) : IStorage(table_id_) , dictionary_name(dictionary_name_) + , location(location_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(ColumnsDescription{getNamesAndTypes(dictionary_structure_)}); + storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); } +StorageDictionary::StorageDictionary( + const StorageID & table_id_, const String & dictionary_name_, const DictionaryStructure & dictionary_structure_, Location location_) + : StorageDictionary(table_id_, dictionary_name_, ColumnsDescription{getNamesAndTypes(dictionary_structure_)}, location_) +{ +} + + void StorageDictionary::checkTableCanBeDropped() const { - throw Exception("Cannot detach dictionary " + backQuote(dictionary_name) + " as table, use DETACH DICTIONARY query.", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE); + if (location == Location::SameDatabaseAndNameAsDictionary) + throw Exception("Cannot detach dictionary " + backQuote(dictionary_name) + " as table, use DETACH DICTIONARY query", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE); + if (location == Location::DictionaryDatabase) + throw Exception("Cannot detach table " + getStorageID().getFullTableName() + " from a database with DICTIONARY engine", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE); } Pipes StorageDictionary::read( @@ -141,11 +153,14 @@ void registerStorageDictionary(StorageFactory & factory) args.engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[0], args.local_context); String dictionary_name = args.engine_args[0]->as().value.safeGet(); - const auto & dictionary = args.context.getExternalDictionariesLoader().getDictionary(dictionary_name); - const DictionaryStructure & dictionary_structure = dictionary->getStructure(); - checkNamesAndTypesCompatibleWithDictionary(dictionary_name, args.columns, dictionary_structure); + if (!args.attach) + { + const auto & dictionary = 
args.context.getExternalDictionariesLoader().getDictionary(dictionary_name); + const DictionaryStructure & dictionary_structure = dictionary->getStructure(); + checkNamesAndTypesCompatibleWithDictionary(dictionary_name, args.columns, dictionary_structure); + } - return StorageDictionary::create(args.table_id, dictionary_name, dictionary_structure); + return StorageDictionary::create(args.table_id, dictionary_name, args.columns, StorageDictionary::Location::Custom); }); } diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 6175902381b..f152f8c9932 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -30,14 +30,40 @@ public: const String & dictionaryName() const { return dictionary_name; } + /// Specifies where the table is located relative to the dictionary. + enum class Location + { + /// Table was created automatically as an element of a database with the Dictionary engine. + DictionaryDatabase, + + /// Table was created automatically along with a dictionary + /// and has the same database and name as the dictionary. + /// It provides table-like access to the dictionary. + /// User cannot drop that table. + SameDatabaseAndNameAsDictionary, + + /// Table was created explicitly by a statement like + /// CREATE TABLE ... ENGINE=Dictionary + /// User chose the table's database and name and can drop that table. + Custom, + }; + private: - String dictionary_name; + const String dictionary_name; + const Location location; protected: StorageDictionary( const StorageID & table_id_, const String & dictionary_name_, - const DictionaryStructure & dictionary_structure); + const ColumnsDescription & columns_, + Location location_); + + StorageDictionary( + const StorageID & table_id_, + const String & dictionary_name_, + const DictionaryStructure & dictionary_structure, + Location location_); }; } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a9d2d6cfdfd..37703f9a719 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -722,7 +722,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons std::stringstream exception_message; if (!has_sharding_key) exception_message << "No sharding key"; - else if (sharding_key_is_deterministic) + else if (!sharding_key_is_deterministic) exception_message << "Sharding key is not deterministic"; else exception_message << "Sharding key " << sharding_key_column_name << " is not used"; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c3b5758c7a5..69da73653eb 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -102,12 +102,7 @@ void StorageMergeTree::startup() /// Ensure that thread started only after assignment to 'merging_mutating_task_handle' is done. merge_pool.startTask(merging_mutating_task_handle); - if (areBackgroundMovesNeeded()) - { - auto & move_pool = global_context.getBackgroundMovePool(); - moving_task_handle = move_pool.createTask([this] { return movePartsTask(); }); - move_pool.startTask(moving_task_handle); - } + startBackgroundMovesIfNeeded(); } catch (...) 
{ @@ -464,6 +459,18 @@ bool StorageMergeTree::isMutationDone(Int64 mutation_version) const return true; } + +void StorageMergeTree::startBackgroundMovesIfNeeded() +{ + if (areBackgroundMovesNeeded() && !moving_task_handle) + { + auto & move_pool = global_context.getBackgroundMovePool(); + moving_task_handle = move_pool.createTask([this] { return movePartsTask(); }); + move_pool.startTask(moving_task_handle); + } +} + + std::vector StorageMergeTree::getMutationsStatus() const { std::lock_guard lock(currently_processing_in_background_mutex); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 9a45fd285dc..9418f1a073c 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -159,6 +159,8 @@ private: /// Just checks versions of each active data part bool isMutationDone(Int64 mutation_version) const; + void startBackgroundMovesIfNeeded() override; + friend class MergeTreeBlockOutputStream; friend class MergeTreeData; friend struct CurrentlyMergingPartsTagger; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 96e376f85fe..fb21a567572 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3263,12 +3263,7 @@ void StorageReplicatedMergeTree::startup() pool.startTask(queue_task_handle); } - if (areBackgroundMovesNeeded()) - { - auto & pool = global_context.getBackgroundMovePool(); - move_parts_task_handle = pool.createTask([this] { return movePartsTask(); }); - pool.startTask(move_parts_task_handle); - } + startBackgroundMovesIfNeeded(); } catch (...) { @@ -5702,4 +5697,16 @@ MutationCommands StorageReplicatedMergeTree::getFirtsAlterMutationCommandsForPar { return queue.getFirstAlterMutationCommandsForPart(part); } + + +void StorageReplicatedMergeTree::startBackgroundMovesIfNeeded() +{ + if (areBackgroundMovesNeeded() && !move_parts_task_handle) + { + auto & pool = global_context.getBackgroundMovePool(); + move_parts_task_handle = pool.createTask([this] { return movePartsTask(); }); + pool.startTask(move_parts_task_handle); + } +} + } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 712d997b26c..078b8d90458 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -551,6 +551,8 @@ private: MutationCommands getFirtsAlterMutationCommandsForPart(const DataPartPtr & part) const override; + void startBackgroundMovesIfNeeded() override; + protected: /** If not 'attach', either creates a new table in ZK, or adds a replica to an existing table. 
*/ diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 97998e11ea5..3c3b96b9cff 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -32,6 +32,7 @@ const char * auto_config_build[] "BUILD_INCLUDE_DIRECTORIES", "@BUILD_INCLUDE_DIRECTORIES@", "STATIC", "@USE_STATIC_LIBRARIES@", "SPLIT_BINARY", "@CLICKHOUSE_SPLIT_BINARY@", + "UNBUNDLED", "@UNBUNDLED@", "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@ENABLE_JEMALLOC@", diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index cf00bbb5254..554b8cfd1eb 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -22,6 +22,7 @@ StorageSystemDisks::StorageSystemDisks(const std::string & name_) {"free_space", std::make_shared()}, {"total_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, + {"type", std::make_shared()}, })); setInMemoryMetadata(storage_metadata); } @@ -42,6 +43,7 @@ Pipes StorageSystemDisks::read( MutableColumnPtr col_free = ColumnUInt64::create(); MutableColumnPtr col_total = ColumnUInt64::create(); MutableColumnPtr col_keep = ColumnUInt64::create(); + MutableColumnPtr col_type = ColumnString::create(); for (const auto & [disk_name, disk_ptr] : context.getDisksMap()) { @@ -50,6 +52,7 @@ Pipes StorageSystemDisks::read( col_free->insert(disk_ptr->getAvailableSpace()); col_total->insert(disk_ptr->getTotalSpace()); col_keep->insert(disk_ptr->getKeepingFreeSpace()); + col_type->insert(disk_ptr->getType()); } Columns res_columns; @@ -58,6 +61,7 @@ Pipes StorageSystemDisks::read( res_columns.emplace_back(std::move(col_free)); res_columns.emplace_back(std::move(col_total)); res_columns.emplace_back(std::move(col_keep)); + res_columns.emplace_back(std::move(col_type)); UInt64 num_rows = res_columns.at(0)->size(); Chunk chunk(std::move(res_columns), num_rows); diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp index a663e3307fe..360256c1f45 100644 --- a/src/Storages/System/StorageSystemGrants.cpp +++ b/src/Storages/System/StorageSystemGrants.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,7 @@ namespace DB { using EntityType = IAccessEntity::Type; - +using Kind = AccessRightsElementWithOptions::Kind; NamesAndTypesList StorageSystemGrants::getNamesAndTypes() { @@ -63,7 +64,7 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & const String * database, const String * table, const String * column, - bool is_partial_revoke, + Kind kind, bool grant_option) { if (grantee_type == EntityType::USER) @@ -118,15 +119,13 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & column_column_null_map.push_back(true); } - column_is_partial_revoke.push_back(is_partial_revoke); + column_is_partial_revoke.push_back(kind == Kind::REVOKE); column_grant_option.push_back(grant_option); }; auto add_rows = [&](const String & grantee_name, IAccessEntity::Type grantee_type, - const AccessRightsElements & elements, - bool is_partial_revoke, - bool grant_option) + const AccessRightsElementsWithOptions & elements) { for (const auto & element : elements) { @@ -140,13 +139,13 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, 
const Context & if (element.any_column) { for (const auto & access_type : access_types) - add_row(grantee_name, grantee_type, access_type, database, table, nullptr, is_partial_revoke, grant_option); + add_row(grantee_name, grantee_type, access_type, database, table, nullptr, element.kind, element.grant_option); } else { for (const auto & access_type : access_types) for (const auto & column : element.columns) - add_row(grantee_name, grantee_type, access_type, database, table, &column, is_partial_revoke, grant_option); + add_row(grantee_name, grantee_type, access_type, database, table, &column, element.kind, element.grant_option); } } }; @@ -157,7 +156,7 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & if (!entity) continue; - const GrantedAccess * access = nullptr; + const AccessRights * access = nullptr; if (auto role = typeid_cast(entity)) access = &role->access; else if (auto user = typeid_cast(entity)) @@ -167,13 +166,8 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & const String & grantee_name = entity->getName(); const auto grantee_type = entity->getType(); - auto grants_and_revokes = access->access.getGrantsAndPartialRevokes(); - auto grants_and_revokes_with_grant_option = access->access_with_grant_option.getGrantsAndPartialRevokes(); - - add_rows(grantee_name, grantee_type, grants_and_revokes.grants, /* is_partial_revoke = */ false, /* grant_option = */ false); - add_rows(grantee_name, grantee_type, grants_and_revokes.revokes, /* is_partial_revoke = */ true, /* grant_option = */ false); - add_rows(grantee_name, grantee_type, grants_and_revokes_with_grant_option.grants, /* is_partial_revoke = */ false, /* grant_option = */ true); - add_rows(grantee_name, grantee_type, grants_and_revokes_with_grant_option.revokes, /* is_partial_revoke = */ true, /* grant_option = */ true); + auto elements = access->getElements(); + add_rows(grantee_name, grantee_type, elements); } } diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 88caeca3c89..f9170839bc8 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -48,6 +48,10 @@ void registerStorages() #if USE_RDKAFKA registerStorageKafka(factory); #endif + + #if USE_AMQPCPP + registerStorageRabbitMQ(factory); + #endif } } diff --git a/src/Storages/registerStorages.h b/src/Storages/registerStorages.h index 2b5d209d719..546da9d3bce 100644 --- a/src/Storages/registerStorages.h +++ b/src/Storages/registerStorages.h @@ -49,6 +49,10 @@ void registerStorageMongoDB(StorageFactory & factory); void registerStorageKafka(StorageFactory & factory); #endif +#if USE_AMQPCPP +void registerStorageRabbitMQ(StorageFactory & factory); +#endif + void registerStorages(); } diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 318d667d9b0..bd7d7d5d1b8 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,8 @@ struct State {"apply_type", std::make_shared()}, {"apply_status", std::make_shared()}, {"create_time", std::make_shared()}, + {"field", std::make_shared()}, + {"value", std::make_shared()}, }; static const State & instance() @@ -117,3 +120,12 @@ TEST(TransformQueryForExternalDatabase, Issue7245) R"(SELECT "apply_id", "apply_type", 
"apply_status", "create_time" FROM "test"."table" WHERE ("apply_type" = 2) AND ("create_time" > '2018-12-25 01:02:03') AND ("apply_status" IN (3, 4)))", state.context, state.columns); } + +TEST(TransformQueryForExternalDatabase, Aliases) +{ + const State & state = State::instance(); + + check("SELECT field AS value, field AS display WHERE field NOT IN ('') AND display LIKE '%test%'", + R"(SELECT "field" FROM "test"."table" WHERE ("field" NOT IN ('')) AND ("field" LIKE '%test%'))", + state.context, state.columns); +} diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 11b98b782e0..0d34f0b3068 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -71,6 +71,24 @@ public: } }; +class DropAliasesMatcher +{ +public: + struct Data {}; + Data data; + + static bool needChildVisit(ASTPtr &, const ASTPtr &) + { + return true; + } + + static void visit(ASTPtr & node, Data) + { + if (!node->tryGetAlias().empty()) + node->setAlias({}); + } +}; + void replaceConstantExpressions(ASTPtr & node, const Context & context, const NamesAndTypesList & all_columns) { auto syntax_result = SyntaxAnalyzer(context).analyze(node, all_columns); @@ -80,6 +98,13 @@ void replaceConstantExpressions(ASTPtr & node, const Context & context, const Na visitor.visit(node); } +void dropAliases(ASTPtr & node) +{ + DropAliasesMatcher::Data data; + InDepthNodeVisitor visitor(data); + visitor.visit(node); +} + bool isCompatible(const IAST & node) { @@ -192,6 +217,9 @@ String transformQueryForExternalDatabase( } } + ASTPtr select_ptr = select; + dropAliases(select_ptr); + std::stringstream out; IAST::FormatSettings settings(out, true); settings.identifier_quoting_style = identifier_quoting_style; diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index cfeb3907136..7fa3868d142 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -186,6 +186,9 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C secure); } + if (!remote_table_function_ptr && remote_table.empty()) + throw Exception("The name of remote table cannot be empty", ErrorCodes::BAD_ARGUMENTS); + auto remote_table_id = StorageID::createEmpty(); remote_table_id.database_name = remote_database; remote_table_id.table_name = remote_table; diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 79fa2c15068..fc3cfb0b432 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -4,6 +4,7 @@ import sys import os import os.path import re +import json from argparse import ArgumentParser from argparse import FileType @@ -377,6 +378,76 @@ def check_server_started(client, retry_count): return False +class BuildFlags(object): + THREAD = 'thread-sanitizer' + ADDRESS = 'address-sanitizer' + UNDEFINED = 'ub-sanitizer' + MEMORY = 'memory-sanitizer' + DEBUG = 'debug-build' + UNBUNDLED = 'unbundled-build' + RELEASE = 'release-build' + DATABASE_ATOMIC = 'database-atomic' + POLYMORPHIC_PARTS = 'polymorphic-parts' + + +def collect_build_flags(client): + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'") + result = [] + + if clickhouse_proc.returncode == 0: + if '-fsanitize=thread' in stdout: + result.append(BuildFlags.THREAD) + elif '-fsanitize=address' in stdout: + 
result.append(BuildFlags.ADDRESS) + elif '-fsanitize=undefined' in stdout: + result.append(BuildFlags.UNDEFINED) + elif '-fsanitize=memory' in stdout: + result.append(BuildFlags.MEMORY) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'") + + if clickhouse_proc.returncode == 0: + if 'Debug' in stdout: + result.append(BuildFlags.DEBUG) + elif 'RelWithDebInfo' in stdout or 'Release' in stdout: + result.append(BuildFlags.RELEASE) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'") + + if clickhouse_proc.returncode == 0: + if 'ON' in stdout or '1' in stdout: + result.append(BuildFlags.UNBUNDLED) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.settings WHERE name = 'default_database_engine'") + + if clickhouse_proc.returncode == 0: + if 'Atomic' in stdout: + result.append(BuildFlags.DATABASE_ATOMIC) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'") + + if clickhouse_proc.returncode == 0: + if '10485760' in stdout: + result.append(BuildFlags.POLYMORPHIC_PARTS) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + return result + + def main(args): global SERVER_DIED global exit_code @@ -392,6 +463,16 @@ def main(args): if not check_server_started(args.client, args.server_check_retries): raise Exception("clickhouse-server is not responding. 
Cannot execute 'SELECT 1' query.") + build_flags = collect_build_flags(args.client) + if args.use_skip_list: + tests_to_skip_from_list = collect_tests_to_skip(args.skip_list_path, build_flags) + else: + tests_to_skip_from_list = {} + + if args.skip: + args.skip = set(args.skip) | tests_to_skip_from_list + else: + args.skip = tests_to_skip_from_list base_dir = os.path.abspath(args.queries) tmp_dir = os.path.abspath(args.tmp) @@ -604,6 +685,21 @@ def get_additional_client_options_url(args): return '' +def collect_tests_to_skip(skip_list_path, build_flags): + result = set([]) + if not os.path.exists(skip_list_path): + return result + + with open(skip_list_path, 'r') as skip_list_file: + skip_dict = json.load(skip_list_file) + for build_flag in build_flags: + result |= set(skip_dict[build_flag]) + + if len(result) > 0: + print("Found file with skip-list {}, {} test will be skipped".format(skip_list_path, len(result))) + + return result + if __name__ == '__main__': parser=ArgumentParser(description='ClickHouse functional tests') parser.add_argument('-q', '--queries', help='Path to queries dir') @@ -627,6 +723,8 @@ if __name__ == '__main__': parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel') parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context') parser.add_argument('-r', '--server-check-retries', default=30, type=int, help='Num of tries to execute SELECT 1 before tests started') + parser.add_argument('--skip-list-path', help="Path to skip-list file") + parser.add_argument('--use-skip-list', action='store_true', default=False, help="Use skip list to skip tests if found") parser.add_argument('--no-stateless', action='store_true', help='Disable all stateless tests') parser.add_argument('--no-stateful', action='store_true', help='Disable all stateful tests') @@ -655,6 +753,10 @@ if __name__ == '__main__': if args.queries is None: print("Failed to detect path to the queries directory. Please specify it with '--queries' option.", file=sys.stderr) exit(1) + + if args.skip_list_path is None: + args.skip_list_path = os.path.join(args.queries, 'skip_list.json') + if args.tmp is None: args.tmp = args.queries if args.client is None: diff --git a/tests/integration/README.md b/tests/integration/README.md index c72c009a0d6..a3eb577d609 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -25,12 +25,13 @@ To check, that you have access to Docker, run `docker ps`. Run the tests with the `pytest` command. To select which tests to run, use: `pytest -k ` By default tests are run with system-wide client binary, server binary and base configs. To change that, -set the following environment variables: +set the following environment variables:` * `CLICKHOUSE_TESTS_SERVER_BIN_PATH` to choose the server binary. * `CLICKHOUSE_TESTS_CLIENT_BIN_PATH` to choose the client binary. * `CLICKHOUSE_TESTS_BASE_CONFIG_DIR` to choose the directory from which base configs (`config.xml` and `users.xml`) are taken. 
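For example, a run of a single test suite against a locally built server might look like this (a sketch only; the paths are illustrative and depend on where you built ClickHouse):

```bash
export CLICKHOUSE_TESTS_SERVER_BIN_PATH=$HOME/ClickHouse/build/programs/clickhouse-server
export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=$HOME/ClickHouse/build/programs/clickhouse-client
export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=$HOME/ClickHouse/programs/server
pytest -k test_storage_rabbitmq
```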
+For tests that use common docker compose files you may need to set up their path with environment variable: `DOCKER_COMPOSE_DIR=$HOME/ClickHouse/docker/test/integration/runner/compose` ### Running with runner script diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index dec14361a0f..f3e5dc1fab5 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -31,7 +31,7 @@ from .hdfs_api import HDFSApi HELPERS_DIR = p.dirname(__file__) CLICKHOUSE_ROOT_DIR = p.join(p.dirname(__file__), "../../..") -DOCKER_COMPOSE_DIR = p.join(CLICKHOUSE_ROOT_DIR, "docker/test/integration/compose/") +LOCAL_DOCKER_COMPOSE_DIR = p.join(CLICKHOUSE_ROOT_DIR, "docker/test/integration/runner/compose/") DEFAULT_ENV_NAME = 'env_file' SANITIZER_SIGN = "==================" @@ -52,7 +52,7 @@ def subprocess_check_call(args): def subprocess_call(args): - # Uncomment for debugging + # Uncomment for debugging..; # print('run:', ' ' . join(args)) subprocess.call(args) @@ -67,6 +67,17 @@ def get_odbc_bridge_path(): return '/usr/bin/clickhouse-odbc-bridge' return path +def get_docker_compose_path(): + compose_path = os.environ.get('DOCKER_COMPOSE_DIR') + if compose_path is not None: + return os.path.dirname(compose_path) + else: + if os.path.exists(os.path.dirname('/compose/')): + return os.path.dirname('/compose/') #default in docker runner container + else: + print("Fallback docker_compose_path to LOCAL_DOCKER_COMPOSE_DIR: {}".format(LOCAL_DOCKER_COMPOSE_DIR)) + return LOCAL_DOCKER_COMPOSE_DIR + class ClickHouseCluster: """ClickHouse cluster with several instances and (possibly) ZooKeeper. @@ -109,6 +120,7 @@ class ClickHouseCluster: self.base_zookeeper_cmd = None self.base_mysql_cmd = [] self.base_kafka_cmd = [] + self.base_rabbitmq_cmd = [] self.base_cassandra_cmd = [] self.pre_zookeeper_commands = [] self.instances = {} @@ -116,6 +128,7 @@ class ClickHouseCluster: self.with_mysql = False self.with_postgres = False self.with_kafka = False + self.with_rabbitmq = False self.with_odbc_drivers = False self.with_hdfs = False self.with_mongo = False @@ -148,7 +161,7 @@ class ClickHouseCluster: return cmd def add_instance(self, name, config_dir=None, main_configs=None, user_configs=None, macros=None, - with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, + with_zookeeper=False, with_mysql=False, with_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, with_minio=False, with_cassandra=False, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", @@ -172,24 +185,26 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs or [], user_configs or [], macros or {}, with_zookeeper, - self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, with_cassandra, + self.zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, with_cassandra, self.base_configs_dir, self.server_bin_path, self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables or {}, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address, with_installed_binary=with_installed_binary, tmpfs=tmpfs or []) + docker_compose_yml_dir = get_docker_compose_path() + self.instances[name] = instance if ipv4_address is not None or ipv6_address is not 
None: self.with_net_trics = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_net.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_net.yml')]) self.base_cmd.extend(['--file', instance.docker_compose_path]) cmds = [] if with_zookeeper and not self.with_zookeeper: if not zookeeper_docker_compose_path: - zookeeper_docker_compose_path = p.join(DOCKER_COMPOSE_DIR, 'docker_compose_zookeeper.yml') + zookeeper_docker_compose_path = p.join(docker_compose_yml_dir, 'docker_compose_zookeeper.yml') self.with_zookeeper = True self.zookeeper_use_tmpfs = zookeeper_use_tmpfs @@ -200,79 +215,86 @@ class ClickHouseCluster: if with_mysql and not self.with_mysql: self.with_mysql = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mysql.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]) self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mysql.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')] cmds.append(self.base_mysql_cmd) if with_postgres and not self.with_postgres: self.with_postgres = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_postgres.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]) self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_postgres.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')] cmds.append(self.base_postgres_cmd) if with_odbc_drivers and not self.with_odbc_drivers: self.with_odbc_drivers = True if not self.with_mysql: self.with_mysql = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mysql.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]) self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mysql.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')] cmds.append(self.base_mysql_cmd) if not self.with_postgres: self.with_postgres = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_postgres.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]) self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name, '--file', - p.join(DOCKER_COMPOSE_DIR, 'docker_compose_postgres.yml')] + p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')] cmds.append(self.base_postgres_cmd) if with_kafka and not self.with_kafka: self.with_kafka = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_kafka.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')]) self.base_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_kafka.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')] cmds.append(self.base_kafka_cmd) + if 
with_rabbitmq and not self.with_rabbitmq: + self.with_rabbitmq = True + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]) + self.base_rabbitmq_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')] + cmds.append(self.base_rabbitmq_cmd) + if with_hdfs and not self.with_hdfs: self.with_hdfs = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_hdfs.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')]) self.base_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_hdfs.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')] cmds.append(self.base_hdfs_cmd) if with_mongo and not self.with_mongo: self.with_mongo = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mongo.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]) self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mongo.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')] cmds.append(self.base_mongo_cmd) if self.with_net_trics: for cmd in cmds: - cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_net.yml')]) + cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_net.yml')]) if with_redis and not self.with_redis: self.with_redis = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_redis.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')]) self.base_redis_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_redis.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')] if with_minio and not self.with_minio: self.with_minio = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_minio.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')]) self.base_minio_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_minio.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')] cmds.append(self.base_minio_cmd) if with_cassandra and not self.with_cassandra: self.with_cassandra = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_cassandra.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]) self.base_cassandra_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_cassandra.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')] return instance @@ -529,6 +551,10 @@ class ClickHouseCluster: self.kafka_docker_id = self.get_instance_docker_id('kafka1') self.wait_schema_registry_to_start(120) + if self.with_rabbitmq and 
self.base_rabbitmq_cmd: + subprocess_check_call(self.base_rabbitmq_cmd + common_opts + ['--renew-anon-volumes']) + self.rabbitmq_docker_id = self.get_instance_docker_id('rabbitmq1') + if self.with_hdfs and self.base_hdfs_cmd: subprocess_check_call(self.base_hdfs_cmd + common_opts) self.wait_hdfs_to_start(120) @@ -681,7 +707,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, with_cassandra, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, with_cassandra, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", @@ -708,6 +734,7 @@ class ClickHouseInstance: self.with_mysql = with_mysql self.with_kafka = with_kafka + self.with_rabbitmq = with_rabbitmq self.with_mongo = with_mongo self.with_redis = with_redis self.with_minio = with_minio @@ -1058,6 +1085,9 @@ class ClickHouseInstance: depends_on.append("kafka1") depends_on.append("schema-registry") + if self.with_rabbitmq: + depends_on.append("rabbitmq1") + if self.with_zookeeper: depends_on.append("zoo1") depends_on.append("zoo2") @@ -1137,3 +1167,4 @@ class ClickHouseKiller(object): def __exit__(self, exc_type, exc_val, exc_tb): self.clickhouse_node.restore_clickhouse() + diff --git a/tests/integration/runner b/tests/integration/runner index 399c87dcf06..a009a1ce647 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -76,6 +76,12 @@ if __name__ == "__main__": default=False, help="Don't use net host in parent docker container") + parser.add_argument( + "--docker-image-version", + default="latest", + help="Version of docker image which runner will use to run tests") + + parser.add_argument('pytest_args', nargs='*', help="args for pytest command") args = parser.parse_args() @@ -106,7 +112,7 @@ if __name__ == "__main__": cfg=args.configs_dir, pth=args.clickhouse_root, opts=' '.join(args.pytest_args), - img=DIND_INTEGRATION_TESTS_IMAGE_NAME, + img=DIND_INTEGRATION_TESTS_IMAGE_NAME + ":" + args.docker_image_version, name=CONTAINER_NAME, command=args.command ) diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index ae75f69d28a..392a4ef98ee 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -64,6 +64,56 @@ def test_grant_option(): instance.query('REVOKE SELECT ON test.table FROM A, B') +def test_revoke_requires_grant_option(): + instance.query("CREATE USER A") + instance.query("CREATE USER B") + + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + + expected_error = "Not enough privileges" + assert expected_error in instance.query_and_get_error("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + + instance.query("GRANT SELECT ON test.table TO A") + expected_error = "privileges have been granted, but without grant option" + assert expected_error in instance.query_and_get_error("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + + instance.query("GRANT SELECT ON 
test.table TO A WITH GRANT OPTION") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE SELECT ON test.* FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE ALL ON test.* FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE ALL ON *.* FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("REVOKE GRANT OPTION FOR ALL ON *.* FROM A") + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + expected_error = "privileges have been granted, but without grant option" + assert expected_error in instance.query_and_get_error("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + + instance.query("GRANT SELECT ON test.* TO A WITH GRANT OPTION") + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + def test_introspection(): instance.query("CREATE USER A") instance.query("CREATE USER B") @@ -100,7 +150,6 @@ def test_introspection(): assert instance.query("SELECT * from system.grants WHERE user_name IN ('A', 'B') ORDER BY user_name, access_type, grant_option") ==\ TSV([[ "A", "\N", "SELECT", "test", "table", "\N", 0, 0 ], - [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 0 ], [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 1 ]]) diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/__init__.py b/tests/integration/test_dictionaries_dependency/__init__.py similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/__init__.py rename to tests/integration/test_dictionaries_dependency/__init__.py diff --git a/tests/integration/test_dictionaries_dependency/configs/disable_lazy_load.xml b/tests/integration/test_dictionaries_dependency/configs/disable_lazy_load.xml new file mode 100644 index 00000000000..d01f7a0155b --- /dev/null +++ b/tests/integration/test_dictionaries_dependency/configs/disable_lazy_load.xml @@ -0,0 +1,4 @@ + + false + + diff --git a/tests/integration/test_dictionaries_dependency/test.py b/tests/integration/test_dictionaries_dependency/test.py new file mode 100644 index 00000000000..4505bf73a7c --- /dev/null +++ b/tests/integration/test_dictionaries_dependency/test.py @@ -0,0 +1,108 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', stay_alive=True) +node2 = cluster.add_instance('node2', stay_alive=True, main_configs=['configs/disable_lazy_load.xml']) +nodes = [node1, node2] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + for node in nodes: + 
node.query("CREATE DATABASE IF NOT EXISTS test") + node.query("CREATE DATABASE IF NOT EXISTS atest") + node.query("CREATE DATABASE IF NOT EXISTS ztest") + node.query("CREATE TABLE test.source(x UInt64, y UInt64) ENGINE=Log") + node.query("INSERT INTO test.source VALUES (5,6)") + + node.query("CREATE DICTIONARY test.dict(x UInt64, y UInt64) PRIMARY KEY x "\ + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'source' DB 'test')) "\ + "LAYOUT(FLAT()) LIFETIME(0)") + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + for node in nodes: + node.query("DROP DICTIONARY IF EXISTS test.adict") + node.query("DROP DICTIONARY IF EXISTS test.zdict") + node.query("DROP DICTIONARY IF EXISTS atest.dict") + node.query("DROP DICTIONARY IF EXISTS ztest.dict") + node.query("DROP TABLE IF EXISTS test.atbl") + node.query("DROP TABLE IF EXISTS test.ztbl") + node.query("DROP TABLE IF EXISTS atest.tbl") + node.query("DROP TABLE IF EXISTS ztest.tbl") + node.query("DROP DATABASE IF EXISTS dict_db") + + +@pytest.mark.parametrize("node", nodes) +def test_dependency_via_implicit_table(node): + d_names = ["test.adict", "test.zdict", "atest.dict", "ztest.dict"] + for d_name in d_names: + node.query("CREATE DICTIONARY {}(x UInt64, y UInt64) PRIMARY KEY x "\ + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict' DB 'test')) "\ + "LAYOUT(FLAT()) LIFETIME(0)".format(d_name)) + + def check(): + for d_name in d_names: + assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" + + check() + + # Restart must not break anything. + node.restart_clickhouse() + check() + + +@pytest.mark.parametrize("node", nodes) +def test_dependency_via_explicit_table(node): + tbl_names = ["test.atbl", "test.ztbl", "atest.tbl", "ztest.tbl"] + d_names = ["test.other_{}".format(i) for i in range(0, len(tbl_names))] + for i in range(0, len(tbl_names)): + tbl_name = tbl_names[i] + tbl_database, tbl_shortname = tbl_name.split('.') + d_name = d_names[i] + node.query("CREATE TABLE {}(x UInt64, y UInt64) ENGINE=Dictionary('test.dict')".format(tbl_name)) + node.query("CREATE DICTIONARY {}(x UInt64, y UInt64) PRIMARY KEY x "\ + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE '{}' DB '{}')) "\ + "LAYOUT(FLAT()) LIFETIME(0)".format(d_name, tbl_shortname, tbl_database)) + + def check(): + for d_name in d_names: + assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" + + check() + + # Restart must not break anything. + node.restart_clickhouse() + check() + + +@pytest.mark.parametrize("node", nodes) +def test_dependency_via_dictionary_database(node): + node.query("CREATE DATABASE dict_db ENGINE=Dictionary") + + d_names = ["test.adict", "test.zdict", "atest.dict", "ztest.dict"] + for d_name in d_names: + node.query("CREATE DICTIONARY {}(x UInt64, y UInt64) PRIMARY KEY x "\ + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'test.dict' DB 'dict_db')) "\ + "LAYOUT(FLAT()) LIFETIME(0)".format(d_name)) + + def check(): + for d_name in d_names: + assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" + + check() + + # Restart must not break anything. 
+ node.restart_clickhouse() + check() diff --git a/tests/integration/test_dictionaries_dependency_xml/__init__.py b/tests/integration/test_dictionaries_dependency_xml/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/config.xml b/tests/integration/test_dictionaries_dependency_xml/configs/config.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/config.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/config.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_x.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_x.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_x.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_x.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_y.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_y.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_z.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_z.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/users.xml b/tests/integration/test_dictionaries_dependency_xml/configs/users.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/users.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/users.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/test.py rename to tests/integration/test_dictionaries_dependency_xml/test.py diff --git a/tests/integration/test_disk_types/__init__.py b/tests/integration/test_disk_types/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_disk_types/configs/config.xml b/tests/integration/test_disk_types/configs/config.xml new file mode 100644 index 00000000000..a3ec8b3a58a --- /dev/null +++ b/tests/integration/test_disk_types/configs/config.xml @@ -0,0 +1,42 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + memory + + + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + diff --git a/tests/integration/test_disk_types/configs/users.xml b/tests/integration/test_disk_types/configs/users.xml new file mode 100644 index 00000000000..6061af8e33d --- /dev/null +++ b/tests/integration/test_disk_types/configs/users.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + 
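The `test_disk_types` suite added below exercises the new `type` column of `system.disks` (introduced by the `StorageSystemDisks` change earlier in this diff) against a node configured with `local`, `s3`, and `memory` disks. A quick manual sanity check against such a node could look like this (a sketch; it assumes `clickhouse-client` can reach the server and that the disks are named as in the test's `disk_types` mapping):

```bash
# Expected mapping, per the test below: default -> local, disk_s3 -> s3, disk_memory -> memory
clickhouse-client --query "SELECT name, type FROM system.disks ORDER BY name"

# Disks of a given type can also be selected directly via the new column:
clickhouse-client --query "SELECT name FROM system.disks WHERE type = 's3'"
```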
diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py new file mode 100644 index 00000000000..04346388b47 --- /dev/null +++ b/tests/integration/test_disk_types/test.py @@ -0,0 +1,36 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +disk_types = { + "default" : "local", + "disk_s3" : "s3", + "disk_memory" : "memory", +} + +@pytest.fixture(scope="module") + +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance("node", config_dir="configs", with_minio=True) + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_different_types(cluster): + node = cluster.instances["node"] + responce = node.query("SELECT * FROM system.disks") + disks = responce.split("\n") + for disk in disks: + if disk == '': # skip empty line (after split at last position) + continue + fields = disk.split("\t") + assert len(fields) >= 6 + assert disk_types.get(fields[0], "UNKNOWN") == fields[5] + +def test_select_by_type(cluster): + node = cluster.instances["node"] + for name, disk_type in disk_types.items(): + assert node.query("SELECT name FROM system.disks WHERE type='" + disk_type + "'") == name + "\n" + diff --git a/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml b/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml index 9efd681e74e..b424e975bfe 100644 --- a/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml +++ b/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml @@ -17,6 +17,49 @@ + + + + n1 + 9000 + 0 + + + n2 + 9000 + + + + + n3 + 9000 + 0 + + + + + + + + n1 + 9000 + -1 + + + n2 + 9000 + + 0 + + + n3 + 9000 + -1 + + + + + n1 diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py index 07986de6a85..c538dc7fb3a 100644 --- a/tests/integration/test_distributed_load_balancing/test.py +++ b/tests/integration/test_distributed_load_balancing/test.py @@ -43,6 +43,20 @@ def bootstrap(): currentDatabase(), data) """.format()) + n.query(""" + CREATE TABLE dist_priority AS data + Engine=Distributed( + replicas_priority_cluster, + currentDatabase(), + data) + """.format()) + n.query(""" + CREATE TABLE dist_priority_negative AS data + Engine=Distributed( + replicas_priority_negative_cluster, + currentDatabase(), + data) + """.format()) def make_uuid(): return uuid.uuid4().hex @@ -56,7 +70,7 @@ def start_cluster(): finally: cluster.shutdown() -def get_node(query_node, *args, **kwargs): +def get_node(query_node, table='dist', *args, **kwargs): query_id = make_uuid() settings = { @@ -70,7 +84,7 @@ def get_node(query_node, *args, **kwargs): else: kwargs['settings'].update(settings) - query_node.query('SELECT * FROM dist', *args, **kwargs) + query_node.query('SELECT * FROM ' + table, *args, **kwargs) for n in cluster.instances.values(): n.query('SYSTEM FLUSH LOGS') @@ -120,7 +134,6 @@ def test_load_balancing_first_or_random(): assert len(unique_nodes) == 1, unique_nodes assert unique_nodes == set(['n1']) -# TODO: last_used will be reset on config reload, hence may fail def test_load_balancing_round_robin(): unique_nodes = set() for _ in range(0, nodes): @@ -128,6 +141,18 @@ def test_load_balancing_round_robin(): assert len(unique_nodes) == nodes, unique_nodes assert unique_nodes == set(['n1', 'n2', 'n3']) +@pytest.mark.parametrize('dist_table', [ + ('dist_priority'), + ('dist_priority_negative'), +]) +def 
test_load_balancing_priority_round_robin(dist_table): + unique_nodes = set() + for _ in range(0, nodes): + unique_nodes.add(get_node(n1, dist_table, settings={'load_balancing': 'round_robin'})) + assert len(unique_nodes) == 2, unique_nodes + # n2 has bigger priority in config + assert unique_nodes == set(['n1', 'n3']) + def test_distributed_replica_max_ignored_errors(): settings = { 'load_balancing': 'in_order', diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 4ab225aee20..507445537b8 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -138,6 +138,34 @@ def test_mysql_client(mysql_client, server_address): assert stdout == '\n'.join(['column', '0', '0', '1', '1', '5', '5', 'tmp_column', '0', '1', '']) + # Show table status. + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "show table status like 'xx';" + '''.format(host=server_address, port=server_port), demux=True) + assert code == 0 + + # show variables. + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "show variables;" + '''.format(host=server_address, port=server_port), demux=True) + assert code == 0 + + # Kill query. + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "kill query 0;" + '''.format(host=server_address, port=server_port), demux=True) + assert code == 0 + + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "kill query where query_id='mysql:0';" + '''.format(host=server_address, port=server_port), demux=True) + assert code == 0 + + def test_mysql_federated(mysql_server, server_address): # For some reason it occasionally fails without retries. 
retries = 100 diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index e668b461389..ce6e4e53512 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -97,6 +97,48 @@ def test_admin_option(): assert instance.query("SELECT * FROM test_table", user='B') == "1\t5\n2\t10\n" +def test_revoke_requires_admin_option(): + instance.query("CREATE USER A, B") + instance.query("CREATE ROLE R1, R2") + + instance.query("GRANT R1 TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1 TO B\n" + + expected_error = "necessary to have the role R1 granted" + assert expected_error in instance.query_and_get_error("REVOKE R1 FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1 TO B\n" + + instance.query("GRANT R1 TO A") + expected_error = "granted, but without ADMIN option" + assert expected_error in instance.query_and_get_error("REVOKE R1 FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1 TO B\n" + + instance.query("GRANT R1 TO A WITH ADMIN OPTION") + instance.query("REVOKE R1 FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT R1 TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1 TO B\n" + instance.query("REVOKE ALL FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT R1, R2 TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1, R2 TO B\n" + expected_error = "necessary to have the role R2 granted" + assert expected_error in instance.query_and_get_error("REVOKE ALL FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1, R2 TO B\n" + instance.query("REVOKE ALL EXCEPT R2 FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT R2 TO B\n" + instance.query("GRANT R2 TO A WITH ADMIN OPTION") + instance.query("REVOKE ALL FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT R1, R2 TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1, R2 TO B\n" + instance.query("REVOKE ALL FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + def test_introspection(): instance.query("CREATE USER A") instance.query("CREATE USER B") @@ -140,7 +182,6 @@ def test_introspection(): assert instance.query("SELECT * from system.grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, access_type, grant_option") ==\ TSV([[ "A", "\N", "SELECT", "test", "table", "\N", 0, 0 ], - [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 0 ], [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 1 ], [ "\N", "R2", "SELECT", "test", "table", "\N", 0, 0 ], [ "\N", "R2", "SELECT", "test", "table", "x", 1, 0 ]]) diff --git a/tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml b/tests/integration/test_settings_constraints_distributed/configs/config.d/remote_servers.xml similarity index 100% rename from tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml rename to tests/integration/test_settings_constraints_distributed/configs/config.d/remote_servers.xml diff --git a/tests/integration/test_settings_constraints_distributed/configs/users.d/allow_introspection_functions.xml b/tests/integration/test_settings_constraints_distributed/configs/users.d/allow_introspection_functions.xml new file mode 100644 index 00000000000..ccfdf6a63f6 --- /dev/null +++ 
b/tests/integration/test_settings_constraints_distributed/configs/users.d/allow_introspection_functions.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index 86456f8a099..7f0f8868bcf 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -8,9 +8,9 @@ from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1') -node2 = cluster.add_instance('node2') -distributed = cluster.add_instance('distributed', main_configs=["configs/remote_servers.xml"], stay_alive=True) +node1 = cluster.add_instance('node1', config_dir="configs") +node2 = cluster.add_instance('node2', config_dir="configs") +distributed = cluster.add_instance('distributed', config_dir="configs", stay_alive=True) @pytest.fixture(scope="module", autouse=True) diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 752aa2da75d..21fdac9da7a 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -177,11 +177,18 @@ def test_allow_ddl(): def test_allow_introspection(): + assert "Introspection functions are disabled" in instance.query_and_get_error("SELECT demangle('a')") assert "Not enough privileges" in instance.query_and_get_error("SELECT demangle('a')", user="robin") - - instance.query("GRANT ALL ON *.* TO robin") - assert "Introspection functions are disabled" in instance.query_and_get_error("SELECT demangle('a')", user="robin") + assert "Not enough privileges" in instance.query_and_get_error("SELECT demangle('a')", user="robin", settings={"allow_introspection_functions":1}) + assert "Introspection functions are disabled" in instance.query_and_get_error("GRANT demangle ON *.* TO robin") + assert "Not enough privileges" in instance.query_and_get_error("GRANT demangle ON *.* TO robin", user="robin") + assert "Not enough privileges" in instance.query_and_get_error("GRANT demangle ON *.* TO robin", user="robin", settings={"allow_introspection_functions":1}) + + assert instance.query("SELECT demangle('a')", settings={"allow_introspection_functions":1}) == "signed char\n" + instance.query("GRANT demangle ON *.* TO robin", settings={"allow_introspection_functions":1}) + + assert "Introspection functions are disabled" in instance.query_and_get_error("SELECT demangle('a')", user="robin") instance.query("ALTER USER robin SETTINGS allow_introspection_functions=1") assert instance.query("SELECT demangle('a')", user="robin") == "signed char\n" @@ -194,5 +201,5 @@ def test_allow_introspection(): instance.query("DROP SETTINGS PROFILE xyz") assert "Introspection functions are disabled" in instance.query_and_get_error("SELECT demangle('a')", user="robin") - instance.query("REVOKE ALL ON *.* FROM robin") + instance.query("REVOKE demangle ON *.* FROM robin", settings={"allow_introspection_functions":1}) assert "Not enough privileges" in instance.query_and_get_error("SELECT demangle('a')", user="robin") diff --git a/tests/integration/test_storage_rabbitmq/__init__.py b/tests/integration/test_storage_rabbitmq/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_rabbitmq/configs/log_conf.xml b/tests/integration/test_storage_rabbitmq/configs/log_conf.xml new file mode 100644 index 
00000000000..f9d15e572aa --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/log_conf.xml @@ -0,0 +1,11 @@ + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml b/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml new file mode 100644 index 00000000000..3002b6aa415 --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml @@ -0,0 +1,6 @@ + + + root + clickhouse + + diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py new file mode 100644 index 00000000000..42b7101f9c6 --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -0,0 +1,1532 @@ +import os.path as p +import random +import threading +import time +import pytest + +from random import randrange +import pika +from sys import getdefaultencoding + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.client import QueryRuntimeException +from helpers.network import PartitionManager + +import json +import subprocess + +from google.protobuf.internal.encoder import _VarintBytes + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', + config_dir='configs', + main_configs=['configs/rabbitmq.xml','configs/log_conf.xml'], + with_rabbitmq=True) +rabbitmq_id = '' + + +# Helpers + +def check_rabbitmq_is_available(): + p = subprocess.Popen(('docker', + 'exec', + '-i', + rabbitmq_id, + 'rabbitmqctl', + 'await_startup'), + stdout=subprocess.PIPE) + p.communicate() + return p.returncode == 0 + + +def enable_consistent_hash_plugin(): + p = subprocess.Popen(('docker', + 'exec', + '-i', + rabbitmq_id, + "rabbitmq-plugins", "enable", "rabbitmq_consistent_hash_exchange"), + stdout=subprocess.PIPE) + p.communicate() + return p.returncode == 0 + + +def wait_rabbitmq_is_available(max_retries=50): + retries = 0 + while True: + if check_rabbitmq_is_available(): + break + else: + retries += 1 + if retries > max_retries: + raise Exception("RabbitMQ is not available") + print("Waiting for RabbitMQ to start up") + time.sleep(1) + + +def wait_rabbitmq_plugin_enabled(max_retries=50): + retries = 0 + while True: + if enable_consistent_hash_plugin(): + break + else: + retries += 1 + if retries > max_retries: + raise Exception("RabbitMQ plugin is not available") + print("Waiting for plugin") + time.sleep(1) + + +def rabbitmq_check_result(result, check=False, ref_file='test_rabbitmq_json.reference'): + fpath = p.join(p.dirname(__file__), ref_file) + with open(fpath) as reference: + if check: + assert TSV(result) == TSV(reference) + else: + return TSV(result) == TSV(reference) + + +# Fixtures + +@pytest.fixture(scope="module") +def rabbitmq_cluster(): + try: + global rabbitmq_id + cluster.start() + rabbitmq_id = instance.cluster.rabbitmq_docker_id + print("rabbitmq_id is {}".format(rabbitmq_id)) + instance.query('CREATE DATABASE test') + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def rabbitmq_setup_teardown(): + wait_rabbitmq_is_available() + wait_rabbitmq_plugin_enabled() + print("RabbitMQ is available - running test") + yield # run test + instance.query('DROP TABLE IF EXISTS test.rabbitmq') + + +# Tests + +@pytest.mark.timeout(180) +def test_rabbitmq_select_from_new_syntax_table(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key
UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'new', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(25): + messages.append(json.dumps({'key': i, 'value': i})) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='new', body=message) + + messages = [] + for i in range(25, 50): + messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='new', body=message) + + connection.close() + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_select_from_old_syntax_table(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ('rabbitmq1:5672', 'old', 'clickhouse-exchange', 'JSONEachRow', '\\n'); + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='old', body=message) + + connection.close() + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_select_empty(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'empty', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + assert int(instance.query('SELECT count() FROM test.rabbitmq')) == 0 + + +@pytest.mark.timeout(180) +def test_rabbitmq_json_without_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'json', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'JSONEachRow' + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = '' + for i in range(25): + messages += json.dumps({'key': i, 'value': i}) + '\n' + + all_messages = [messages] + for message in all_messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='json', body=message) + + messages = 
'' + for i in range(25, 50): + messages += json.dumps({'key': i, 'value': i}) + '\n' + all_messages = [messages] + for message in all_messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='json', body=message) + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_csv_with_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'csv', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'CSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append('{i}, {i}'.format(i=i)) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='csv', body=message) + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'tsv', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append('{i}\t{i}'.format(i=i)) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='tsv', body=message) + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_materialized_view(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'mv', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + messages = [] + for i in range(50): + 
messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='mv', body=message) + + while True: + result = instance.query('SELECT * FROM test.view') + if (rabbitmq_check_result(result)): + break; + + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_materialized_view_with_subquery(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'mvsq', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM (SELECT * FROM test.rabbitmq); + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='mvsq', body=message) + + while True: + result = instance.query('SELECT * FROM test.view') + if rabbitmq_check_result(result): + break + + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + connection.close(); + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_many_materialized_views(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view1; + DROP TABLE IF EXISTS test.view2; + DROP TABLE IF EXISTS test.consumer1; + DROP TABLE IF EXISTS test.consumer2; + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'mmv', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view1 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE TABLE test.view2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS + SELECT * FROM test.rabbitmq; + CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS + SELECT * FROM test.rabbitmq; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='mmv', body=message) + + while True: + result1 = instance.query('SELECT * FROM test.view1') + result2 = instance.query('SELECT * FROM test.view2') + if rabbitmq_check_result(result1) and rabbitmq_check_result(result2): + break + + instance.query(''' + DROP TABLE test.consumer1; + DROP TABLE test.consumer2; + DROP TABLE test.view1; + DROP TABLE test.view2; + ''') + + rabbitmq_check_result(result1, True) + rabbitmq_check_result(result2, True) + + +@pytest.mark.timeout(240) +def test_rabbitmq_big_message(rabbitmq_cluster): + # 
Create batches of messages of size ~100Kb + rabbitmq_messages = 1000 + batch_messages = 1000 + messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(rabbitmq_messages)] + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.rabbitmq (key UInt64, value String) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'big', + rabbitmq_format = 'JSONEachRow'; + CREATE TABLE test.view (key UInt64, value String) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='big', body=message) + + while True: + result = instance.query('SELECT count() FROM test.view') + print("Result", result, "Expected", batch_messages * rabbitmq_messages) + if int(result) == batch_messages * rabbitmq_messages: + break + + connection.close() + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + assert int(result) == rabbitmq_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): + + NUM_CHANNELS = 5 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = str(randrange(1, NUM_CHANNELS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + print("Result", result, "Expected", messages_num * threads_num) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): + + NUM_QUEUES = 4 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS
rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_queues = 4, + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = str(randrange(1, NUM_QUEUES)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster): + + NUM_CONSUMERS = 10 + NUM_QUEUES = 2 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_queues = 2, + rabbitmq_num_consumers = 10, + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = str(randrange(1, NUM_QUEUES * NUM_CONSUMERS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def 
test_rabbitmq_read_only_combo(rabbitmq_cluster): + + NUM_MV = 5; + NUM_CONSUMERS = 4 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + ''') + + for mv_id in range(NUM_MV): + table_name = 'view{}'.format(mv_id) + print("Setting up {}".format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + CREATE TABLE test.{0} (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.{0}_mv TO test.{0} AS + SELECT * FROM test.rabbitmq; + '''.format(table_name)) + + time.sleep(2) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = str(randrange(1, NUM_CONSUMERS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = 0 + for view in range(NUM_MV): + result += int(instance.query('SELECT count() FROM test.view{0}'.format(view))) + if int(result) == messages_num * threads_num * NUM_MV: + break + time.sleep(1) + + for thread in threads: + thread.join() + + for mv_id in range(NUM_MV): + table_name = 'view{}'.format(mv_id) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + '''.format(table_name)) + + + assert int(result) == messages_num * threads_num * NUM_MV, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(240) +def test_rabbitmq_insert(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'insert', + rabbitmq_routing_key_list = 'insert1', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + consumer_connection = pika.BlockingConnection(parameters) + + consumer = consumer_connection.channel() + consumer.exchange_declare(exchange='insert_rabbitmq_direct', exchange_type='direct') + result = consumer.queue_declare(queue='') + queue_name = result.method.queue + consumer.queue_bind(exchange='insert_rabbitmq_direct', queue=queue_name, routing_key='insert1') + + values = [] + for i in range(50): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' 
in str(e): + continue + else: + raise + + insert_messages = [] + def onReceived(channel, method, properties, body): + i = 0 + insert_messages.append(body.decode()) + if (len(insert_messages) == 50): + channel.stop_consuming() + + consumer.basic_qos(prefetch_count=50) + consumer.basic_consume(onReceived, queue_name) + consumer.start_consuming() + consumer_connection.close() + + result = '\n'.join(insert_messages) + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(240) +def test_rabbitmq_many_inserts(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.rabbitmq_many; + DROP TABLE IF EXISTS test.view_many; + DROP TABLE IF EXISTS test.consumer_many; + CREATE TABLE test.rabbitmq_many (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key_list = 'insert2', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_many (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS + SELECT * FROM test.rabbitmq_many; + ''') + + messages_num = 1000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_many VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' in str(e): + continue + else: + raise + + threads = [] + threads_num = 20 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_many') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE IF EXISTS test.rabbitmq_many; + DROP TABLE IF EXISTS test.consumer_many; + DROP TABLE IF EXISTS test.view_many; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(240) +def test_rabbitmq_sharding_between_channels_and_queues_insert(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view_sharding; + DROP TABLE IF EXISTS test.consumer_sharding; + CREATE TABLE test.rabbitmq_sharding (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_num_queues = 2, + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_sharding (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + CREATE MATERIALIZED VIEW test.consumer_sharding TO test.view_sharding AS + SELECT * FROM test.rabbitmq_sharding; + ''') + + messages_num = 10000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_sharding VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' 
in str(e): + continue + else: + raise + + threads = [] + threads_num = 20 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_sharding') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE IF EXISTS test.rabbitmq_sharding; + DROP TABLE IF EXISTS test.consumer_sharding; + DROP TABLE IF EXISTS test.view_sharding; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_overloaded_insert(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view_overload; + DROP TABLE IF EXISTS test.consumer_overload; + CREATE TABLE test.rabbitmq_overload (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 10, + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_overload (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS + SELECT * FROM test.rabbitmq_overload; + ''') + + messages_num = 100000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_overload VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' in str(e): + continue + else: + raise + + threads = [] + threads_num = 5 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_overload') + time.sleep(1) + print("Result", int(result), "Expected", messages_num * threads_num) + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE IF EXISTS test.rabbitmq_overload; + DROP TABLE IF EXISTS test.consumer_overload; + DROP TABLE IF EXISTS test.view_overload; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_direct_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + CREATE TABLE test.direct_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_exchange_name = 'direct_exchange_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'direct_{0}', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW 
test.direct_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.direct_exchange_{0}; + '''.format(consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='direct_exchange_testing', exchange_type='direct') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for num in range(num_tables): + key = "direct_" + str(key_num) + key_num += 1 + for message in messages: + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='direct_exchange_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables: + break + + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + '''.format(consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_fanout_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.fanout_exchange_{0}; + DROP TABLE IF EXISTS test.fanout_exchange_{0}_mv; + CREATE TABLE test.fanout_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_routing_key_list = 'key_{0}', + rabbitmq_exchange_name = 'fanout_exchange_testing', + rabbitmq_exchange_type = 'fanout', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.fanout_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.fanout_exchange_{0}; + '''.format(consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='fanout_exchange_testing', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for message in messages: + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='fanout_exchange_testing', routing_key='', + properties=pika.BasicProperties(message_id=mes_id), body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables: + break + + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.fanout_exchange_{0}; + DROP TABLE IF EXISTS 
test.fanout_exchange_{0}_mv; + '''.format(consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_topic_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + CREATE TABLE test.topic_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_exchange_name = 'topic_exchange_testing', + rabbitmq_exchange_type = 'topic', + rabbitmq_routing_key_list = '*.{0}', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.topic_exchange_{0}; + '''.format(consumer_id)) + + for consumer_id in range(num_tables): + print("Setting up table {}".format(num_tables + consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + CREATE TABLE test.topic_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_exchange_name = 'topic_exchange_testing', + rabbitmq_exchange_type = 'topic', + rabbitmq_routing_key_list = '*.logs', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.topic_exchange_{0}; + '''.format(num_tables + consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='topic_exchange_testing', exchange_type='topic') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for num in range(num_tables): + key = "topic." 
+ str(key_num) + key_num += 1 + for message in messages: + channel.basic_publish(exchange='topic_exchange_testing', routing_key=key, body=message) + + key = "random.logs" + for message in messages: + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='topic_exchange_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables + messages_num * num_tables: + break + + for consumer_id in range(num_tables * 2): + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + '''.format(consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + assert int(result) == messages_num * num_tables + messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_hash_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 4 + for consumer_id in range(num_tables): + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + print("Setting up {}".format(table_name)) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + CREATE TABLE test.{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 10, + rabbitmq_exchange_type = 'consistent_hash', + rabbitmq_exchange_name = 'hash_exchange_testing', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.{0}; + '''.format(table_name)) + + i = [0] + messages_num = 500 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + + def produce(): + # init connection here because otherwise python rabbitmq client might fail + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='hash_exchange_testing', exchange_type='x-consistent-hash') + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + for message in messages: + key = str(randrange(10)) + channel.basic_publish(exchange='hash_exchange_testing', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 10 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for consumer_id in range(num_tables): + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + '''.format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def 
test_rabbitmq_multiple_bindings(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + instance.query(''' + DROP TABLE IF EXISTS test.bindings_1; + DROP TABLE IF EXISTS test.bindings_1_mv; + CREATE TABLE test.bindings_1 (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_num_queues = 2, + rabbitmq_exchange_name = 'multiple_bindings_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.bindings_1_mv TO test.destination AS + SELECT * FROM test.bindings_1; + ''') + + # in case num_consumers and num_queues are not set - multiple bindings are implemented differently, so test them too + instance.query(''' + DROP TABLE IF EXISTS test.bindings_2; + DROP TABLE IF EXISTS test.bindings_2_mv; + CREATE TABLE test.bindings_2 (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'multiple_bindings_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.bindings_2_mv TO test.destination AS + SELECT * FROM test.bindings_2; + ''') + + i = [0] + messages_num = 500 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + + def produce(): + # init connection here because otherwise python rabbitmq client might fail + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='multiple_bindings_testing', exchange_type='direct') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + keys = ['key1', 'key2', 'key3', 'key4', 'key5'] + + for key in keys: + for message in messages: + mes_id = str(randrange(10)) + channel.basic_publish(exchange='multiple_bindings_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) + + connection.close() + + threads = [] + threads_num = 10 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * threads_num * 5 * 2: + break + + for thread in threads: + thread.join() + + instance.query(''' + DROP TABLE IF EXISTS test.bindings_1; + DROP TABLE IF EXISTS test.bindings_2; + DROP TABLE IF EXISTS test.destination; + ''') + + assert int(result) == messages_num * threads_num * 5 * 2, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_headers_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables_to_receive = 3 + for consumer_id in range(num_tables_to_receive): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS 
test.headers_exchange_{0}; + DROP TABLE IF EXISTS test.headers_exchange_{0}_mv; + CREATE TABLE test.headers_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_exchange_name = 'headers_exchange_testing', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'x-match=all,format=logs,type=report,year=2020', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.headers_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.headers_exchange_{0}; + '''.format(consumer_id)) + + num_tables_to_ignore = 2 + for consumer_id in range(num_tables_to_ignore): + print("Setting up table {}".format(consumer_id + num_tables_to_receive)) + instance.query(''' + DROP TABLE IF EXISTS test.headers_exchange_{0}; + DROP TABLE IF EXISTS test.headers_exchange_{0}_mv; + CREATE TABLE test.headers_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'headers_exchange_testing', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'x-match=all,format=logs,type=report,year=2019', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.headers_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.headers_exchange_{0}; + '''.format(consumer_id + num_tables_to_receive)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='headers_exchange_testing', exchange_type='headers') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + fields={} + fields['format']='logs' + fields['type']='report' + fields['year']='2020' + + key_num = 0 + for message in messages: + mes_id = str(randrange(10)) + channel.basic_publish(exchange='headers_exchange_testing', routing_key='', + properties=pika.BasicProperties(headers=fields, message_id=mes_id), body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables_to_receive: + break + + for consumer_id in range(num_tables_to_receive + num_tables_to_ignore): + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + '''.format(consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + assert int(result) == messages_num * num_tables_to_receive, 'ClickHouse lost some messages: {}'.format(result) + + +if __name__ == '__main__': + cluster.start() + raw_input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference b/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference new file mode 100644 index 00000000000..959bb2aad74 --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference @@ -0,0 +1,50 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 
+30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 diff --git a/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml b/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml index b48de85007a..47bf9f56cdd 100644 --- a/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml @@ -40,6 +40,20 @@ + + + + default + +
+ jbod1 +
+ + external + +
+
+
diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 26bd36b8cb6..1894f88029e 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -160,6 +160,53 @@ def test_inserts_to_disk_work(started_cluster, name, engine, positive): pass +@pytest.mark.parametrize("name,engine", [ + ("mt_test_moves_work_after_storage_policy_change","MergeTree()"), + ("replicated_mt_test_moves_work_after_storage_policy_change","ReplicatedMergeTree('/clickhouse/test_moves_work_after_storage_policy_change', '1')"), +]) +def test_moves_work_after_storage_policy_change(started_cluster, name, engine): + try: + node1.query(""" + CREATE TABLE {name} ( + s1 String, + d1 DateTime + ) ENGINE = {engine} + ORDER BY tuple() + """.format(name=name, engine=engine)) + + node1.query("""ALTER TABLE {name} MODIFY SETTING storage_policy='default_with_small_jbod_with_external'""".format(name=name)) + + # Second expression is preferred because d1 > now()-3600. + node1.query("""ALTER TABLE {name} MODIFY TTL now()-3600 TO DISK 'jbod1', d1 TO DISK 'external'""".format(name=name)) + + wait_expire_1 = 12 + wait_expire_2 = 4 + time_1 = time.time() + wait_expire_1 + time_2 = time.time() + wait_expire_1 + wait_expire_2 + + wait_expire_1_thread = threading.Thread(target=time.sleep, args=(wait_expire_1,)) + wait_expire_1_thread.start() + + data = [] # 10MB in total + for i in range(10): + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time_1))) # 1MB row + + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {"jbod1"} + + wait_expire_1_thread.join() + time.sleep(wait_expire_2/2) + + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {"external"} + + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "10" + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + + @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_moves_to_disk_do_not_work","MergeTree()",0), ("replicated_mt_test_moves_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_do_not_work', '1')",0), diff --git a/tests/performance/cpu_synthetic.xml b/tests/performance/cpu_synthetic.xml index 2888f7bbbd6..e08e06f9833 100644 --- a/tests/performance/cpu_synthetic.xml +++ b/tests/performance/cpu_synthetic.xml @@ -21,7 +21,7 @@ PageCharset тоже почти всегда непуст, но его сред SELECT count() FROM hits_10m_single WHERE NOT ignore(sipHash64(SearchPhrase)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(sipHash64(SearchPhrase)) -SELECT count() FROM hits_10m_single WHERE NOT ignore(MD5(SearchPhrase)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(MD5(SearchPhrase)) diff --git a/tests/performance/date_time.xml b/tests/performance/date_time_long.xml similarity index 85% rename from tests/performance/date_time.xml rename to tests/performance/date_time_long.xml index 43cd1f353e2..b358945b35b 100644 --- a/tests/performance/date_time.xml +++ b/tests/performance/date_time_long.xml @@ -1,11 +1,5 @@ - - - - long - - - + long datetime_transform @@ -126,8 +120,8 @@ - SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) - SELECT count() FROM 
numbers(10000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) - SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) - SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) diff --git a/tests/performance/date_time_short.xml b/tests/performance/date_time_short.xml new file mode 100644 index 00000000000..a9fd0908694 --- /dev/null +++ b/tests/performance/date_time_short.xml @@ -0,0 +1,40 @@ + + + + + date_transform + + toDayOfWeek + toMonday + toRelativeDayNum + toYYYYMMDDhhmmss + toUnixTimestamp + + + + time_zone + + Europe/Moscow + + + + binary_function + + lessOrEquals + greater + plus + addWeeks + + + + + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {date_transform}(t, '{time_zone}')) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) + diff --git a/tests/performance/float_formatting.xml b/tests/performance/float_formatting.xml index f65efbe83cf..4588f048d20 100644 --- a/tests/performance/float_formatting.xml +++ b/tests/performance/float_formatting.xml @@ -1,10 +1,9 @@ - - - long - - - - + + expr @@ -49,6 +48,6 @@ - SELECT count() FROM numbers(1000000) WHERE NOT ignore(toString({expr})) - SELECT count() FROM zeros(1000000) WHERE NOT ignore(toString({expr_zero})) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(toString({expr})) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(toString({expr_zero})) diff --git a/tests/performance/float_parsing.xml b/tests/performance/float_parsing.xml index 2acbb636fed..7d159fbfced 100644 --- a/tests/performance/float_parsing.xml +++ b/tests/performance/float_parsing.xml @@ -1,10 +1,4 @@ - - long - - - - expr diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml new file mode 100644 index 00000000000..d2ae9417bf7 --- /dev/null +++ b/tests/performance/polymorphic_parts_l.xml @@ -0,0 +1,35 @@ + + + CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 + + + CREATE TABLE hits_compact AS 
hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_bytes_for_wide_part = '10M' + + + CREATE TABLE hits_buffer AS hits_10m_single + ENGINE = Buffer(default, hits_wide, 1, 0, 0, 10000, 10000, 0, 0) + + + + + 1000 + 1000 + + + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100000) + + DROP TABLE IF EXISTS hits_wide + DROP TABLE IF EXISTS hits_compact + DROP TABLE IF EXISTS hits_buffer + diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml new file mode 100644 index 00000000000..54a81def55e --- /dev/null +++ b/tests/performance/polymorphic_parts_m.xml @@ -0,0 +1,35 @@ + + + CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 + + + CREATE TABLE hits_compact AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_bytes_for_wide_part = '10M' + + + CREATE TABLE hits_buffer AS hits_10m_single + ENGINE = Buffer(default, hits_wide, 1, 0, 0, 10000, 10000, 0, 0) + + + + + 100 + 100 + + + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(10000) + + DROP TABLE IF EXISTS hits_wide + DROP TABLE IF EXISTS hits_compact + DROP TABLE IF EXISTS hits_buffer + diff --git a/tests/performance/polymorphic_parts.xml b/tests/performance/polymorphic_parts_s.xml similarity index 52% rename from tests/performance/polymorphic_parts.xml rename to tests/performance/polymorphic_parts_s.xml index a8e305953d0..b4dd87a7ae3 100644 --- a/tests/performance/polymorphic_parts.xml +++ b/tests/performance/polymorphic_parts_s.xml @@ -15,23 +15,19 @@ CREATE TABLE hits_buffer AS hits_10m_single - ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + ENGINE = Buffer(default, hits_wide, 1, 0, 0, 10000, 10000, 0, 0) - INSERT INTO hits_wide(UserID) VALUES (rand()) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) + + + 1 + 1 + - INSERT INTO hits_compact(UserID) VALUES (rand()) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000) - - INSERT INTO hits_buffer(UserID) VALUES (rand()) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact diff --git a/tests/performance/read_in_order_many_parts.xml b/tests/performance/read_in_order_many_parts.xml new file mode 100644 index 00000000000..bb6004d061e --- /dev/null +++ 
b/tests/performance/read_in_order_many_parts.xml @@ -0,0 +1,32 @@ + + + 1 + 1 + 200 + 8 + + + + + table + + mt_20_parts + mt_200_parts + + + + + CREATE TABLE mt_20_parts(id UInt32, val1 UInt32, val2 UInt32) ENGINE = MergeTree ORDER BY val1 PARTITION BY id % 20 + CREATE TABLE mt_200_parts(id UInt32, val1 UInt32, val2 UInt32) ENGINE = MergeTree ORDER BY val1 PARTITION BY id % 200 + + INSERT INTO mt_20_parts SELECT number, rand() % 10000, rand() FROM numbers_mt(100000000) + INSERT INTO mt_200_parts SELECT number, rand() % 10000, rand() FROM numbers_mt(100000000) + OPTIMIZE TABLE mt_20_parts FINAL + OPTIMIZE TABLE mt_200_parts FINAL + + SELECT val2 FROM {table} ORDER BY val1 LIMIT 100 FORMAT Null + SELECT val2 FROM {table} ORDER BY val1 LIMIT 100000 FORMAT Null + SELECT sum(val2) FROM {table} GROUP BY val1 FORMAT Null + + DROP TABLE IF EXISTS {table} + diff --git a/tests/performance/select_format.xml b/tests/performance/select_format.xml index 0cd0e3fe732..bbe489c06c6 100644 --- a/tests/performance/select_format.xml +++ b/tests/performance/select_format.xml @@ -35,6 +35,7 @@ ODBCDriver2 Avro MsgPack + ORC diff --git a/tests/performance/set.xml b/tests/performance/set.xml index 576a26390d1..09301d5637c 100644 --- a/tests/performance/set.xml +++ b/tests/performance/set.xml @@ -1,30 +1,37 @@ - - long - - - - - table + table_small zeros(10000000) zeros_mt(100000000) - size + table_large + + zeros(100000000) + zeros_mt(1000000000) + + + + size_large + + 1048576 + 10000000 + + + + size_small 1 16 1024 16384 - 1048576 - 10000000 - SELECT count() FROM {table} WHERE rand64() IN (SELECT number FROM system.numbers LIMIT {size}) + SELECT count() FROM {table_large} WHERE rand64() IN (SELECT number FROM system.numbers LIMIT {size_small}) + SELECT count() FROM {table_small} WHERE rand64() IN (SELECT number FROM system.numbers LIMIT {size_large}) diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml index cfad530652c..b732c150220 100644 --- a/tests/performance/sum_map.xml +++ b/tests/performance/sum_map.xml @@ -22,7 +22,7 @@ - CREATE TEMPORARY TABLE sum_map_{scale} AS + CREATE TABLE sum_map_{scale} ENGINE Memory AS SELECT arrayMap(x -> (x % 23), range(50)) AS key, arrayMap(x -> intDiv(number, x + 1), range(50)) AS val diff --git a/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference b/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference index bf11e1c1d71..b353b66f8eb 100644 --- a/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference +++ b/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference @@ -58,7 +58,7 @@ 2017-01 03:04:05 MSD Jun 2017-05-31 23:04:05 2017-05-31 23:04:05 2017-01 03:04 MSD Jun 2017-05-31 23:04:00 2017-05-31 23:04:00 2017/01/31 2017-01-31 00:00:00 2017-01-31 00:00:00 - 2017/01/32 0000-00-00 00:00:00 0000-00-00 00:00:00 + 2017/01/32 ᴺᵁᴸᴸ 0000-00-00 00:00:00 2017-01 MSD Jun 2017-05-31 20:00:00 2017-05-31 20:00:00 201701 MSD Jun 2017-05-31 20:00:00 2017-05-31 20:00:00 2017 25 1:2:3 ᴺᵁᴸᴸ 0000-00-00 00:00:00 diff --git a/tests/queries/0_stateless/00612_pk_in_tuple.reference b/tests/queries/0_stateless/00612_pk_in_tuple.reference index 74e8e642f20..294ab7491c1 100644 --- a/tests/queries/0_stateless/00612_pk_in_tuple.reference +++ b/tests/queries/0_stateless/00612_pk_in_tuple.reference @@ -35,3 +35,4 @@ max(key) from tab_00612 any left join (select key, arrayJoin(n.x) as val from ta max(key) from tab_00612 any left join (select key, arrayJoin(n.x) as val from tab_00612) using key where (key, val) in ((1, 1), (2, 
2)) 2 1 +1 diff --git a/tests/queries/0_stateless/00612_pk_in_tuple.sql b/tests/queries/0_stateless/00612_pk_in_tuple.sql index 499474d1b0a..081e8a75092 100644 --- a/tests/queries/0_stateless/00612_pk_in_tuple.sql +++ b/tests/queries/0_stateless/00612_pk_in_tuple.sql @@ -43,4 +43,6 @@ CREATE TABLE tab_00612 (key1 Int32, id1 Int64, c1 Int64) ENGINE = MergeTree PA insert into tab_00612 values ( -1, 1, 0 ); SELECT count(*) FROM tab_00612 PREWHERE id1 IN (1); +SELECT count() FROM tab_00612 WHERE (key1, id1) IN (-1, 1) AND (key1, 1) IN (-1, 1) SETTINGS force_primary_key = 1; + drop table tab_00612; diff --git a/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference b/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference new file mode 100644 index 00000000000..5bea74275a1 --- /dev/null +++ b/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference @@ -0,0 +1,4 @@ +1 + "rows_read": 2, +1 + "rows_read": 2, diff --git a/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh new file mode 100755 index 00000000000..8f3e89098e6 --- /dev/null +++ b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + + + +$CLICKHOUSE_CLIENT --multiquery <&1 | grep real | grep -Po "0m\K[0-9\.]*" | tr -d '.' | sed "s/^0*//"` +$CLICKHOUSE_CLIENT -q "SELECT '01193_metadata_loading', $elapsed_ms FORMAT Null" # it will be printed to server log + +if [[ $elapsed_ms -le $max_time_ms ]]; then echo ok; fi + +#$CLICKHOUSE_CLIENT -q "SELECT count() * $count_multiplier, i, d, s, n.i, n.f FROM $db.table_merge GROUP BY i, d, s, n.i, n.f ORDER BY i" +$CLICKHOUSE_CLIENT -q "SELECT 8000, i, d, s, n.i, n.f FROM $db.table_1_1 GROUP BY i, d, s, n.i, n.f ORDER BY i" + +$CLICKHOUSE_CLIENT -q "DROP DATABASE $db" diff --git a/tests/queries/0_stateless/01267_alter_default_key_columns.reference b/tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.reference similarity index 100% rename from tests/queries/0_stateless/01267_alter_default_key_columns.reference rename to tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.reference diff --git a/tests/queries/0_stateless/01267_alter_default_key_columns.sql b/tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.sql similarity index 79% rename from tests/queries/0_stateless/01267_alter_default_key_columns.sql rename to tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.sql index 16d6065516f..827161d4d57 100644 --- a/tests/queries/0_stateless/01267_alter_default_key_columns.sql +++ b/tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.sql @@ -11,11 +11,8 @@ DROP TABLE IF EXISTS test_alter_r2; CREATE TABLE test_alter_r1 (x Date, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter', 'r1') ORDER BY s PARTITION BY x; CREATE TABLE test_alter_r2 (x Date, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter', 'r2') ORDER BY s PARTITION BY x; -ALTER TABLE test_alter_r1 MODIFY COLUMN s DEFAULT 'Hello'; -ALTER TABLE test_alter_r2 MODIFY COLUMN x DEFAULT '2000-01-01'; - -SYSTEM SYNC REPLICA test_alter_r1; -SYSTEM SYNC REPLICA test_alter_r2; +ALTER TABLE test_alter_r1 MODIFY COLUMN s DEFAULT 'Hello' SETTINGS replication_alter_partitions_sync = 2; +ALTER TABLE test_alter_r2 MODIFY COLUMN x DEFAULT '2000-01-01' SETTINGS replication_alter_partitions_sync = 2; DESCRIBE TABLE test_alter_r1; DESCRIBE TABLE test_alter_r2; 
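Illustrative aside (a sketch, not part of the patch): the renamed alter test above drops its explicit SYSTEM SYNC REPLICA statements in favor of the per-query setting shown in the diff. A minimal sketch of that pattern, with made-up table and ZooKeeper path names:

    -- Assumed names for illustration only. With replication_alter_partitions_sync = 2
    -- the ALTER returns only after every replica has applied the metadata change,
    -- so a separate SYSTEM SYNC REPLICA step is unnecessary.
    CREATE TABLE example_alter_r1 (x Date, s String)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/example/alter', 'r1')
        ORDER BY s PARTITION BY x;
    ALTER TABLE example_alter_r1 MODIFY COLUMN s DEFAULT 'Hello'
        SETTINGS replication_alter_partitions_sync = 2;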
diff --git a/tests/queries/0_stateless/01307_orc_output_format.reference b/tests/queries/0_stateless/01307_orc_output_format.reference new file mode 100644 index 00000000000..da719072eb2 --- /dev/null +++ b/tests/queries/0_stateless/01307_orc_output_format.reference @@ -0,0 +1,6 @@ +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2020 2021-12-19 2021-12-19 03:00:00 1.0001 1.0000000100 100000.00000000000001000000 1 +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2000 2024-10-04 2028-04-21 01:20:00 34.1234 123123.1231231230 123123123.12312312312312300000 \N +42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 03:00:42 42.4200 42.4242424200 424242.42424242424242000000 42 +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2020 2021-12-19 2021-12-19 03:00:00 1.0001 1.0000000100 100000.00000000000001000000 1 +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2000 2024-10-04 2028-04-21 01:20:00 34.1234 123123.1231231230 123123123.12312312312312300000 \N +42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 03:00:42 42.4200 42.4242424200 424242.42424242424242000000 42 diff --git a/tests/queries/0_stateless/01307_orc_output_format.sh b/tests/queries/0_stateless/01307_orc_output_format.sh new file mode 100755 index 00000000000..c46131dcff6 --- /dev/null +++ b/tests/queries/0_stateless/01307_orc_output_format.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, fixed FixedString(4), date Date, datetime DateTime, decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', '2020', 18980, 1639872000, 1.0001, 1.00000001, 100000.00000000000001, 1), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', '2000', 20000, 1839882000, 34.1234, 123123.123123123, 123123123.123123123123123, NULL), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', '4242', 42, 42, 42.42, 42.42424242, 424242.42424242424242, 42)"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC" > $CURDIR/tmp_orc_test_all_types.orc; + +cat $CURDIR/tmp_orc_test_all_types.orc | $CLICKHOUSE_CLIENT --query="INSERT INTO orc FORMAT ORC"; + +rm $CURDIR/tmp_orc_test_all_types.orc + +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE orc"; diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.reference b/tests/queries/0_stateless/01308_orc_output_format_arrays.reference new file mode 100644 index 00000000000..1f9646ac112 Binary files /dev/null and b/tests/queries/0_stateless/01308_orc_output_format_arrays.reference differ diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.sh b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh new file mode 100755 index 00000000000..8e36cf604ea --- /dev/null +++ 
b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (array1 Array(Int32), array2 Array(Array(Int32))) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES ([1,2,3,4,5], [[1,2], [3,4], [5]]), ([42], [[42, 42], [42]])"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE orc"; + diff --git a/tests/queries/0_stateless/01323_if_with_nulls.reference b/tests/queries/0_stateless/01323_if_with_nulls.reference index 6bf2d206e0b..80697f97f5c 100644 --- a/tests/queries/0_stateless/01323_if_with_nulls.reference +++ b/tests/queries/0_stateless/01323_if_with_nulls.reference @@ -18,3 +18,7 @@ Nullable(UInt8) \N 1 ok ok ok Nullable(UInt8) \N 1 ok ok ok Nullable(UInt8) \N 1 ok ok ok \N 1 Nullable(Int8) \N ok +\N Nullable(Float64) 0 +\N Nullable(Float64) 0 +1 +1 diff --git a/tests/queries/0_stateless/01323_if_with_nulls.sql b/tests/queries/0_stateless/01323_if_with_nulls.sql index f2cd943988f..6a4df79d765 100644 --- a/tests/queries/0_stateless/01323_if_with_nulls.sql +++ b/tests/queries/0_stateless/01323_if_with_nulls.sql @@ -36,3 +36,20 @@ SELECT b_num, isNull(b_num), toTypeName(b_num), b_num = 0, if(b_num = 0, 'fail', FROM (SELECT 1 k, toInt8(1) a_num) AS x LEFT JOIN (SELECT 2 k, toInt8(1) b_num) AS y USING (k); + +-- test case from https://github.com/ClickHouse/ClickHouse/issues/7347 +DROP TABLE IF EXISTS test_nullable_float_issue7347; +CREATE TABLE test_nullable_float_issue7347 (ne UInt64,test Nullable(Float64)) ENGINE = MergeTree() PRIMARY KEY (ne) ORDER BY (ne); +INSERT INTO test_nullable_float_issue7347 VALUES (1,NULL); + +SELECT test, toTypeName(test), IF(test = 0, 1, 0) FROM test_nullable_float_issue7347; + +WITH materialize(CAST(NULL, 'Nullable(Float64)')) AS test SELECT test, toTypeName(test), IF(test = 0, 1, 0); + +DROP TABLE test_nullable_float_issue7347; + +-- test case from https://github.com/ClickHouse/ClickHouse/issues/10846 + +SELECT if(isFinite(toUInt64OrZero(toNullable('123'))), 1, 0); + +SELECT if(materialize(isFinite(toUInt64OrZero(toNullable('123')))), 1, 0); diff --git a/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.reference b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.reference new file mode 100644 index 00000000000..22a778b715a --- /dev/null +++ b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.reference @@ -0,0 +1,14 @@ + s a + + 1970/01/02 010203Z 1970-01-02 01:02:03 + 01-02-2001 UTC 2001-01-02 00:00:00 + 10.23.1990 1990-10-23 00:00:00 + 01-02-2017 03:04:05+1 2017-01-02 02:04:05 + 01/02/2017 03:04:05+300 2017-01-02 00:04:05 + 01.02.2017 03:04:05GMT 2017-01-02 03:04:05 + 01-02-2017 03:04:05 MSD 2017-01-01 23:04:05 + 01.02.17 03:04:05 MSD Feb 2017-02-01 23:04:05 + 01/02/2017 03:04:05 MSK 2017-01-02 00:04:05 + 12/13/2019 2019-12-13 00:00:00 + 13/12/2019 2019-12-13 00:00:00 + 03/04/2019 2019-03-04 00:00:00 diff --git a/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql new file mode 100644 index 00000000000..0c33b2de49f --- /dev/null +++ b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql @@ -0,0 +1,20 @@ +SELECT + s, + parseDateTimeBestEffortUS(s, 'UTC') AS a +FROM +( + SELECT arrayJoin([ +'1970/01/02 010203Z', +'01-02-2001 
UTC', +'10.23.1990', +'01-02-2017 03:04:05+1', +'01/02/2017 03:04:05+300', +'01.02.2017 03:04:05GMT', +'01-02-2017 03:04:05 MSD', +'01.02.17 03:04:05 MSD Feb', +'01/02/2017 03:04:05 MSK', +'12/13/2019', +'13/12/2019', +'03/04/2019' +]) AS s) +FORMAT PrettySpaceNoEscapes; diff --git a/tests/queries/0_stateless/01353_low_cardinality_join_types.reference b/tests/queries/0_stateless/01353_low_cardinality_join_types.reference new file mode 100644 index 00000000000..85d3f3d598b --- /dev/null +++ b/tests/queries/0_stateless/01353_low_cardinality_join_types.reference @@ -0,0 +1,36 @@ +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) diff --git a/tests/queries/0_stateless/01353_low_cardinality_join_types.sql b/tests/queries/0_stateless/01353_low_cardinality_join_types.sql new file mode 100644 index 00000000000..91ebe97fa48 --- /dev/null +++ b/tests/queries/0_stateless/01353_low_cardinality_join_types.sql @@ -0,0 +1,75 @@ +set join_algorithm = 'hash'; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select 
toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +full join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +set join_algorithm = 'prefer_partial_merge'; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +full join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, 
toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; diff --git a/tests/queries/0_stateless/01353_nullable_tuple.reference b/tests/queries/0_stateless/01353_nullable_tuple.reference new file mode 100644 index 00000000000..b7939182780 --- /dev/null +++ b/tests/queries/0_stateless/01353_nullable_tuple.reference @@ -0,0 +1,92 @@ +single argument +1 +0 +1 +0 +1 +0 +- 1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +1 +1 +1 +- 2 +1 +1 +1 +0 +0 +0 +0 +0 +1 +1 +1 +1 +- 3 +1 +1 +1 +1 +1 +1 +- 4 +\N +\N +\N +\N +\N +\N +two arguments +1 +1 +1 +1 +1 +1 +- 1 +0 +0 +0 +0 +0 +0 +- 2 +1 +1 +1 +1 +1 +1 +- 3 +\N +\N +\N +\N +\N +1 +\N +\N +0 +many arguments +1 +1 +0 +0 +1 +0 +1 +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/01353_nullable_tuple.sql b/tests/queries/0_stateless/01353_nullable_tuple.sql new file mode 100644 index 00000000000..f757e2c42d7 --- /dev/null +++ b/tests/queries/0_stateless/01353_nullable_tuple.sql @@ -0,0 +1,107 @@ +select 'single argument'; +select tuple(number) = tuple(number) from numbers(1); +select tuple(number) = tuple(number + 1) from numbers(1); +select tuple(toNullable(number)) = tuple(number) from numbers(1); +select tuple(toNullable(number)) = tuple(number + 1) from numbers(1); +select tuple(toNullable(number)) = tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number)) = tuple(toNullable(number + 1)) from numbers(1); +select '- 1'; +select tuple(toNullable(number)) < tuple(number + 1) from numbers(1); +select tuple(number) < tuple(toNullable(number + 1)) from numbers(1); +select tuple(toNullable(number)) < tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number)) > tuple(number + 1) from numbers(1); +select tuple(number) > tuple(toNullable(number + 1)) from numbers(1); +select tuple(toNullable(number)) > tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number + 1)) < tuple(number) from numbers(1); +select tuple(number + 1) < tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number + 1)) < tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number + 1)) > tuple(number) from numbers(1); +select tuple(number + 1) > tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number + 1)) > tuple(toNullable(number)) from numbers(1); + +select '- 2'; +select tuple(toNullable(number)) <= tuple(number + 1) from numbers(1); +select tuple(number) <= tuple(toNullable(number + 1)) from numbers(1); +select tuple(toNullable(number)) <= tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number)) >= tuple(number + 1) from numbers(1); +select tuple(number) > tuple(toNullable(number + 1)) from numbers(1); +select tuple(toNullable(number)) >= tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number + 1)) <= tuple(number) from numbers(1); +select tuple(number + 1) <= tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number + 1)) <= tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number + 1)) >= tuple(number) from numbers(1); +select tuple(number + 1) >= tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number + 1)) >= tuple(toNullable(number)) from numbers(1); + +select '- 3'; +select tuple(toNullable(number)) <= tuple(number) from numbers(1); +select tuple(number) <= tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number)) <= tuple(toNullable(number)) from numbers(1); + +select tuple(toNullable(number)) >= 
tuple(number) from numbers(1); +select tuple(number) >= tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number)) >= tuple(toNullable(number)) from numbers(1); + +select '- 4'; +select tuple(number) = tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(materialize(toUInt64OrNull(''))) = tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(number) <= tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(materialize(toUInt64OrNull(''))) <= tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(number) >= tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(materialize(toUInt64OrNull(''))) >= tuple(materialize(toUInt64OrNull(''))) from numbers(1); + +select 'two arguments'; +select tuple(toNullable(number), number) = tuple(number, number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) = tuple(number, number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) = tuple(toNullable(number), number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) = tuple(toNullable(number), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) = tuple(toNullable(number), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) = tuple(toNullable(number), number) from numbers(1); + +select '- 1'; +select tuple(toNullable(number), number) < tuple(number, number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(number, number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(toNullable(number), number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(toNullable(number), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) < tuple(toNullable(number), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) < tuple(toNullable(number), number) from numbers(1); + +select '- 2'; +select tuple(toNullable(number), number) < tuple(number, number + 1) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(number, number + 1) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(toNullable(number + 1), number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(toNullable(number + 1), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) < tuple(toNullable(number), toNullable(number + 1)) from numbers(1); +select tuple(number, toNullable(number)) < tuple(toNullable(number), number + 1) from numbers(1); + +select '- 3'; +select tuple(materialize(toUInt64OrNull('')), number) = tuple(number, number) from numbers(1); +select tuple(materialize(toUInt64OrNull('')), number) = tuple(number, toUInt64OrNull('')) from numbers(1); +select tuple(materialize(toUInt64OrNull('')), toUInt64OrNull('')) = tuple(toUInt64OrNull(''), toUInt64OrNull('')) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) < tuple(number, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) <= tuple(number, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) < tuple(number + 1, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) > tuple(number, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) >= tuple(number, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) > 
tuple(number + 1, number) from numbers(1); + +select 'many arguments'; +select tuple(toNullable(number), number, number) = tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), materialize('a'), number) = tuple(number, materialize('a'), number) from numbers(1); +select tuple(toNullable(number), materialize('a'), number) = tuple(number, materialize('a'), number + 1) from numbers(1); +select tuple(toNullable(number), number, number) < tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), number, number) <= tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), materialize('a'), number) < tuple(number, materialize('a'), number) from numbers(1); +select tuple(toNullable(number), materialize('a'), number) < tuple(number, materialize('a'), number + 1) from numbers(1); +select tuple(toNullable(number), number, materialize(toUInt64OrNull(''))) = tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), materialize('a'), materialize(toUInt64OrNull(''))) = tuple(number, materialize('a'), number) from numbers(1); +select tuple(toNullable(number), materialize('a'), materialize(toUInt64OrNull(''))) = tuple(number, materialize('a'), number + 1) from numbers(1); +select tuple(toNullable(number), number, materialize(toUInt64OrNull(''))) <= tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), materialize('a'), materialize(toUInt64OrNull(''))) <= tuple(number, materialize('a'), number) from numbers(1); +select tuple(toNullable(number), materialize('a'), materialize(toUInt64OrNull(''))) <= tuple(number, materialize('a'), number + 1) from numbers(1); diff --git a/tests/queries/0_stateless/01353_topk_enum.reference b/tests/queries/0_stateless/01353_topk_enum.reference new file mode 100644 index 00000000000..d650850c434 --- /dev/null +++ b/tests/queries/0_stateless/01353_topk_enum.reference @@ -0,0 +1 @@ +['test','world','hello',''] diff --git a/tests/queries/0_stateless/01353_topk_enum.sql b/tests/queries/0_stateless/01353_topk_enum.sql new file mode 100644 index 00000000000..ba048401b23 --- /dev/null +++ b/tests/queries/0_stateless/01353_topk_enum.sql @@ -0,0 +1 @@ +WITH CAST(round(sqrt(number)) % 4 AS Enum('' = 0, 'hello' = 1, 'world' = 2, 'test' = 3)) AS x SELECT topK(10)(x) FROM numbers(1000); diff --git a/tests/queries/0_stateless/01354_order_by_tuple_collate_const.reference b/tests/queries/0_stateless/01354_order_by_tuple_collate_const.reference new file mode 100644 index 00000000000..0359bf9ccb2 --- /dev/null +++ b/tests/queries/0_stateless/01354_order_by_tuple_collate_const.reference @@ -0,0 +1,66 @@ +0 +0 +1 +1 +10 +10 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +0 +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/01354_order_by_tuple_collate_const.sql b/tests/queries/0_stateless/01354_order_by_tuple_collate_const.sql new file mode 100644 index 00000000000..d9b596f3ec3 --- /dev/null +++ b/tests/queries/0_stateless/01354_order_by_tuple_collate_const.sql @@ -0,0 +1 @@ +SELECT number FROM numbers(11) ORDER BY arrayJoin(['а', 'я', '\0�', '', 'Я', '']) ASC, toString(number) ASC, 'y' ASC COLLATE 'el'; diff --git a/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.reference b/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ 
b/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.sql b/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.sql new file mode 100644 index 00000000000..80a1a7c46eb --- /dev/null +++ b/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.sql @@ -0,0 +1,8 @@ +SELECT arrayExists(x -> ((x.1) = 'pattern'), cast([tuple('a', 1)] as Array(Tuple(LowCardinality(String), UInt8)))); + +DROP TABLE IF EXISTS table; +CREATE TABLE table (id Int32, values Array(Tuple(LowCardinality(String), Int32)), date Date) ENGINE MergeTree() PARTITION BY toYYYYMM(date) ORDER BY (id, date); + +SELECT count(*) FROM table WHERE (arrayExists(x -> ((x.1) = toLowCardinality('pattern')), values) = 1); + +DROP TABLE IF EXISTS table; diff --git a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference new file mode 100644 index 00000000000..5d20150fdc1 --- /dev/null +++ b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference @@ -0,0 +1,40 @@ +File generated: +0,0 +1,0 +2,0 +3,0 +4,AAAAAAA +5,0 +6,AAAAAAA +****************** +attempt to parse w/o flags +Return code: 117 +OK: stderr contains a message 'is not like Int64' +****************** +attempt to parse with input_format_allow_errors_ratio=0.1 +Return code: 117 +OK: stderr contains a message 'Already have 1 errors out of 5 rows, which is 0.2' +****************** +attempt to parse with input_format_allow_errors_ratio=0.3 +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +Return code: 0 +****************** +attempt to parse with input_format_allow_errors_num=1 +Return code: 117 +OK: stderr contains a message 'Already have 2 errors out of 7 rows' +****************** +attempt to parse with input_format_allow_errors_num=2 +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +Return code: 0 diff --git a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh new file mode 100755 index 00000000000..0bd575fad3f --- /dev/null +++ b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR/../shell_config.sh" + +SAMPLE_FILE="$CURDIR/01355_sample_data.csv" +STD_ERROR_CAPTURED="$CURDIR/01355_std_error_captured.log" + +echo 'File generated:' +${CLICKHOUSE_LOCAL} -q "SELECT number, if(number in (4,6), 'AAAAAAA', '0') from numbers(7) FORMAT TSV" | tr '\t' ',' >"$SAMPLE_FILE" +cat "$SAMPLE_FILE" + +echo '******************' +echo 'attempt to parse w/o flags' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" +expected_error_message='is not like Int64' +cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" + +echo '******************' +echo 'attempt to parse with input_format_allow_errors_ratio=0.1' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.1 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" 
+expected_error_message='Already have 1 errors out of 5 rows, which is 0.2' +cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" + +echo '******************' +echo 'attempt to parse with input_format_allow_errors_ratio=0.3' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.3 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" +cat "$STD_ERROR_CAPTURED" + +echo '******************' +echo 'attempt to parse with input_format_allow_errors_num=1' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=1 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" +expected_error_message='Already have 2 errors out of 7 rows' +cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" + +echo '******************' +echo 'attempt to parse with input_format_allow_errors_num=2' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=2 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" +cat "$STD_ERROR_CAPTURED" + +rm "$STD_ERROR_CAPTURED" "$SAMPLE_FILE" diff --git a/tests/queries/0_stateless/01355_alter_column_with_order.reference b/tests/queries/0_stateless/01355_alter_column_with_order.reference new file mode 100644 index 00000000000..247795a13c8 --- /dev/null +++ b/tests/queries/0_stateless/01355_alter_column_with_order.reference @@ -0,0 +1,40 @@ +Added1 UInt32 +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +Added2 UInt32 +ToDrop UInt32 +Added3 UInt32 +Added1 UInt32 +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +Added2 UInt32 +ToDrop UInt32 +Added3 UInt32 +Added2 UInt32 +Added1 UInt32 +CounterID UInt32 +Added3 UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +ToDrop UInt32 +Added2 UInt32 +Added1 UInt32 +CounterID UInt32 +Added3 UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +ToDrop UInt32 diff --git a/tests/queries/0_stateless/01355_alter_column_with_order.sql b/tests/queries/0_stateless/01355_alter_column_with_order.sql new file mode 100644 index 00000000000..b3ae8f43042 --- /dev/null +++ b/tests/queries/0_stateless/01355_alter_column_with_order.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS alter_test; + +CREATE TABLE alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192); + +ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST; + +ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn; + +ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop; + +DESC alter_test; +DETACH TABLE alter_test; +ATTACH TABLE alter_test; +DESC alter_test; + +ALTER TABLE alter_test MODIFY COLUMN Added2 UInt32 FIRST; + +ALTER TABLE alter_test MODIFY COLUMN Added3 UInt32 AFTER CounterID; + +DESC alter_test; +DETACH TABLE alter_test; 
+ATTACH TABLE alter_test; +DESC alter_test; + +DROP TABLE IF EXISTS alter_test; diff --git a/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.reference b/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.reference new file mode 100644 index 00000000000..4165503c4b5 --- /dev/null +++ b/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.reference @@ -0,0 +1 @@ + LowCardinality(String) diff --git a/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.sql b/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.sql new file mode 100644 index 00000000000..e3168eb09a0 --- /dev/null +++ b/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.sql @@ -0,0 +1,4 @@ +SELECT + materialize(toLowCardinality('')) AS lc, + toTypeName(lc) +WHERE lc = defaultValueOfArgumentType(lc) diff --git a/tests/queries/0_stateless/01355_if_fixed_string.reference b/tests/queries/0_stateless/01355_if_fixed_string.reference new file mode 100644 index 00000000000..43c8af518b4 --- /dev/null +++ b/tests/queries/0_stateless/01355_if_fixed_string.reference @@ -0,0 +1,40 @@ +0\0\0\0\0 String +1\0 String +-2\0\0\0 String +3\0 String +-4\0\0\0 String +5\0 String +-6\0\0\0 String +7\0 String +-8\0\0\0 String +9\0 String +0\0 FixedString(2) +1\0 FixedString(2) +-2 FixedString(2) +3\0 FixedString(2) +-4 FixedString(2) +5\0 FixedString(2) +-6 FixedString(2) +7\0 FixedString(2) +-8 FixedString(2) +9\0 FixedString(2) +0 String +1 String +-2 String +3 String +-4 String +5 String +-6 String +7 String +-8 String +9 String +0\0 FixedString(2) +1\0 FixedString(2) +-2 FixedString(2) +3\0 FixedString(2) +-4 FixedString(2) +5\0 FixedString(2) +-6 FixedString(2) +7\0 FixedString(2) +-8 FixedString(2) +9\0 FixedString(2) diff --git a/tests/queries/0_stateless/01355_if_fixed_string.sql b/tests/queries/0_stateless/01355_if_fixed_string.sql new file mode 100644 index 00000000000..a0afcc5f197 --- /dev/null +++ b/tests/queries/0_stateless/01355_if_fixed_string.sql @@ -0,0 +1,5 @@ +SELECT if(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 5)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; +SELECT if(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 2)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; + +SELECT multiIf(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 5)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; +SELECT multiIf(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 2)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/01355_ilike.reference b/tests/queries/0_stateless/01355_ilike.reference new file mode 100644 index 00000000000..8b2bd84f27a --- /dev/null +++ b/tests/queries/0_stateless/01355_ilike.reference @@ -0,0 +1,44 @@ +0 +1 +1 +1 +1 +0 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +1 +1 +1 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +1 +1 +0 +1 +1 +1 +1 +1 +test1 +test2 diff --git a/tests/queries/0_stateless/01355_ilike.sql b/tests/queries/0_stateless/01355_ilike.sql new file mode 100644 index 00000000000..6f08be2dab2 --- /dev/null +++ b/tests/queries/0_stateless/01355_ilike.sql @@ -0,0 +1,61 @@ +SELECT 'Hello' ILIKE ''; +SELECT 'Hello' ILIKE '%'; +SELECT 'Hello' ILIKE '%%'; +SELECT 'Hello' ILIKE '%%%'; +SELECT 'Hello' ILIKE '%_%'; +SELECT 'Hello' ILIKE '_'; +SELECT 'Hello' ILIKE '_%'; +SELECT 'Hello' ILIKE '%_'; + +SELECT 'Hello' ILIKE 'H%o'; +SELECT 'hello' ILIKE 'H%o'; +SELECT 'hello' ILIKE 'h%o'; +SELECT 
'Hello' ILIKE 'h%o'; + +SELECT 'Hello' NOT ILIKE 'H%o'; +SELECT 'hello' NOT ILIKE 'H%o'; +SELECT 'hello' NOT ILIKE 'h%o'; +SELECT 'Hello' NOT ILIKE 'h%o'; + +SELECT 'OHello' ILIKE '%lhell%'; +SELECT 'Ohello' ILIKE '%hell%'; +SELECT 'hEllo' ILIKE '%HEL%'; + +SELECT 'OHello' NOT ILIKE '%lhell%'; +SELECT 'Ohello' NOT ILIKE '%hell%'; +SELECT 'hEllo' NOT ILIKE '%HEL%'; + +SELECT materialize('prepre_f') ILIKE '%pre_f%'; + +SELECT 'abcdef' ILIKE '%aBc%def%'; +SELECT 'ABCDDEF' ILIKE '%abc%def%'; +SELECT 'Abc\nDef' ILIKE '%abc%def%'; +SELECT 'abc\ntdef' ILIKE '%abc%def%'; +SELECT 'abct\ndef' ILIKE '%abc%dEf%'; +SELECT 'abc\n\ndeF' ILIKE '%abc%def%'; +SELECT 'abc\n\ntdef' ILIKE '%abc%deF%'; +SELECT 'Abc\nt\ndef' ILIKE '%abc%def%'; +SELECT 'abct\n\ndef' ILIKE '%abc%def%'; +SELECT 'ab\ndef' ILIKE '%Abc%def%'; +SELECT 'aBc\nef' ILIKE '%ABC%DEF%'; + +SELECT CAST('hello' AS FixedString(5)) ILIKE '%he%o%'; + +SELECT 'ёЁё' ILIKE 'Ё%Ё'; +SELECT 'ощщЁё' ILIKE 'Щ%Ё'; +SELECT 'ощЩЁё' ILIKE '%Щ%Ё'; + +SELECT 'Щущпандер' ILIKE '%щп%е%'; +SELECT 'Щущпандер' ILIKE '%щП%е%'; +SELECT 'ощщЁё' ILIKE '%щ%'; +SELECT 'ощЩЁё' ILIKE '%ё%'; + +SHOW TABLES NOT ILIKE '%'; +DROP DATABASE IF EXISTS test_01355; +CREATE DATABASE test_01355; +USE test_01355; +CREATE TABLE test1 (x UInt8) ENGINE = Memory; +CREATE TABLE test2 (x UInt8) ENGINE = Memory; +SHOW TABLES ILIKE 'tES%'; +SHOW TABLES NOT ILIKE 'TeS%'; +DROP DATABASE test_01355; diff --git a/tests/queries/0_stateless/01356_initialize_aggregation.reference b/tests/queries/0_stateless/01356_initialize_aggregation.reference new file mode 100644 index 00000000000..63ebb1717d6 --- /dev/null +++ b/tests/queries/0_stateless/01356_initialize_aggregation.reference @@ -0,0 +1,4 @@ +3 +[999,998,997,996,995,994,993,992,991,990] +[1] +[990,991,992,993,994,995,996,997,998,999] diff --git a/tests/queries/0_stateless/01356_initialize_aggregation.sql b/tests/queries/0_stateless/01356_initialize_aggregation.sql new file mode 100644 index 00000000000..07a5ca1892b --- /dev/null +++ b/tests/queries/0_stateless/01356_initialize_aggregation.sql @@ -0,0 +1,4 @@ +SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000); +SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', number, number) AS state FROM system.numbers LIMIT 1000); +SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', 1, number) AS state FROM system.numbers LIMIT 1000); +SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', number, 1) AS state FROM system.numbers LIMIT 1000); diff --git a/tests/queries/0_stateless/01356_state_resample.reference b/tests/queries/0_stateless/01356_state_resample.reference new file mode 100644 index 00000000000..40c606b4a68 --- /dev/null +++ b/tests/queries/0_stateless/01356_state_resample.reference @@ -0,0 +1,8 @@ +[900,910,920,930,940,950,960,970,980,990,1000,1010,1020,1030,1040,1050,1060,1070,1080,1090] +[900,910,920,930,940,950,960,970,980,990,1000,1010,1020,1030,1040,1050,1060,1070,1080,1090] +[360,243,306,372,252,315,384,261,324,396,270,333,408,279,342,420,288,351,432,297] +[300,364,246,309,376,255,318,388,264,327,400,273,336,412,282,345,424,291,354,436] +[240,303,368,249,312,380,258,321,392,267,330,404,276,339,416,285,348,428,294,357] 
+[[0,20,40],[1,21,41],[2,22,42],[3,23,43],[4,24,44],[5,25,45],[6,26,46],[7,27,47],[8,28,48],[9,29,49],[10,30],[11,31],[12,32],[13,33],[14,34],[15,35],[16,36],[17,37],[18,38],[19,39]] +[[0,20,40],[1,21,41],[2,22,42],[3,23,43],[4,24,44],[5,25,45],[6,26,46],[7,27,47],[8,28,48],[9,29,49],[10,30],[11,31],[12,32],[13,33],[14,34],[15,35],[16,36],[17,37],[18,38],[19,39]] +[1800,1820,1840,1860,1880,1900,1920,1940,1960,1980,2000,2020,2040,2060,2080,2100,2120,2140,2160,2180] diff --git a/tests/queries/0_stateless/01356_state_resample.sql b/tests/queries/0_stateless/01356_state_resample.sql new file mode 100644 index 00000000000..6be28e19d87 --- /dev/null +++ b/tests/queries/0_stateless/01356_state_resample.sql @@ -0,0 +1,14 @@ +select sumResample(0, 20, 1)(number, number % 20) from numbers(200); +select arrayMap(x -> finalizeAggregation(x), state) from (select sumStateResample(0, 20, 1)(number, number % 20) as state from numbers(200)); +select arrayMap(x -> finalizeAggregation(x), state) from +( + select sumStateResample(0,20,1)(number, number%20) as state from numbers(200) group by number % 3 +); + +select groupArrayResample(0, 20, 1)(number, number % 20) from numbers(50); +select arrayMap(x -> finalizeAggregation(x), state) from (select groupArrayStateResample(0, 20, 1)(number, number % 20) state from numbers(50)); + +select arrayMap(x -> finalizeAggregation(x), state) from +( + select sumStateResample(0, 20, 1)(number, number % 20) as state from remote('127.0.0.{1,2}', numbers(200)) +); diff --git a/tests/queries/0_stateless/01356_view_threads.reference b/tests/queries/0_stateless/01356_view_threads.reference new file mode 100644 index 00000000000..4e9079198d5 --- /dev/null +++ b/tests/queries/0_stateless/01356_view_threads.reference @@ -0,0 +1,3 @@ +0 249999500000 +1 250000000000 +1 diff --git a/tests/queries/0_stateless/01356_view_threads.sql b/tests/queries/0_stateless/01356_view_threads.sql new file mode 100644 index 00000000000..5290ec555af --- /dev/null +++ b/tests/queries/0_stateless/01356_view_threads.sql @@ -0,0 +1,12 @@ +drop table if exists table_01356_view_threads; + +create view table_01356_view_threads as select number % 10 as g, sum(number) as s from numbers_mt(1000000) group by g; + +set log_queries = 1; +set max_threads = 16; +select g % 2 as gg, sum(s) from table_01356_view_threads group by gg order by gg; + +system flush logs; +select length(thread_ids) >= 16 from system.query_log where event_date >= today() - 1 and lower(query) like '%select g % 2 as gg, sum(s) from table_01356_view_threads group by gg order by gg%' and type = 'QueryFinish' order by query_start_time desc limit 1; + +drop table if exists table_01356_view_threads; diff --git a/tests/queries/0_stateless/01356_wrong_filter-type_bug.reference b/tests/queries/0_stateless/01356_wrong_filter-type_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql b/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql new file mode 100644 index 00000000000..b3f48967ba2 --- /dev/null +++ b/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql @@ -0,0 +1,9 @@ +drop table if exists t0; + +CREATE TABLE t0 (`c0` String, `c1` Int32 CODEC(NONE), `c2` Int32) ENGINE = MergeTree() ORDER BY tuple(); +insert into t0 values ('a', 1, 2); + +SELECT t0.c2, t0.c1, t0.c0 FROM t0 PREWHERE t0.c0 ORDER BY ((t0.c2)>=(t0.c1)), (((- (((t0.c0)>(t0.c0))))) IS NULL) FORMAT TabSeparatedWithNamesAndTypes; -- {serverError 59} +SELECT t0.c2, t0.c1, t0.c0 FROM t0 WHERE t0.c0 
ORDER BY ((t0.c2)>=(t0.c1)), (((- (((t0.c0)>(t0.c0))))) IS NULL) FORMAT TabSeparatedWithNamesAndTypes settings optimize_move_to_prewhere=0; -- {serverError 59} + +drop table if exists t0; diff --git a/tests/queries/0_stateless/01357_result_rows.reference b/tests/queries/0_stateless/01357_result_rows.reference new file mode 100644 index 00000000000..cd121fd3feb --- /dev/null +++ b/tests/queries/0_stateless/01357_result_rows.reference @@ -0,0 +1,2 @@ +1 +1 1 diff --git a/tests/queries/0_stateless/01357_result_rows.sql b/tests/queries/0_stateless/01357_result_rows.sql new file mode 100644 index 00000000000..17c2d15979a --- /dev/null +++ b/tests/queries/0_stateless/01357_result_rows.sql @@ -0,0 +1,5 @@ +set log_queries = 1; +select count() > 0 from system.settings; + +system flush logs; +select result_rows, result_bytes >= 8 from system.query_log where event_date >= today() - 1 and lower(query) like '%select count() > 0 from system.settings%' and type = 'QueryFinish' order by query_start_time desc limit 1; diff --git a/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference new file mode 100644 index 00000000000..353c70aec11 --- /dev/null +++ b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference @@ -0,0 +1,3 @@ +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 7\nsign column: sign\nprimary key: key1, key2\ndata format version: 1\npartition key: d\ngranularity bytes: 10485760\n +1 +1 diff --git a/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.sql b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.sql new file mode 100644 index 00000000000..0086ec5c2a3 --- /dev/null +++ b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS versioned_collapsing_table; + +CREATE TABLE versioned_collapsing_table( + d Date, + key1 UInt64, + key2 UInt32, + value String, + sign Int8, + version UInt16 +) +ENGINE = ReplicatedVersionedCollapsingMergeTree('/clickhouse/versioned_collapsing_table', '1', sign, version) +PARTITION BY d +ORDER BY (key1, key2); + +INSERT INTO versioned_collapsing_table VALUES (toDate('2019-10-10'), 1, 1, 'Hello', -1, 1); + +SELECT value FROM system.zookeeper WHERE path = '/clickhouse/versioned_collapsing_table' and name = 'metadata'; + +SELECT COUNT() FROM versioned_collapsing_table; + +DETACH TABLE versioned_collapsing_table; +ATTACH TABLE versioned_collapsing_table; + +SELECT COUNT() FROM versioned_collapsing_table; + +DROP TABLE IF EXISTS versioned_collapsing_table; diff --git a/tests/queries/0_stateless/01358_constexpr_constraint.reference b/tests/queries/0_stateless/01358_constexpr_constraint.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01358_constexpr_constraint.sql b/tests/queries/0_stateless/01358_constexpr_constraint.sql new file mode 100644 index 00000000000..799f6f32259 --- /dev/null +++ b/tests/queries/0_stateless/01358_constexpr_constraint.sql @@ -0,0 +1,12 @@ +CREATE TEMPORARY TABLE constrained +( + `URL` String, + CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = domainWithoutWWW(URL), + CONSTRAINT is_utf8 CHECK isValidUTF8(URL) +); + +insert into constrained values ('a'); + +DROP TEMPORARY TABLE constrained; +CREATE TEMPORARY TABLE constrained (x UInt8, CONSTRAINT bogus CHECK 0); +INSERT INTO constrained VALUES 
(1); -- { serverError 469 } diff --git a/tests/queries/0_stateless/01358_lc_parquet.reference b/tests/queries/0_stateless/01358_lc_parquet.reference new file mode 100644 index 00000000000..450af4cf648 --- /dev/null +++ b/tests/queries/0_stateless/01358_lc_parquet.reference @@ -0,0 +1,3 @@ +abc +ghi +\N diff --git a/tests/queries/0_stateless/01358_lc_parquet.sh b/tests/queries/0_stateless/01358_lc_parquet.sh new file mode 100755 index 00000000000..859bb2ebac7 --- /dev/null +++ b/tests/queries/0_stateless/01358_lc_parquet.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_lc" +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test (a String) Engine = MergeTree order by tuple()" +$CLICKHOUSE_CLIENT -q "create table test_lc (a LowCardinality(String)) Engine = MergeTree order by tuple()" +$CLICKHOUSE_CLIENT -q "select 'abc' as a format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test_lc format Parquet" +$CLICKHOUSE_CLIENT -q "select a from test_lc format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test format Parquet" +$CLICKHOUSE_CLIENT -q "select a from test order by a" +$CLICKHOUSE_CLIENT -q "drop table if exists test_lc" +$CLICKHOUSE_CLIENT -q "drop table if exists test" + +$CLICKHOUSE_CLIENT -q "drop table if exists test_lc" +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test (a Nullable(String)) Engine = MergeTree order by tuple()" +$CLICKHOUSE_CLIENT -q "create table test_lc (a LowCardinality(Nullable(String))) Engine = MergeTree order by tuple()" +$CLICKHOUSE_CLIENT -q "select 'ghi' as a format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test_lc format Parquet" +$CLICKHOUSE_CLIENT -q "select cast(Null as Nullable(String)) as a format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test_lc format Parquet" +$CLICKHOUSE_CLIENT -q "select a from test_lc format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test format Parquet" +$CLICKHOUSE_CLIENT -q "select a from test order by a" +$CLICKHOUSE_CLIENT -q "drop table if exists test_lc" +$CLICKHOUSE_CLIENT -q "drop table if exists test" diff --git a/tests/queries/0_stateless/01358_mutation_delete_null_rows.reference b/tests/queries/0_stateless/01358_mutation_delete_null_rows.reference new file mode 100644 index 00000000000..126874237b7 --- /dev/null +++ b/tests/queries/0_stateless/01358_mutation_delete_null_rows.reference @@ -0,0 +1,20 @@ +-------- +0 1 x=0 +1 0 x<>0 +3 0 x<>0 +\N \N x<>0 +-------- +2020-01-01 2 0 leave +2020-01-02 aaa 0 1 delete +2020-01-03 2 0 leave +2020-01-04 2 0 leave +2020-01-05 \N 2 0 leave +2020-01-06 aaa 0 1 delete +2020-01-07 aaa 0 1 delete +2020-01-08 aaa \N \N leave +-------- +2020-01-01 2 +2020-01-03 2 +2020-01-04 2 +2020-01-05 \N 2 +2020-01-08 aaa \N diff --git a/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql b/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql new file mode 100644 index 00000000000..e8aabf1aa37 --- /dev/null +++ b/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql @@ -0,0 +1,26 @@ +select '--------'; +SELECT arrayJoin([0, 1, 3, NULL]) AS x, x = 0, if(x = 0, 'x=0', 'x<>0') ORDER BY x; + +select '--------'; +drop table if exists mutation_delete_null_rows; + +CREATE TABLE mutation_delete_null_rows +( + `EventDate` Date, + `CounterID` Nullable(String), + `UserID` Nullable(UInt32) +) +ENGINE = MergeTree() +ORDER BY EventDate; + +INSERT INTO 
mutation_delete_null_rows VALUES ('2020-01-01', '', 2)('2020-01-02', 'aaa', 0); +INSERT INTO mutation_delete_null_rows VALUES ('2020-01-03', '', 2)('2020-01-04', '', 2)('2020-01-05', NULL, 2)('2020-01-06', 'aaa', 0)('2020-01-07', 'aaa', 0)('2020-01-08', 'aaa', NULL); + +SELECT *,UserID = 0 as UserIDEquals0, if(UserID = 0, 'delete', 'leave') as verdict FROM mutation_delete_null_rows ORDER BY EventDate; + +ALTER TABLE mutation_delete_null_rows DELETE WHERE UserID = 0 SETTINGS mutations_sync=1; + +select '--------'; +SELECT * FROM mutation_delete_null_rows ORDER BY EventDate; + +drop table mutation_delete_null_rows; diff --git a/tests/queries/0_stateless/01359_codeql.reference b/tests/queries/0_stateless/01359_codeql.reference new file mode 100644 index 00000000000..8b68939b011 --- /dev/null +++ b/tests/queries/0_stateless/01359_codeql.reference @@ -0,0 +1 @@ +4294967291 diff --git a/tests/queries/0_stateless/01359_codeql.sql b/tests/queries/0_stateless/01359_codeql.sql new file mode 100644 index 00000000000..9f68661eef5 --- /dev/null +++ b/tests/queries/0_stateless/01359_codeql.sql @@ -0,0 +1,2 @@ +-- In previous ClickHouse versions, the multiplication was performed in a wrong type, leading to overflow. +SELECT round(avgWeighted(x, y)) FROM (SELECT 0xFFFFFFFF AS x, 1000000000 AS y UNION ALL SELECT 1 AS x, 1 AS y); diff --git a/tests/queries/0_stateless/01359_geodistance_loop.reference b/tests/queries/0_stateless/01359_geodistance_loop.reference new file mode 100644 index 00000000000..8484d062f57 --- /dev/null +++ b/tests/queries/0_stateless/01359_geodistance_loop.reference @@ -0,0 +1 @@ +inf diff --git a/tests/queries/0_stateless/01359_geodistance_loop.sql b/tests/queries/0_stateless/01359_geodistance_loop.sql new file mode 100644 index 00000000000..4c555a2538e --- /dev/null +++ b/tests/queries/0_stateless/01359_geodistance_loop.sql @@ -0,0 +1 @@ +SELECT geoDistance(0., 0., -inf, 1.); diff --git a/tests/queries/0_stateless/01360_division_overflow.reference b/tests/queries/0_stateless/01360_division_overflow.reference new file mode 100644 index 00000000000..a4acafeee60 --- /dev/null +++ b/tests/queries/0_stateless/01360_division_overflow.reference @@ -0,0 +1,6 @@ +0 +0 +0 +1 +3 +5 diff --git a/tests/queries/0_stateless/01360_division_overflow.sql b/tests/queries/0_stateless/01360_division_overflow.sql new file mode 100644 index 00000000000..75601a36536 --- /dev/null +++ b/tests/queries/0_stateless/01360_division_overflow.sql @@ -0,0 +1,5 @@ +select intDiv(materialize(toInt32(1)), 0x100000000); +select intDiv(materialize(toInt32(1)), -0x100000000); +select intDiv(materialize(toInt32(1)), -9223372036854775808); +select materialize(toInt32(1)) % -9223372036854775808; +select value % -9223372036854775808 from (select toInt32(arrayJoin([3, 5])) value); diff --git a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference new file mode 100644 index 00000000000..f6349a0b9b4 --- /dev/null +++ b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference @@ -0,0 +1,11 @@ +1 +1 +1 +1 +1 +=== system.query_log === +main_dashboard_bottom_query 2 +main_dashboard_top_query 2 +=== slowlog === +main_dashboard_bottom_query 1 +main_dashboard_top_query 1 diff --git a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql new file mode 100644 index 00000000000..4f216ae647f --- /dev/null 
+++ b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql @@ -0,0 +1,54 @@ +DROP TABLE IF EXISTS slow_log; +DROP TABLE IF EXISTS expected_times; + +CREATE TABLE expected_times (QUERY_GROUP_ID String, max_query_duration_ms UInt64) Engine=Memory; +INSERT INTO expected_times VALUES('main_dashboard_top_query', 100), ('main_dashboard_bottom_query', 100); + +SET log_queries=1; +SELECT 1; +SYSTEM FLUSH LOGS; + +CREATE MATERIALIZED VIEW slow_log Engine=Memory AS +( + SELECT * FROM + ( + SELECT + extract(query,'/\\*\\s*QUERY_GROUP_ID:(.*?)\\s*\\*/') as QUERY_GROUP_ID, + * + FROM system.query_log + WHERE type<>1 and event_date >= yesterday() and event_time > now() - 120 + ) as ql + INNER JOIN expected_times USING (QUERY_GROUP_ID) + WHERE query_duration_ms > max_query_duration_ms +); + +SELECT 1 /* QUERY_GROUP_ID:main_dashboard_top_query */; +SELECT 1 /* QUERY_GROUP_ID:main_dashboard_bottom_query */; + +SELECT 1 WHERE not ignore(sleep(0.105)) /* QUERY_GROUP_ID:main_dashboard_top_query */; +SELECT 1 WHERE not ignore(sleep(0.105)) /* QUERY_GROUP_ID:main_dashboard_bottom_query */; + +SET log_queries=0; +SYSTEM FLUSH LOGS; + +SELECT '=== system.query_log ==='; + +SELECT + extract(query,'/\\*\\s*QUERY_GROUP_ID:(.*?)\\s*\\*/') as QUERY_GROUP_ID, + count() +FROM system.query_log +WHERE type<>1 and event_date >= yesterday() and event_time > now() - 20 and QUERY_GROUP_ID<>'' +GROUP BY QUERY_GROUP_ID +ORDER BY QUERY_GROUP_ID; + +SELECT '=== slowlog ==='; + +SELECT + QUERY_GROUP_ID, + count() +FROM slow_log +GROUP BY QUERY_GROUP_ID +ORDER BY QUERY_GROUP_ID; + +DROP TABLE slow_log; +DROP TABLE expected_times; diff --git a/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.reference b/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.reference new file mode 100644 index 00000000000..083edaac248 --- /dev/null +++ b/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.reference @@ -0,0 +1,3 @@ +2 +2 +2 diff --git a/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.sql b/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.sql new file mode 100644 index 00000000000..424c38d5590 --- /dev/null +++ b/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS t1_01361; +DROP TABLE IF EXISTS t2_01361; +DROP TABLE IF EXISTS mv1_01361; +DROP TABLE IF EXISTS b1_01361; + +CREATE TABLE t1_01361 ( + i UInt32, + time DateTime +) ENGINE = MergeTree() +PARTITION BY time +ORDER BY time; + +CREATE TABLE t2_01361 ( + i UInt32, + time DateTime +) ENGINE = MergeTree() +PARTITION BY time +ORDER BY time; + +CREATE MATERIALIZED VIEW mv1_01361 +TO t2_01361 +AS SELECT * FROM (SELECT * FROM t1_01361); + +CREATE TABLE b1_01361 AS t1_01361 +ENGINE = Buffer(currentDatabase(), t1_01361, 1, 0, 0, 1, 1, 1, 1); + +INSERT INTO b1_01361 VALUES (1, now()); +INSERT INTO b1_01361 VALUES (2, now()); + +SELECT count() FROM b1_01361; +SELECT count() FROM t1_01361; +SELECT count() FROM t2_01361; + +DROP TABLE IF EXISTS t1_01361; +DROP TABLE IF EXISTS t2_01361; +DROP TABLE IF EXISTS mv1_01361; +DROP TABLE IF EXISTS b1_01361; diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.reference b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference new file mode 100644 index 00000000000..64bb6b746dc --- /dev/null +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference @@ -0,0 +1 @@ +30 diff --git 
a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh new file mode 100755 index 00000000000..97e1a5eda51 --- /dev/null +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'connect timed out|Network is unreachable' | wc -l diff --git a/tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.reference b/tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.reference new file mode 100644 index 00000000000..7ac92da1e7d --- /dev/null +++ b/tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.reference @@ -0,0 +1,10 @@ +2009 +09 +2009 +09 +2010 +10 +2019 +19 +2019 +19 diff --git a/tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.sql b/tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.sql new file mode 100644 index 00000000000..0a7cc047c1f --- /dev/null +++ b/tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.sql @@ -0,0 +1,10 @@ +SELECT formatDateTime(toDate('2010-01-01'), '%G'); -- Friday (first day of the year) attributed to week 53 of the previous year (2009) +SELECT formatDateTime(toDate('2010-01-01'), '%g'); +SELECT formatDateTime(toDate('2010-01-03'), '%G'); -- Sunday, last day attributed to week 53 of the previous year (2009) +SELECT formatDateTime(toDate('2010-01-03'), '%g'); +SELECT formatDateTime(toDate('2010-01-04'), '%G'); -- Monday, first day in the year attributed to week 01 of the current year (2010) +SELECT formatDateTime(toDate('2010-01-04'), '%g'); +SELECT formatDateTime(toDate('2018-12-31'), '%G'); -- Monday (last day of the year) attributed to 01 week of next year (2019) +SELECT formatDateTime(toDate('2018-12-31'), '%g'); +SELECT formatDateTime(toDate('2019-01-01'), '%G'); -- Tuesday (first day of the year) attributed to 01 week of this year (2019) +SELECT formatDateTime(toDate('2019-01-01'), '%g'); diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.reference b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.sh b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.sh new file mode 100755 index 00000000000..0e895e55cd0 --- /dev/null +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.sh @@ -0,0 +1,29 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 1 +match_max 100000 + +spawn clickhouse-client +expect ":) " + +# Make a query +send -- "SET max_distributed" +expect "SET max_distributed" + +# Wait for suggestions to load, they are loaded in background +set is_done 0 +while {$is_done == 0} { + send -- "\t" + expect { + "_connections" { + set is_done 1 + } + default { + sleep 1 + } + } +} + +send -- "\3\4" +expect eof diff --git a/tests/queries/0_stateless/01372_remote_table_function_empty_table.reference b/tests/queries/0_stateless/01372_remote_table_function_empty_table.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql b/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql new file mode 100644 index 00000000000..698c323d73f --- /dev/null +++ b/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql @@ -0,0 +1 @@ +SELECT * FROM remote('127..2', 'a.'); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01372_wrong_order_by_removal.reference b/tests/queries/0_stateless/01372_wrong_order_by_removal.reference new file mode 100644 index 00000000000..c1b36bd53ca --- /dev/null +++ b/tests/queries/0_stateless/01372_wrong_order_by_removal.reference @@ -0,0 +1 @@ +SELECT \n k,\n groupArrayMovingSum(v)\nFROM \n(\n SELECT \n k,\n dt,\n v\n FROM moving_sum_num\n ORDER BY \n k ASC,\n dt ASC\n)\nGROUP BY k\nORDER BY k ASC diff --git a/tests/queries/0_stateless/01372_wrong_order_by_removal.sql b/tests/queries/0_stateless/01372_wrong_order_by_removal.sql new file mode 100644 index 00000000000..93f3388676b --- /dev/null +++ b/tests/queries/0_stateless/01372_wrong_order_by_removal.sql @@ -0,0 +1,11 @@ +CREATE TEMPORARY TABLE moving_sum_num +( + `k` String, + `dt` DateTime, + `v` UInt64 +); + +SET enable_debug_queries = 1; + +-- ORDER BY from subquery shall not be removed. +ANALYZE SELECT k, groupArrayMovingSum(v) FROM (SELECT * FROM moving_sum_num ORDER BY k, dt) GROUP BY k ORDER BY k; diff --git a/tests/queries/0_stateless/01373_is_zero_or_null.reference b/tests/queries/0_stateless/01373_is_zero_or_null.reference new file mode 100644 index 00000000000..d9caaa2089a --- /dev/null +++ b/tests/queries/0_stateless/01373_is_zero_or_null.reference @@ -0,0 +1,22 @@ +1 1 +1 1 +0 0 +\N 1 +--- +1 1 +1 1 +0 0 +--- +hello +world +--- +hello +world +--- +hello +world +\N +--- +3 +--- +4 diff --git a/tests/queries/0_stateless/01373_is_zero_or_null.sql b/tests/queries/0_stateless/01373_is_zero_or_null.sql new file mode 100644 index 00000000000..32458dc9f62 --- /dev/null +++ b/tests/queries/0_stateless/01373_is_zero_or_null.sql @@ -0,0 +1,29 @@ +SELECT NOT x, isZeroOrNull(x) FROM (SELECT arrayJoin([1, 2, 3, NULL]) = 3 AS x); +SELECT '---'; +SELECT NOT x, isZeroOrNull(x) FROM (SELECT arrayJoin([1, 2, 3]) = 3 AS x); +SELECT '---'; +CREATE TEMPORARY TABLE test (x String NULL); +INSERT INTO test VALUES ('hello'), ('world'), ('xyz'), (NULL); + +SELECT * FROM test WHERE x != 'xyz'; +SELECT '---'; +SELECT * FROM test WHERE NOT x = 'xyz'; +SELECT '---'; +SELECT * FROM test WHERE isZeroOrNull(x = 'xyz'); +SELECT '---'; + +SELECT count() FROM +( + SELECT * FROM test WHERE x != 'xyz' + UNION ALL + SELECT * FROM test WHERE NOT x != 'xyz' +); + +SELECT '---'; + +SELECT count() FROM +( + SELECT * FROM test WHERE x != 'xyz' + UNION ALL + SELECT * FROM test WHERE isZeroOrNull(x != 'xyz') +); diff --git a/tests/queries/0_stateless/01374_if_nullable_filimonov.reference b/tests/queries/0_stateless/01374_if_nullable_filimonov.reference new file mode 100644 index 00000000000..ebe52278bb3 --- /dev/null +++ b/tests/queries/0_stateless/01374_if_nullable_filimonov.reference @@ -0,0 +1,8 @@ +2 0 leave +0 1 delete +\N \N leave +--- +0 1 Definitely x = 0 +1 0 We cannot say that x = 0 +3 0 We cannot say that x = 0 +\N \N We cannot say that x = 0 diff --git a/tests/queries/0_stateless/01374_if_nullable_filimonov.sql b/tests/queries/0_stateless/01374_if_nullable_filimonov.sql new file mode 100644 index 00000000000..0fadfb85fe4 --- /dev/null +++ b/tests/queries/0_stateless/01374_if_nullable_filimonov.sql @@ -0,0 +1,9 @@ +SELECT + UserID, + UserID = 0, + 
if(UserID = 0, 'delete', 'leave') +FROM VALUES('UserID Nullable(UInt8)', (2), (0), (NULL)); + +SELECT '---'; + +SELECT arrayJoin([0, 1, 3, NULL]) AS x, x = 0, if(x = 0, 'Definitely x = 0', 'We cannot say that x = 0'); diff --git a/tests/queries/0_stateless/01375_null_issue_3767.reference b/tests/queries/0_stateless/01375_null_issue_3767.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01375_null_issue_3767.sql b/tests/queries/0_stateless/01375_null_issue_3767.sql new file mode 100644 index 00000000000..88b18e001f9 --- /dev/null +++ b/tests/queries/0_stateless/01375_null_issue_3767.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS null_issue_3767; + +CREATE TABLE null_issue_3767 (value Nullable(String)) ENGINE=Memory; + +INSERT INTO null_issue_3767 (value) VALUES ('A String'), (NULL); + +SELECT value FROM null_issue_3767 WHERE value NOT IN ('A String'); + +DROP TABLE null_issue_3767; diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 2b61d384b00..63917583ae3 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -57,6 +57,7 @@ 01044_h3_edge_angle 01046_materialized_view_with_join_over_distributed 01050_clickhouse_dict_source_with_subquery +01053_ssd_dictionary 01059_storage_file_brotli 01070_h3_get_base_cell 01070_h3_hex_area_m2 @@ -114,6 +115,7 @@ 01273_h3EdgeAngle_range_check 01274_alter_rename_column_distributed 01276_system_licenses +01280_ssd_complex_key_dictionary 01291_distributed_low_cardinality_memory_efficient 01292_create_user 01293_show_clusters @@ -121,7 +123,9 @@ 01294_system_distributed_on_cluster 01297_alter_distributed 01303_aggregate_function_nothing_serde +01307_orc_output_format +01308_orc_output_format_arrays 01319_query_formatting_in_server_log 01326_build_id -01053_ssd_dictionary -01280_ssd_complex_key_dictionary +01354_order_by_tuple_collate_const +01370_client_autocomplete_word_break_characters diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json new file mode 100644 index 00000000000..2ef448e5d99 --- /dev/null +++ b/tests/queries/skip_list.json @@ -0,0 +1,131 @@ +{ + "thread-sanitizer": [ + "00281", + "00877", + "00985", + "avx2", + "query_profiler", + "memory_profiler", + "01083_expressions_in_engine_arguments", + "00505_shard_secure", + "00505_secure", + "01103_check_cpu_instructions_at_startup", + "01098_temporary_and_external_tables", + "00152_insert_different_granularity", + "00151_replace_partition_with_different_granularity" + ], + "address-sanitizer": [ + "00281", + "00877", + "avx2", + "query_profiler", + "memory_profiler", + "odbc_roundtrip", + "01103_check_cpu_instructions_at_startup" + ], + "ub-sanitizer": [ + "00281", + "capnproto", + "avx2", + "query_profiler", + "memory_profiler", + "01103_check_cpu_instructions_at_startup", + "00900_orc_load" + ], + "memory-sanitizer": [ + "00281", + "capnproto", + "avx2", + "query_profiler", + "memory_profiler", + "01103_check_cpu_instructions_at_startup", + "01086_odbc_roundtrip", + "00877_memory_limit_for_new_delete", + "01114_mysql_database_engine_segfault" + ], + "debug-build": [ + "00281", + "avx2", + "query_profiler", + "memory_profiler", + "00899_long_attach", + "00980_alter_settings_race", + "00834_kill_mutation_replicated_zookeeper", + "00834_kill_mutation", + "01200_mutations_memory_consumption", + "01103_check_cpu_instructions_at_startup", + "01037_polygon_dicts_", + "hyperscan" + ], + "unbundled-build": [ + 
"00429", + "00428", + "00877", + "pocopatch", + "parquet", + "xxhash", + "avx2", + "_h3", + "query_profiler", + "memory_profiler", + "orc_load", + "01033_storage_odbc_parsing_exception_check", + "avro", + "01072_optimize_skip_unused_shards_const_expr_eval", + "00505_secure", + "00505_shard_secure", + "odbc_roundtrip", + "01103_check_cpu_instructions_at_startup", + "01114_mysql_database_engine_segfault", + "00834_cancel_http_readonly_queries_on_client_close", + "_arrow", + "01099_parallel_distributed_insert_select", + "01300_client_save_history_when_terminated", + "orc_output", + "01370_client_autocomplete_word_break_characters" + ], + "release-build": [ + "avx2" + ], + "database-atomic": [ + "00065_loyalty_with_storage_join", + "avx", + "00738_lock_for_inner_table", + "00699_materialized_view_mutations", + "00609_mv_index_in_in", + "00510_materizlized_view_and_deduplication_zookeeper", + "00604_show_create_database", + "00080_show_tables_and_system_tables", + "01272_suspicious_codecs", + "01249_bad_arguments_for_bloom_filter", + "00423_storage_log_single_thread", + "00311_array_primary_key", + "00226_zookeeper_deduplication_and_unexpected_parts", + "00180_attach_materialized_view", + "00116_storage_set", + "00816_long_concurrent_alter_column", + "00992_system_parts_race_condition_zookeeper" + ], + "polymorphic-parts": [ + "avx", + "01045_order_by_pk_special_storages", + "01042_check_query_and_last_granule_size", + "00961_checksums_in_system_parts_columns_table", + "00933_test_fix_extra_seek_on_compressed_cache", + "00926_adaptive_index_granularity_collapsing_merge_tree", + "00926_adaptive_index_granularity_merge_tree", + "00926_adaptive_index_granularity_replacing_merge_tree", + "00926_adaptive_index_granularity_versioned_collapsing_merge_tree", + "00804_test_delta_codec_compression", + "00731_long_merge_tree_select_opened_files", + "00653_verification_monotonic_data_load", + "00484_preferred_max_column_in_block_size_bytes", + "00446_clear_column_in_partition_zookeeper", + "00443_preferred_block_size_bytes", + "00160_merge_and_index_in_in", + "01055_compact_parts", + "01039_mergetree_exec_time", + "00933_ttl_simple", + "00753_system_columns_and_system_tables" + ] +} diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index f5f59ce7a13..5b2129fc5bf 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev libboost-iostreams-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev rapidjson-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev libboost-iostreams-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libcurl4-openssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev 
libhyperscan-dev rapidjson-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh index 80a5d82d6f0..71920304d56 100755 --- a/utils/simple-backport/backport.sh +++ b/utils/simple-backport/backport.sh @@ -26,16 +26,18 @@ then echo Some commits will be missed, review these manually. fi -# NOTE keep in sync with ./changelog.sh. +# NOTE keep in sync with ./backport.sh. # Search for PR numbers in commit messages. First variant is normal merge, and second # variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; +find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ed of]*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ed of]*#\([[:digit:]]\+\).*$/\1/Ip") + s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") -"${find_prs[@]}" master-log.txt | sort -rn > master-prs.txt -"${find_prs[@]}" "$branch-log.txt" | sort -rn > "$branch-prs.txt" +# awk is to filter out small task numbers from different task tracker, which are +# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. +"${find_prs[@]}" master-log.txt | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > master-prs.txt +"${find_prs[@]}" "$branch-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "$branch-prs.txt" # Find all master PRs that are not in branch by calculating differences of two PR lists. grep -f "$branch-prs.txt" -F -x -v master-prs.txt > "$branch-diff-prs.txt" diff --git a/utils/simple-backport/changelog.sh b/utils/simple-backport/changelog.sh index 75a54a50b92..33908414235 100755 --- a/utils/simple-backport/changelog.sh +++ b/utils/simple-backport/changelog.sh @@ -1,6 +1,8 @@ #!/bin/bash set -e +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + from="$1" to="$2" log_command=(git log "$from..$to" --first-parent) @@ -19,14 +21,17 @@ fi # NOTE keep in sync with ./backport.sh. # Search for PR numbers in commit messages. First variant is normal merge, and second # variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; +find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ed of]*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ed of]*#\([[:digit:]]\+\).*$/\1/Ip") + s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") -"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq > "changelog-prs.txt" +# awk is to filter out small task numbers from different task tracker, which are +# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. +"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." 
+if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi function github_download() { @@ -82,5 +87,5 @@ done echo "### ClickHouse release $to FIXME as compared to $from " > changelog.md -./format-changelog.py changelog-prs-filtered.txt >> changelog.md +"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md cat changelog.md diff --git a/utils/simple-backport/format-changelog.py b/utils/simple-backport/format-changelog.py index 705d1903c78..ccda88c6809 100755 --- a/utils/simple-backport/format-changelog.py +++ b/utils/simple-backport/format-changelog.py @@ -1,12 +1,13 @@ #!/usr/bin/python3 -import os -import sys -import itertools import argparse -import json import collections +import fuzzywuzzy.fuzz +import itertools +import json +import os import re +import sys parser = argparse.ArgumentParser(description='Format changelog for given PRs.') parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs='?', default=sys.stdin, help='File with PR numbers, one per line.') @@ -26,7 +27,7 @@ def parse_one_pull_request(item): if lines: i = 0 while i < len(lines): - if re.match(r'(?i).*category.*:$', lines[i]): + if re.match(r'(?i)^[>*_ ]*change\s*log\s*category', lines[i]): i += 1 if i >= len(lines): break @@ -37,7 +38,7 @@ def parse_one_pull_request(item): break category = re.sub(r'^[-*\s]*', '', lines[i]) i += 1 - elif re.match(r'(?i)^\**\s*(Short description|Change\s*log entry)', lines[i]): + elif re.match(r'(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)', lines[i]): i += 1 # Can have one empty line between header and the entry itself. Filter it out. if i < len(lines) and not lines[i]: @@ -74,6 +75,11 @@ def parse_one_pull_request(item): return True +# This array gives the preferred category order, and is also used to +# normalize category names. +categories_preferred_order = ['Backward Incompatible Change', + 'New Feature', 'Bug Fix', 'Improvement', 'Performance Improvement', + 'Build/Testing/Packaging Improvement', 'Other'] category_to_pr = collections.defaultdict(lambda: []) users = {} @@ -84,6 +90,13 @@ for line in args.file: continue assert(pr['category']) + + # Normalize category name + for c in categories_preferred_order: + if fuzzywuzzy.fuzz.ratio(pr['category'], c) >= 90: + pr['category'] = c + break + category_to_pr[pr['category']].append(pr) user_id = pr['user']['id'] users[user_id] = json.loads(open(f'user{user_id}.json').read()) @@ -103,7 +116,6 @@ def print_category(category): print() # Print categories in preferred order -categories_preferred_order = ['Backward Incompatible Change', 'New Feature', 'Bug Fix', 'Improvement', 'Performance Improvement', 'Build/Testing/Packaging Improvement', 'Other'] for category in categories_preferred_order: if category in category_to_pr: print_category(category) diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index 61f9d99f5d8..88ddb2d0868 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -64,7 +64,9 @@ Results for MacBook Pro are from Denis Glazachev. MacOS Catalina Version Results for AMD EPYC 7702 are from Peng Gao in sina.com.
Results for Intel NUC are from Alexander Zaitsev, Altinity.
Xeon Gold 6230 server is using 4 x SAMSUNG datacenter class SSD in RAID-10.
-Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because cache was not flushed for every next query.

+Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because cache was not flushed for every next query.
+Results for AWS Lightsail are from Vamsi Krishna B. +

{% endblock %} diff --git a/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json b/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json new file mode 100644 index 00000000000..ab55813b8e8 --- /dev/null +++ b/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS Lightsail 4vCPU", + "system_full": "AWS Lightsail E5-2686 v4 @ 2.30GHz, 16 GiB RAM", + "time": "2020-07-02 00:00:00", + "kind": "cloud", + "result": + [ + [0.002, 0.001, 0.001], + [0.046, 0.026, 0.025], + [0.156, 0.077, 0.078], + [0.746, 0.098, 0.095], + [1.383, 0.233, 0.218], + [2.161, 0.646, 0.626], + [0.041, 0.037, 0.038], + [0.032, 0.029, 0.026], + [1.494, 1.190, 1.159], + [1.843, 1.354, 1.357], + [0.841, 0.375, 0.375], + [1.254, 0.446, 0.448], + [2.235, 1.792, 1.746], + [4.175, 2.354, 2.315], + [2.602, 2.075, 2.042], + [2.258, 2.085, 2.058], + [6.402, 5.909, 5.895], + [4.178, 3.618, 3.670], + [12.978, 12.037, 11.764], + [0.754, 0.107, 0.102], + [19.615, 1.888, 1.868], + [21.740, 2.208, 2.171], + [41.009, 5.277, 5.245], + [38.068, 2.475, 2.435], + [4.739, 0.693, 0.680], + [1.766, 0.549, 0.542], + [4.730, 0.684, 0.672], + [19.010, 1.849, 1.811], + [15.999, 3.086, 3.099], + [3.655, 3.609, 3.593], + [3.967, 1.768, 1.836], + [10.566, 3.036, 2.963], + [20.065, 19.091, null], + [21.474, 8.597, 8.501], + [21.484, 8.563, 8.533], + [3.850, 3.487, 3.477], + [0.408, 0.240, 0.239], + [0.125, 0.087, 0.084], + [0.132, 0.073, 0.073], + [0.685, 0.471, 0.480], + [0.089, 0.028, 0.025], + [0.044, 0.027, 0.018], + [0.007, 0.007, 0.006] + ] + } +] diff --git a/website/blog/README.md b/website/blog/README.md index 4b2e122608d..c99f293f669 100644 --- a/website/blog/README.md +++ b/website/blog/README.md @@ -38,10 +38,14 @@ tags: ['meetup','Beijing','China','events'] ![ClickHouse branded Beijing duck](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/9.jpg) ``` +## How To Preview My Post? + +Use [deploy-to-test.sh](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/deploy-to-test.sh) script. Note that on the first use you'll need to follow the steps in its first comment, and [install prerequisites for build.py](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/README.md#use-buildpy-use-build-py). Alternatively, you can use `--livereload=N` argument of [build.py](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/build.py). + ## How To Add a New Blog Language? If you want to write a guest post, you are welcome to use your native language or make multiple posts in multiple languages - Unlike documentation, blog languages are independent, i.e. they have partially overlapping sets of posts and it's ok. Most posts are written only in one language because they are not relevant to audiences of other languages. +Unlike documentation, blog languages are independent, i.e. they have partially overlapping sets of posts and it's ok. Most posts are written only in one language because they are not relevant to audiences of other languages. At the moment it's not so straightforward to set up a new language for blog and it won't be documented for now, but you can just create a language directory with the first post as described above and we'll configure the website infrastructure to include it during/after merging the pull-request. 
diff --git a/website/blog/en/2020/package-repository-behind-cdn.md b/website/blog/en/2020/package-repository-behind-cdn.md new file mode 100644 index 00000000000..c5857bcd4a4 --- /dev/null +++ b/website/blog/en/2020/package-repository-behind-cdn.md @@ -0,0 +1,71 @@ +--- +title: 'Package Repository Behind CDN' +image: 'https://blog-images.clickhouse.tech/en/2020/package-repository-behind-cdn/main.jpg' +date: '2020-07-02' +tags: ['article', 'CDN', 'Cloudflare', 'repository', 'deb', 'rpm', 'tgz'] +--- + +On its initial open-source launch, ClickHouse packages were published in an independent repository implemented on Yandex infrastructure. We'd love to use the default repositories of Linux distributions, but, unfortunately, they have their own strict rules on third-party library usage and software compilation options. These rules happen to conflict with how ClickHouse is built. In 2018 ClickHouse was added to the [official Debian repository](https://packages.debian.org/sid/clickhouse-server) as an experiment, but it didn't get much traction. Adapting to those rules ended up producing something closer to a demo version of ClickHouse, with crippled performance and limited features. + +!!! info "TL;DR" + If you have configured your system to use `http://repo.yandex.ru/clickhouse/` for fetching ClickHouse packages, replace it with `https://repo.clickhouse.tech/`. + +Distributing packages via our own repository was working totally fine until ClickHouse started getting traction in countries far from Moscow, most notably the USA and China. Downloading large package files from a remote location was especially painful for Chinese ClickHouse users, likely due to how China is connected to the rest of the world via its famous firewall. But at least it worked (with high latencies and low throughput), while in some smaller countries there was no access to this repository at all, and people living there had to host their own mirrors on neutral ground as a workaround. + +Earlier this year we switched the official ClickHouse website to being served via a global CDN by [Cloudflare](https://www.cloudflare.com) on the `clickhouse.tech` domain. To solve the download issues discussed above, we also configured a new location for ClickHouse packages, likewise served by Cloudflare, at [repo.clickhouse.tech](https://repo.clickhouse.tech). It used to have some quirks, but now it seems to be working fine while improving throughput and latencies in remote geographical locations by over an order of magnitude. + +## Switching To Repository Behind CDN + +This transition has some more benefits besides faster package fetching, but we'll get back to them in a minute. One of the key reasons for this post is that we can't actually influence the repository configuration of ClickHouse users. We have updated all instructions, but for people who have followed these instructions earlier, **action is required** to use the new location behind CDN. Basically, you need to replace `http://repo.yandex.ru/clickhouse/` with `https://repo.clickhouse.tech/` in your package manager configuration. 
+ +One-liner for Ubuntu or Debian: +```bash +sudo apt-get install apt-transport-https ca-certificates && sudo perl -pi -e 's|http://repo.yandex.ru/clickhouse/|https://repo.clickhouse.tech/|g' /etc/apt/sources.list.d/clickhouse.list && sudo apt-get update +``` + +One-liner for RedHat or CentOS: +```bash +sudo perl -pi -e 's|http://repo.yandex.ru/clickhouse/|https://repo.clickhouse.tech/|g' /etc/yum.repos.d/clickhouse* +``` + +As you might have noticed, the domain name is not the only thing that has changed: the new URL uses the `https://` protocol. Usually, HTTPS is considered less important for package repositories than for normal websites because most package managers check [GPG signatures](https://en.wikipedia.org/wiki/GNU_Privacy_Guard) for what they download anyway. However, it still has some benefits: for example, it's not so uncommon for people to download packages via a browser, `curl`, or `wget`, and install them manually (while for [tgz](https://repo.clickhouse.tech/tgz/) builds it's the only option). Fewer opportunities for sniffing traffic can't hurt either. The downside is that `apt` in some Debian flavors has no HTTPS support by default and needs a couple more packages to be installed (`apt-transport-https` and `ca-certificates`). + +## Investigating Repository Usage + +The next important thing we obtained by using Cloudflare for our package repository is observability. Of course, the same could have been implemented from scratch, but it'd require extra resources to develop and maintain, while Cloudflare provides quite rich tools for analyzing what's going on in your domains. + +!!! info "Did you know?" + It's kind of off-topic, but those Cloudflare features are internally based on ClickHouse, see their [HTTP analytics](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) and [DNS analytics](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/) blog posts. + +Just a few weeks ago they also added a [cache analytics](https://blog.cloudflare.com/introducing-cache-analytics/) feature, which allows drilling into how effectively content is cached on CDN edges and improving the CDN configuration accordingly. For example, it helped us debug some inconsistencies in cached repository metadata. + +## Digging Deeper + +All those built-in observability tools provided by Cloudflare share one weak point: they are purely technical and generic, without any domain-specific awareness. They excel at debugging low-level issues, but it's hard to get a higher-level picture based on them. In our package repository scenario, we're not so interested in frequent metadata update requests, but we'd like to see reports on package downloads by version, kind, and so on. We definitely didn't want to operate a separate infrastructure to get those reports, but given there was no out-of-the-box solution, we had to be creative and managed to find a cool middle ground. + +Ever heard the [“serverless computing”](https://en.wikipedia.org/wiki/Serverless_computing) hype recently? That was the basic idea: let's assemble a bunch of serverless or managed services to get what we want, without any dedicated servers. The plan was pretty straightforward: + +1. Dump details about package downloads to a ClickHouse database. +2. Connect some [BI](https://en.wikipedia.org/wiki/Business_intelligence) tool to that ClickHouse database and configure required charts/dashboards. 
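To make step 1 above more concrete, here is a minimal sketch of what a table for raw download events might look like in the Managed ClickHouse cluster. The table name, columns, and sample values are illustrative assumptions; the post does not describe the actual production schema.

```sql
-- Hypothetical table for download events forwarded from the CDN worker
-- through the cloud function; all identifiers here are assumptions.
CREATE TABLE IF NOT EXISTS repo_downloads
(
    event_time   DateTime,                 -- when the package was requested
    package_kind LowCardinality(String),   -- 'deb', 'rpm' or 'tgz'
    package_name String,                   -- e.g. 'clickhouse-server'
    version      String,                   -- e.g. '20.5.2.7'
    country_code FixedString(2)            -- coarse geography from the CDN edge
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_time)
ORDER BY (package_kind, version, event_time);

-- One event, roughly as the cloud function might insert it:
INSERT INTO repo_downloads VALUES (now(), 'deb', 'clickhouse-server', '20.5.2.7', 'US');
```

Monthly partitioning and an order key starting with the package kind would keep the per-version and per-kind reports cheap to compute, but this is only one reasonable layout, not the one necessarily used behind the dashboards.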
+ +Implementing it required a little bit of research, but the overall solution turned out to be quite elegant: + +1. For a ClickHouse database, it was a no-brainer to use [Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse). With a few clicks in the admin interface, we got a running ClickHouse cluster with properly configured high availability and automated backups. Ad-hoc SQL queries could be run from that same admin interface. +2. Cloudflare allows customers to run custom code on CDN edge servers in a serverless fashion (so-called [workers](https://workers.cloudflare.com)). Those workers are executed in a tight sandbox which doesn't allow for anything complicated, but this feature fits perfectly for gathering some data about download events and sending it somewhere else. This is normally a paid feature, but special thanks to Connor Peshek from Cloudflare, who arranged a lot of extra features for free on `clickhouse.tech` when we applied to their [open-source support program](https://developers.cloudflare.com/sponsorships/). +3. To avoid publicly exposing yet another ClickHouse instance (like we did with the **[playground](https://clickhouse.tech/docs/en/getting-started/playground/)**, even though it's a 100% anti-pattern), the download event data is sent to [Yandex Cloud Functions](https://cloud.yandex.com/services/functions). It's a generic serverless computing framework at Yandex Cloud, which also allows running custom code without maintaining any servers, but with less strict sandbox limitations and direct access to other cloud services, such as the Managed ClickHouse cluster needed for this task. +4. It didn't require much effort to choose a visualization tool either, as [DataLens BI](https://cloud.yandex.com/docs/datalens/) is tightly integrated with ClickHouse, capable of building what's required right from the UI, and satisfies the “no servers” requirement because it's a SaaS solution. The public access option for charts and dashboards has also proved handy. 
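As an example of the per-version and per-kind reports mentioned above, a chart on such a dashboard could be backed by an aggregation along these lines. It assumes the hypothetical `repo_downloads` table sketched earlier, not the schema actually used in production.

```sql
-- Daily download counts per package kind and version — roughly the shape
-- of data a BI chart would plot; table and column names are assumptions.
SELECT
    toDate(event_time) AS day,
    package_kind,
    version,
    count() AS downloads
FROM repo_downloads
WHERE event_time >= now() - INTERVAL 30 DAY
GROUP BY day, package_kind, version
ORDER BY day ASC, downloads DESC;
```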
+ diff --git a/website/images/logo-180x180.png b/website/images/logo-180x180.png new file mode 100644 index 00000000000..ee9bae1f61e Binary files /dev/null and b/website/images/logo-180x180.png differ diff --git a/website/templates/blog/rss.xml b/website/templates/blog/rss.xml new file mode 100644 index 00000000000..1f584b522aa --- /dev/null +++ b/website/templates/blog/rss.xml @@ -0,0 +1,23 @@ + + + {{ config.site_name }} + {{ config.site_url }} + + + {{ config.extra.today|to_rfc882 }} + + {% for post in config.extra.post_meta.values() %} + {% set url = config.extra.website_url + post['url'] %} + + {{ post['title'] }} + ]]> + {{ post['date']|to_rfc882 }} + {{ url }} + {{ url }} + {# TODO: #} + + {% endfor %} + + diff --git a/website/templates/common_meta.html b/website/templates/common_meta.html index 11a36414cd7..89a650fba6a 100644 --- a/website/templates/common_meta.html +++ b/website/templates/common_meta.html @@ -6,6 +6,7 @@ {% if title %}{{ title }}{% else %}{{ _('ClickHouse - fast open-source OLAP DBMS') }}{% endif %} + @@ -26,7 +27,7 @@ {% if page and page.meta.tags %} + content="{% for tag in page.meta.tags %}{{tag}}{{ ', ' if not loop.last }}{% endfor %}" /> {% else %} @@ -45,3 +46,7 @@ {% for prefetch_item in prefetch_items %} {% endfor %} + +{% if is_blog %} + +{% endif %} diff --git a/website/templates/docs/content.html b/website/templates/docs/content.html index 711ab0bd3b8..29db92e377a 100644 --- a/website/templates/docs/content.html +++ b/website/templates/docs/content.html @@ -7,21 +7,19 @@ {% endif %} {% if ancestors %} {% set ancestor_ns = namespace(level=ancestors|length) %} -
+ {% endif %} {% include "templates/docs/machine-translated.html" %}